You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2016/08/23 23:17:54 UTC

[01/50] [abbrv] lucenenet git commit: Added missing Core.Util.OfflineSorter members (+ tests) required by Hunspell.

Repository: lucenenet
Updated Branches:
  refs/heads/analysis-work 76c4a537d -> 4deebe8fe


Added missing Core.Util.OfflineSorter members (+ tests) required by Hunspell.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/572ad694
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/572ad694
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/572ad694

Branch: refs/heads/analysis-work
Commit: 572ad694199d3c7a4832ae12abcbd11d6c960e7d
Parents: 294f1c2
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Aug 18 17:20:57 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Aug 20 11:33:56 2016 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Core/Lucene.Net.csproj           |    1 +
 .../Compatibility/BinaryReaderDataInput.cs      |   34 +
 src/Lucene.Net.Core/Util/OfflineSorter.cs       | 1161 +++++++++---------
 src/Lucene.Net.Tests/Lucene.Net.Tests.csproj    |    1 +
 .../core/Util/TestOfflineSorter.cs              |  143 ++-
 5 files changed, 698 insertions(+), 642 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/572ad694/src/Lucene.Net.Core/Lucene.Net.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Lucene.Net.csproj b/src/Lucene.Net.Core/Lucene.Net.csproj
index 8d33617..26c8906 100644
--- a/src/Lucene.Net.Core/Lucene.Net.csproj
+++ b/src/Lucene.Net.Core/Lucene.Net.csproj
@@ -618,6 +618,7 @@
     <Compile Include="Support\Character.cs" />
     <Compile Include="Support\Arrays.cs" />
     <Compile Include="Support\CharacterIterator.cs" />
+    <Compile Include="Support\Compatibility\BinaryReaderDataInput.cs" />
     <Compile Include="Support\Compatibility\BinaryWriterDataOutput.cs" />
     <Compile Include="Support\Compatibility\Collections.cs" />
     <Compile Include="Support\ConcurrentHashMapWrapper.cs" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/572ad694/src/Lucene.Net.Core/Support/Compatibility/BinaryReaderDataInput.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/Compatibility/BinaryReaderDataInput.cs b/src/Lucene.Net.Core/Support/Compatibility/BinaryReaderDataInput.cs
new file mode 100644
index 0000000..d355df0
--- /dev/null
+++ b/src/Lucene.Net.Core/Support/Compatibility/BinaryReaderDataInput.cs
@@ -0,0 +1,34 @@
+\ufeffusing Lucene.Net.Store;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Support.Compatibility
+{
+    public class BinaryReaderDataInput : DataInput, IDisposable
+    {
+        private readonly BinaryReader br;
+        public BinaryReaderDataInput(BinaryReader br)
+        {
+            this.br = br;
+        }
+       
+        public override byte ReadByte()
+        {
+            return br.ReadByte();
+        }
+
+        public override void ReadBytes(byte[] b, int offset, int len)
+        {
+            byte[] temp = br.ReadBytes(len);
+            for (int i = offset; i < (offset + len) && i < temp.Length; i++)
+            {
+                b[i] = temp[i];
+            }
+        }
+
+        public void Dispose()
+        {
+            br.Dispose();
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/572ad694/src/Lucene.Net.Core/Util/OfflineSorter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/OfflineSorter.cs b/src/Lucene.Net.Core/Util/OfflineSorter.cs
index db0a86b..4f3084b 100644
--- a/src/Lucene.Net.Core/Util/OfflineSorter.cs
+++ b/src/Lucene.Net.Core/Util/OfflineSorter.cs
@@ -34,487 +34,488 @@ namespace Lucene.Net.Util
     /// </summary>
     public sealed class OfflineSorter
     {
-        // LUCENENET TODO: keep this code as it will be used by subprojects once ported
-        //        private bool InstanceFieldsInitialized = false;
-        //
-        //        private void InitializeInstanceFields()
-        //        {
-        //            Buffer = new BytesRefArray(BufferBytesUsed);
-        //        }
-        //
-        //        /// <summary>
-        //        /// Convenience constant for megabytes </summary>
-        //        public const long MB = 1024 * 1024;
-        //        /// <summary>
-        //        /// Convenience constant for gigabytes </summary>
-        //        public static readonly long GB = MB * 1024;
-        //
-        //        /// <summary>
-        //        /// Minimum recommended buffer size for sorting.
-        //        /// </summary>
-        //        public const long MIN_BUFFER_SIZE_MB = 32;
-        //
-        //        /// <summary>
-        //        /// Absolute minimum required buffer size for sorting.
-        //        /// </summary>
-        //        public static readonly long ABSOLUTE_MIN_SORT_BUFFER_SIZE = MB / 2;
-        //        private const string MIN_BUFFER_SIZE_MSG = "At least 0.5MB RAM buffer is needed";
-        //
-        //        /// <summary>
-        //        /// Maximum number of temporary files before doing an intermediate merge.
-        //        /// </summary>
-        //        public const int MAX_TEMPFILES = 128;
-        //
-        //        /// <summary>
-        //        /// A bit more descriptive unit for constructors.
-        //        /// </summary>
-        //        /// <seealso cref= #automatic() </seealso>
-        //        /// <seealso cref= #megabytes(long) </seealso>
-        //        public sealed class BufferSize
-        //        {
-        //            internal readonly int Bytes;
-        //
-        //            internal BufferSize(long bytes)
-        //            {
-        //                if (bytes > int.MaxValue)
-        //                {
-        //                    throw new System.ArgumentException("Buffer too large for Java (" + (int.MaxValue / MB) + "mb max): " + bytes);
-        //                }
-        //
-        //                if (bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE)
-        //                {
-        //                    throw new System.ArgumentException(MIN_BUFFER_SIZE_MSG + ": " + bytes);
-        //                }
-        //
-        //                this.Bytes = (int)bytes;
-        //            }
-        //
-        //            /// <summary>
-        //            /// Creates a <seealso cref="BufferSize"/> in MB. The given
-        //            /// values must be &gt; 0 and &lt; 2048.
-        //            /// </summary>
-        //            public static BufferSize Megabytes(long mb)
-        //            {
-        //                return new BufferSize(mb * MB);
-        //            }
-        //
-        //            /// <summary>
-        //            /// Approximately half of the currently available free heap, but no less
-        //            /// than <seealso cref="#ABSOLUTE_MIN_SORT_BUFFER_SIZE"/>. However if current heap allocation
-        //            /// is insufficient or if there is a large portion of unallocated heap-space available
-        //            /// for sorting consult with max allowed heap size.
-        //            /// </summary>
-        //            public static BufferSize Automatic()
-        //            {
-        //                var proc = Process.GetCurrentProcess();
-        //
-        //                // take sizes in "conservative" order
-        //                long max = proc.PeakVirtualMemorySize64; // max allocated; java has it as Runtime.maxMemory();
-        //                long total = proc.VirtualMemorySize64; // currently allocated; java has it as Runtime.totalMemory();
-        //                long free = rt.freeMemory(); // unused portion of currently allocated
-        //                long totalAvailableBytes = max - total + free;
-        //
-        //                // by free mem (attempting to not grow the heap for this)
-        //                long sortBufferByteSize = free / 2;
-        //                const long minBufferSizeBytes = MIN_BUFFER_SIZE_MB * MB;
-        //                if (sortBufferByteSize < minBufferSizeBytes || totalAvailableBytes > 10 * minBufferSizeBytes) // lets see if we need/should to grow the heap
-        //                {
-        //                    if (totalAvailableBytes / 2 > minBufferSizeBytes) // there is enough mem for a reasonable buffer
-        //                    {
-        //                        sortBufferByteSize = totalAvailableBytes / 2; // grow the heap
-        //                    }
-        //                    else
-        //                    {
-        //                        //heap seems smallish lets be conservative fall back to the free/2
-        //                        sortBufferByteSize = Math.Max(ABSOLUTE_MIN_SORT_BUFFER_SIZE, sortBufferByteSize);
-        //                    }
-        //                }
-        //                return new BufferSize(Math.Min((long)int.MaxValue, sortBufferByteSize));
-        //            }
-        //        }
-        //
-        //        /// <summary>
-        //        /// Sort info (debugging mostly).
-        //        /// </summary>
-        //        public class SortInfo
-        //        {
-        //            internal bool InstanceFieldsInitialized = false;
-        //
-        //            internal virtual void InitializeInstanceFields()
-        //            {
-        //                BufferSize = OuterInstance.RamBufferSize.Bytes;
-        //            }
-        //
-        //            private readonly OfflineSorter OuterInstance;
-        //
-        //            /// <summary>
-        //            /// number of temporary files created when merging partitions </summary>
-        //            public int TempMergeFiles;
-        //            /// <summary>
-        //            /// number of partition merges </summary>
-        //            public int MergeRounds;
-        //            /// <summary>
-        //            /// number of lines of data read </summary>
-        //            public int Lines;
-        //            /// <summary>
-        //            /// time spent merging sorted partitions (in milliseconds) </summary>
-        //            public long MergeTime;
-        //            /// <summary>
-        //            /// time spent sorting data (in milliseconds) </summary>
-        //            public long SortTime;
-        //            /// <summary>
-        //            /// total time spent (in milliseconds) </summary>
-        //            public long TotalTime;
-        //            /// <summary>
-        //            /// time spent in i/o read (in milliseconds) </summary>
-        //            public long ReadTime;
-        //            /// <summary>
-        //            /// read buffer size (in bytes) </summary>
-        //            public long BufferSize;
-        //
-        //            /// <summary>
-        //            /// create a new SortInfo (with empty statistics) for debugging </summary>
-        //            public SortInfo(OfflineSorter outerInstance)
-        //            {
-        //                this.OuterInstance = outerInstance;
-        //
-        //                if (!InstanceFieldsInitialized)
-        //                {
-        //                    InitializeInstanceFields();
-        //                    InstanceFieldsInitialized = true;
-        //                }
-        //            }
-        //
-        //            public override string ToString()
-        //            {
-        //                return string.Format("time=%.2f sec. total (%.2f reading, %.2f sorting, %.2f merging), lines=%d, temp files=%d, merges=%d, soft ram limit=%.2f MB", TotalTime / 1000.0d, ReadTime / 1000.0d, SortTime / 1000.0d, MergeTime / 1000.0d, Lines, TempMergeFiles, MergeRounds, (double)BufferSize / MB);
-        //            }
-        //        }
-        //
-        //        private readonly BufferSize RamBufferSize;
-        //
-        //        private readonly Counter BufferBytesUsed = Counter.NewCounter();
-        //        private BytesRefArray Buffer;
-        //        private SortInfo sortInfo;
-        //        private readonly int MaxTempFiles;
-        //        private readonly IComparer<BytesRef> comparator;
-        //
-        //        /// <summary>
-        //        /// Default comparator: sorts in binary (codepoint) order </summary>
-        //        public static readonly IComparer<BytesRef> DEFAULT_COMPARATOR = BytesRef.UTF8SortedAsUnicodeComparator.Instance;
-        //
-        //        /// <summary>
-        //        /// Defaults constructor.
-        //        /// </summary>
-        //        /// <seealso cref= #defaultTempDir() </seealso>
-        //        /// <seealso cref= BufferSize#automatic() </seealso>
-        //        public OfflineSorter()
-        //            : this(DEFAULT_COMPARATOR, BufferSize.Automatic(), DefaultTempDir(), MAX_TEMPFILES)
-        //        {
-        //            if (!InstanceFieldsInitialized)
-        //            {
-        //                InitializeInstanceFields();
-        //                InstanceFieldsInitialized = true;
-        //            }
-        //        }
-        //
-        //        /// <summary>
-        //        /// Defaults constructor with a custom comparator.
-        //        /// </summary>
-        //        /// <seealso cref= #defaultTempDir() </seealso>
-        //        /// <seealso cref= BufferSize#automatic() </seealso>
-        //        public OfflineSorter(IComparer<BytesRef> comparator)
-        //            : this(comparator, BufferSize.Automatic(), DefaultTempDir(), MAX_TEMPFILES)
-        //        {
-        //            if (!InstanceFieldsInitialized)
-        //            {
-        //                InitializeInstanceFields();
-        //                InstanceFieldsInitialized = true;
-        //            }
-        //        }
-        //
-        //        /// <summary>
-        //        /// All-details constructor.
-        //        /// </summary>
-        //        public OfflineSorter(IComparer<BytesRef> comparator, BufferSize ramBufferSize, /*DirectoryInfo tempDirectory,*/ int maxTempfiles)
-        //        {
-        //            if (!InstanceFieldsInitialized)
-        //            {
-        //                InitializeInstanceFields();
-        //                InstanceFieldsInitialized = true;
-        //            }
-        //            if (ramBufferSize.Bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE)
-        //            {
-        //                throw new System.ArgumentException(MIN_BUFFER_SIZE_MSG + ": " + ramBufferSize.Bytes);
-        //            }
-        //
-        //            if (maxTempfiles < 2)
-        //            {
-        //                throw new System.ArgumentException("maxTempFiles must be >= 2");
-        //            }
-        //
-        //            this.RamBufferSize = ramBufferSize;
-        //            this.MaxTempFiles = maxTempfiles;
-        //            this.comparator = comparator;
-        //        }
-        //
-        //        /// <summary>
-        //        /// Sort input to output, explicit hint for the buffer size. The amount of allocated
-        //        /// memory may deviate from the hint (may be smaller or larger).
-        //        /// </summary>
-        //        public SortInfo Sort(FileInfo input, FileInfo output)
-        //        {
-        //            sortInfo = new SortInfo(this) {TotalTime = DateTime.Now.Millisecond};
-        //
-        //            output.Delete();
-        //
-        //            var merges = new List<FileInfo>();
-        //            bool success2 = false;
-        //            try
-        //            {
-        //                var inputStream = new ByteSequencesReader(input);
-        //                bool success = false;
-        //                try
-        //                {
-        //                    int lines = 0;
-        //                    while ((lines = ReadPartition(inputStream)) > 0)
-        //                    {
-        //                        merges.Add(SortPartition(lines));
-        //                        sortInfo.TempMergeFiles++;
-        //                        sortInfo.Lines += lines;
-        //
-        //                        // Handle intermediate merges.
-        //                        if (merges.Count == MaxTempFiles)
-        //                        {
-        //                            var intermediate = new FileInfo(Path.GetTempFileName());
-        //                            try
-        //                            {
-        //                                MergePartitions(merges, intermediate);
-        //                            }
-        //                            finally
-        //                            {
-        //                                foreach (var file in merges)
-        //                                {
-        //                                    file.Delete();
-        //                                }
-        //                                merges.Clear();
-        //                                merges.Add(intermediate);
-        //                            }
-        //                            sortInfo.TempMergeFiles++;
-        //                        }
-        //                    }
-        //                    success = true;
-        //                }
-        //                finally
-        //                {
-        //                    if (success)
-        //                    {
-        //                        IOUtils.Close(inputStream);
-        //                    }
-        //                    else
-        //                    {
-        //                        IOUtils.CloseWhileHandlingException(inputStream);
-        //                    }
-        //                }
-        //
-        //                // One partition, try to rename or copy if unsuccessful.
-        //                if (merges.Count == 1)
-        //                {
-        //                    FileInfo single = merges[0];
-        //                    Copy(single, output);
-        //                    try
-        //                    {
-        //                        File.Delete(single.FullName);
-        //                    }
-        //                    catch (Exception)
-        //                    {
-        //                        // ignored
-        //                    }
-        //                }
-        //                else
-        //                {
-        //                    // otherwise merge the partitions with a priority queue.
-        //                    MergePartitions(merges, output);
-        //                }
-        //                success2 = true;
-        //            }
-        //            finally
-        //            {
-        //                foreach (FileInfo file in merges)
-        //                {
-        //                    file.Delete();
-        //                }
-        //                if (!success2)
-        //                {
-        //                    output.Delete();
-        //                }
-        //            }
-        //
-        //            sortInfo.TotalTime = (DateTime.Now.Millisecond - sortInfo.TotalTime);
-        //            return sortInfo;
-        //        }
-        //
-        //        /// <summary>
-        //        /// Returns the default temporary directory. By default, the System's temp folder. If not accessible
-        //        /// or not available, an IOException is thrown
-        //        /// </summary>
-        //        public static DirectoryInfo DefaultTempDir()
-        //        {
-        //            return new DirectoryInfo(Path.GetTempPath());
-        //        }
-        //
-        //        /// <summary>
-        //        /// Copies one file to another.
-        //        /// </summary>
-        //        private static void Copy(FileInfo file, FileInfo output)
-        //        {
-        //            File.Copy(file.FullName, output.FullName);
-        //        }
-        //
-        //        /// <summary>
-        //        /// Sort a single partition in-memory. </summary>
-        //        internal FileInfo SortPartition(int len)
-        //        {
-        //            var data = this.Buffer;
-        //            var tempFile = new FileInfo(Path.GetTempFileName());
-        //            //var tempFile1 = File.Create(new ());
-        //            //FileInfo tempFile = FileInfo.createTempFile("sort", "partition", TempDirectory);
-        //
-        //            long start = DateTime.Now.Millisecond;
-        //            sortInfo.SortTime += (DateTime.Now.Millisecond - start);
-        //
-        //            var @out = new ByteSequencesWriter(tempFile);
-        //            BytesRef spare;
-        //            try
-        //            {
-        //                BytesRefIterator iter = Buffer.Iterator(comparator);
-        //                while ((spare = iter.Next()) != null)
-        //                {
-        //                    Debug.Assert(spare.Length <= short.MaxValue);
-        //                    @out.Write(spare);
-        //                }
-        //
-        //                @out.Dispose();
-        //
-        //                // Clean up the buffer for the next partition.
-        //                data.Clear();
-        //                return tempFile;
-        //            }
-        //            finally
-        //            {
-        //                IOUtils.Close(@out);
-        //            }
-        //        }
-        //
-        //        /// <summary>
-        //        /// Merge a list of sorted temporary files (partitions) into an output file </summary>
-        //        internal void MergePartitions(IList<FileInfo> merges, FileInfo outputFile)
-        //        {
-        //            long start = DateTime.Now.Millisecond;
-        //
-        //            var @out = new ByteSequencesWriter(outputFile);
-        //
-        //            PriorityQueue<FileAndTop> queue = new PriorityQueueAnonymousInnerClassHelper(this, merges.Count);
-        //
-        //            var streams = new ByteSequencesReader[merges.Count];
-        //            try
-        //            {
-        //                // Open streams and read the top for each file
-        //                for (int i = 0; i < merges.Count; i++)
-        //                {
-        //                    streams[i] = new ByteSequencesReader(merges[i]);
-        //                    sbyte[] line = streams[i].Read();
-        //                    if (line != null)
-        //                    {
-        //                        queue.InsertWithOverflow(new FileAndTop(i, line));
-        //                    }
-        //                }
-        //
-        //                // Unix utility sort() uses ordered array of files to pick the next line from, updating
-        //                // it as it reads new lines. The PQ used here is a more elegant solution and has
-        //                // a nicer theoretical complexity bound :) The entire sorting process is I/O bound anyway
-        //                // so it shouldn't make much of a difference (didn't check).
-        //                FileAndTop top;
-        //                while ((top = queue.Top()) != null)
-        //                {
-        //                    @out.Write(top.Current);
-        //                    if (!streams[top.Fd].Read(top.Current))
-        //                    {
-        //                        queue.Pop();
-        //                    }
-        //                    else
-        //                    {
-        //                        queue.UpdateTop();
-        //                    }
-        //                }
-        //
-        //                SortInfo.MergeTime += DateTime.UtcNow.Ticks - start;
-        //                SortInfo.MergeRounds++;
-        //            }
-        //            finally
-        //            {
-        //                // The logic below is: if an exception occurs in closing out, it has a priority over exceptions
-        //                // happening in closing streams.
-        //                try
-        //                {
-        //                    IOUtils.Close(streams);
-        //                }
-        //                finally
-        //                {
-        //                    IOUtils.Close(@out);
-        //                }
-        //            }
-        //        }
-        //
-        //        private class PriorityQueueAnonymousInnerClassHelper : PriorityQueue<FileAndTop>
-        //        {
-        //            private readonly OfflineSorter OuterInstance;
-        //
-        //            public PriorityQueueAnonymousInnerClassHelper(OfflineSorter outerInstance, int size)
-        //                : base(size)
-        //            {
-        //                this.OuterInstance = outerInstance;
-        //            }
-        //
-        //            public override bool LessThan(FileAndTop a, FileAndTop b)
-        //            {
-        //                return OuterInstance.comparator.Compare(a.Current, b.Current) < 0;
-        //            }
-        //        }
-        //
-        //        /// <summary>
-        //        /// Read in a single partition of data </summary>
-        //        internal int ReadPartition(ByteSequencesReader reader)
-        //        {
-        //            long start = DateTime.Now.Millisecond;
-        //            var scratch = new BytesRef();
-        //            while ((scratch.Bytes = reader.Read()) != null)
-        //            {
-        //                scratch.Length = scratch.Bytes.Length;
-        //                Buffer.Append(scratch);
-        //                // Account for the created objects.
-        //                // (buffer slots do not account to buffer size.)
-        //                if (RamBufferSize.Bytes < BufferBytesUsed.Get())
-        //                {
-        //                    break;
-        //                }
-        //            }
-        //            sortInfo.ReadTime += (DateTime.Now.Millisecond - start);
-        //            return Buffer.Size();
-        //        }
-        //
-        //        internal class FileAndTop
-        //        {
-        //            internal readonly int Fd;
-        //            internal readonly BytesRef Current;
-        //
-        //            internal FileAndTop(int fd, sbyte[] firstLine)
-        //            {
-        //                this.Fd = fd;
-        //                this.Current = new BytesRef(firstLine);
-        //            }
-        //        }
-        //
+        private bool InstanceFieldsInitialized = false;
+
+        private void InitializeInstanceFields()
+        {
+            Buffer = new BytesRefArray(BufferBytesUsed);
+        }
+
+        /// <summary>
+        /// Convenience constant for megabytes </summary>
+        public const long MB = 1024 * 1024;
+        /// <summary>
+        /// Convenience constant for gigabytes </summary>
+        public static readonly long GB = MB * 1024;
+
+        /// <summary>
+        /// Minimum recommended buffer size for sorting.
+        /// </summary>
+        public const long MIN_BUFFER_SIZE_MB = 32;
+
+        /// <summary>
+        /// Absolute minimum required buffer size for sorting.
+        /// </summary>
+        public static readonly long ABSOLUTE_MIN_SORT_BUFFER_SIZE = MB / 2;
+        private const string MIN_BUFFER_SIZE_MSG = "At least 0.5MB RAM buffer is needed";
+
+        /// <summary>
+        /// Maximum number of temporary files before doing an intermediate merge.
+        /// </summary>
+        public const int MAX_TEMPFILES = 128;
+
+        /// <summary>
+        /// A bit more descriptive unit for constructors.
+        /// </summary>
+        /// <seealso cref= #automatic() </seealso>
+        /// <seealso cref= #megabytes(long) </seealso>
+        public sealed class BufferSize
+        {
+            internal readonly int Bytes;
+
+            internal BufferSize(long bytes)
+            {
+                if (bytes > int.MaxValue)
+                {
+                    throw new System.ArgumentException("Buffer too large for Java (" + (int.MaxValue / MB) + "mb max): " + bytes);
+                }
+
+                if (bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE)
+                {
+                    throw new System.ArgumentException(MIN_BUFFER_SIZE_MSG + ": " + bytes);
+                }
+
+                this.Bytes = (int)bytes;
+            }
+
+            /// <summary>
+            /// Creates a <seealso cref="BufferSize"/> in MB. The given
+            /// values must be &gt; 0 and &lt; 2048.
+            /// </summary>
+            public static BufferSize Megabytes(long mb)
+            {
+                return new BufferSize(mb * MB);
+            }
+
+            /// <summary>
+            /// Approximately half of the currently available free heap, but no less
+            /// than <seealso cref="#ABSOLUTE_MIN_SORT_BUFFER_SIZE"/>. However if current heap allocation
+            /// is insufficient or if there is a large portion of unallocated heap-space available
+            /// for sorting consult with max allowed heap size.
+            /// </summary>
+            public static BufferSize Automatic()
+            {
+                long max, total, free;
+                using (var proc = Process.GetCurrentProcess())
+                {
+                    // take sizes in "conservative" order
+                    max = proc.PeakVirtualMemorySize64; // max allocated; java has it as Runtime.maxMemory();
+                    total = proc.VirtualMemorySize64; // currently allocated; java has it as Runtime.totalMemory();
+                    free = proc.PrivateMemorySize64; // unused portion of currently allocated; java has it as Runtime.freeMemory();
+                }
+                long totalAvailableBytes = max - total + free;
+
+                // by free mem (attempting to not grow the heap for this)
+                long sortBufferByteSize = free / 2;
+                const long minBufferSizeBytes = MIN_BUFFER_SIZE_MB * MB;
+                if (sortBufferByteSize < minBufferSizeBytes || totalAvailableBytes > 10 * minBufferSizeBytes) // lets see if we need/should to grow the heap
+                {
+                    if (totalAvailableBytes / 2 > minBufferSizeBytes) // there is enough mem for a reasonable buffer
+                    {
+                        sortBufferByteSize = totalAvailableBytes / 2; // grow the heap
+                    }
+                    else
+                    {
+                        //heap seems smallish lets be conservative fall back to the free/2
+                        sortBufferByteSize = Math.Max(ABSOLUTE_MIN_SORT_BUFFER_SIZE, sortBufferByteSize);
+                    }
+                }
+                return new BufferSize(Math.Min((long)int.MaxValue, sortBufferByteSize));
+            }
+        }
+
+        /// <summary>
+        /// Sort info (debugging mostly).
+        /// </summary>
+        public class SortInfo
+        {
+            internal bool InstanceFieldsInitialized = false;
+
+            internal virtual void InitializeInstanceFields()
+            {
+                BufferSize = OuterInstance.RamBufferSize.Bytes;
+            }
+
+            private readonly OfflineSorter OuterInstance;
+
+            /// <summary>
+            /// number of temporary files created when merging partitions </summary>
+            public int TempMergeFiles;
+            /// <summary>
+            /// number of partition merges </summary>
+            public int MergeRounds;
+            /// <summary>
+            /// number of lines of data read </summary>
+            public int Lines;
+            /// <summary>
+            /// time spent merging sorted partitions (in milliseconds) </summary>
+            public long MergeTime;
+            /// <summary>
+            /// time spent sorting data (in milliseconds) </summary>
+            public long SortTime;
+            /// <summary>
+            /// total time spent (in milliseconds) </summary>
+            public long TotalTime;
+            /// <summary>
+            /// time spent in i/o read (in milliseconds) </summary>
+            public long ReadTime;
+            /// <summary>
+            /// read buffer size (in bytes) </summary>
+            public long BufferSize;
+
+            /// <summary>
+            /// create a new SortInfo (with empty statistics) for debugging </summary>
+            public SortInfo(OfflineSorter outerInstance)
+            {
+                this.OuterInstance = outerInstance;
+
+                if (!InstanceFieldsInitialized)
+                {
+                    InitializeInstanceFields();
+                    InstanceFieldsInitialized = true;
+                }
+            }
+
+            public override string ToString()
+            {
+                return string.Format("time=%.2f sec. total (%.2f reading, %.2f sorting, %.2f merging), lines=%d, temp files=%d, merges=%d, soft ram limit=%.2f MB", TotalTime / 1000.0d, ReadTime / 1000.0d, SortTime / 1000.0d, MergeTime / 1000.0d, Lines, TempMergeFiles, MergeRounds, (double)BufferSize / MB);
+            }
+        }
+
+        private readonly BufferSize RamBufferSize;
+
+        private readonly Counter BufferBytesUsed = Counter.NewCounter();
+        private BytesRefArray Buffer;
+        private SortInfo sortInfo;
+        private readonly int MaxTempFiles;
+        private readonly IComparer<BytesRef> comparator;
+
+        /// <summary>
+        /// Default comparator: sorts in binary (codepoint) order </summary>
+        public static readonly IComparer<BytesRef> DEFAULT_COMPARATOR = BytesRef.UTF8SortedAsUnicodeComparator.Instance;
+
+        /// <summary>
+        /// Defaults constructor.
+        /// </summary>
+        /// <seealso cref= #defaultTempDir() </seealso>
+        /// <seealso cref= BufferSize#automatic() </seealso>
+        public OfflineSorter()
+            : this(DEFAULT_COMPARATOR, BufferSize.Automatic(), DefaultTempDir(), MAX_TEMPFILES)
+        {
+            if (!InstanceFieldsInitialized)
+            {
+                InitializeInstanceFields();
+                InstanceFieldsInitialized = true;
+            }
+        }
+
+        /// <summary>
+        /// Defaults constructor with a custom comparator.
+        /// </summary>
+        /// <seealso cref= #defaultTempDir() </seealso>
+        /// <seealso cref= BufferSize#automatic() </seealso>
+        public OfflineSorter(IComparer<BytesRef> comparator)
+            : this(comparator, BufferSize.Automatic(), DefaultTempDir(), MAX_TEMPFILES)
+        {
+            if (!InstanceFieldsInitialized)
+            {
+                InitializeInstanceFields();
+                InstanceFieldsInitialized = true;
+            }
+        }
+
+        /// <summary>
+        /// All-details constructor.
+        /// </summary>
+        public OfflineSorter(IComparer<BytesRef> comparator, BufferSize ramBufferSize, DirectoryInfo tempDirectory, int maxTempfiles)
+        {
+            if (!InstanceFieldsInitialized)
+            {
+                InitializeInstanceFields();
+                InstanceFieldsInitialized = true;
+            }
+            if (ramBufferSize.Bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE)
+            {
+                throw new System.ArgumentException(MIN_BUFFER_SIZE_MSG + ": " + ramBufferSize.Bytes);
+            }
+
+            if (maxTempfiles < 2)
+            {
+                throw new System.ArgumentException("maxTempFiles must be >= 2");
+            }
+
+            this.RamBufferSize = ramBufferSize;
+            this.MaxTempFiles = maxTempfiles;
+            this.comparator = comparator;
+        }
+
+        /// <summary>
+        /// Sort input to output, explicit hint for the buffer size. The amount of allocated
+        /// memory may deviate from the hint (may be smaller or larger).
+        /// </summary>
+        public SortInfo Sort(FileInfo input, FileInfo output)
+        {
+            sortInfo = new SortInfo(this) { TotalTime = DateTime.Now.Millisecond };
+
+            output.Delete();
+
+            var merges = new List<FileInfo>();
+            bool success2 = false;
+            try
+            {
+                var inputStream = new ByteSequencesReader(input);
+                bool success = false;
+                try
+                {
+                    int lines = 0;
+                    while ((lines = ReadPartition(inputStream)) > 0)
+                    {
+                        merges.Add(SortPartition(lines));
+                        sortInfo.TempMergeFiles++;
+                        sortInfo.Lines += lines;
+
+                        // Handle intermediate merges.
+                        if (merges.Count == MaxTempFiles)
+                        {
+                            var intermediate = new FileInfo(Path.GetTempFileName());
+                            try
+                            {
+                                MergePartitions(merges, intermediate);
+                            }
+                            finally
+                            {
+                                foreach (var file in merges)
+                                {
+                                    file.Delete();
+                                }
+                                merges.Clear();
+                                merges.Add(intermediate);
+                            }
+                            sortInfo.TempMergeFiles++;
+                        }
+                    }
+                    success = true;
+                }
+                finally
+                {
+                    if (success)
+                    {
+                        IOUtils.Close(inputStream);
+                    }
+                    else
+                    {
+                        IOUtils.CloseWhileHandlingException(inputStream);
+                    }
+                }
+
+                // One partition, try to rename or copy if unsuccessful.
+                if (merges.Count == 1)
+                {
+                    FileInfo single = merges[0];
+                    Copy(single, output);
+                    try
+                    {
+                        File.Delete(single.FullName);
+                    }
+                    catch (Exception)
+                    {
+                        // ignored
+                    }
+                }
+                else
+                {
+                    // otherwise merge the partitions with a priority queue.
+                    MergePartitions(merges, output);
+                }
+                success2 = true;
+            }
+            finally
+            {
+                foreach (FileInfo file in merges)
+                {
+                    file.Delete();
+                }
+                if (!success2)
+                {
+                    output.Delete();
+                }
+            }
+
+            sortInfo.TotalTime = (DateTime.Now.Millisecond - sortInfo.TotalTime);
+            return sortInfo;
+        }
+
+        /// <summary>
+        /// Returns the default temporary directory. By default, the System's temp folder. If not accessible
+        /// or not available, an IOException is thrown
+        /// </summary>
+        public static DirectoryInfo DefaultTempDir()
+        {
+            return new DirectoryInfo(Path.GetTempPath());
+        }
+
+        /// <summary>
+        /// Copies one file to another.
+        /// </summary>
+        private static void Copy(FileInfo file, FileInfo output)
+        {
+            File.Copy(file.FullName, output.FullName);
+        }
+
+        /// <summary>
+        /// Sort a single partition in-memory. </summary>
+        internal FileInfo SortPartition(int len)
+        {
+            var data = this.Buffer;
+            var tempFile = new FileInfo(Path.GetTempFileName());
+            //var tempFile1 = File.Create(new ());
+            //FileInfo tempFile = FileInfo.createTempFile("sort", "partition", TempDirectory);
+
+            long start = DateTime.Now.Millisecond;
+            sortInfo.SortTime += (DateTime.Now.Millisecond - start);
+
+            var @out = new ByteSequencesWriter(tempFile);
+            BytesRef spare;
+            try
+            {
+                BytesRefIterator iter = Buffer.Iterator(comparator);
+                while ((spare = iter.Next()) != null)
+                {
+                    Debug.Assert(spare.Length <= short.MaxValue);
+                    @out.Write(spare);
+                }
+
+                @out.Dispose();
+
+                // Clean up the buffer for the next partition.
+                data.Clear();
+                return tempFile;
+            }
+            finally
+            {
+                IOUtils.Close(@out);
+            }
+        }
+
+        /// <summary>
+        /// Merge a list of sorted temporary files (partitions) into an output file </summary>
+        internal void MergePartitions(IList<FileInfo> merges, FileInfo outputFile)
+        {
+            long start = DateTime.Now.Millisecond;
+
+            var @out = new ByteSequencesWriter(outputFile);
+
+            PriorityQueue<FileAndTop> queue = new PriorityQueueAnonymousInnerClassHelper(this, merges.Count);
+
+            var streams = new ByteSequencesReader[merges.Count];
+            try
+            {
+                // Open streams and read the top for each file
+                for (int i = 0; i < merges.Count; i++)
+                {
+                    streams[i] = new ByteSequencesReader(merges[i]);
+                    byte[] line = streams[i].Read();
+                    if (line != null)
+                    {
+                        queue.InsertWithOverflow(new FileAndTop(i, line));
+                    }
+                }
+
+                // Unix utility sort() uses ordered array of files to pick the next line from, updating
+                // it as it reads new lines. The PQ used here is a more elegant solution and has
+                // a nicer theoretical complexity bound :) The entire sorting process is I/O bound anyway
+                // so it shouldn't make much of a difference (didn't check).
+                FileAndTop top;
+                while ((top = queue.Top()) != null)
+                {
+                    @out.Write(top.Current);
+                    if (!streams[top.Fd].Read(top.Current))
+                    {
+                        queue.Pop();
+                    }
+                    else
+                    {
+                        queue.UpdateTop();
+                    }
+                }
+
+                sortInfo.MergeTime += DateTime.UtcNow.Ticks - start;
+                sortInfo.MergeRounds++;
+            }
+            finally
+            {
+                // The logic below is: if an exception occurs in closing out, it has a priority over exceptions
+                // happening in closing streams.
+                try
+                {
+                    IOUtils.Close(streams);
+                }
+                finally
+                {
+                    IOUtils.Close(@out);
+                }
+            }
+        }
+
+        private class PriorityQueueAnonymousInnerClassHelper : PriorityQueue<FileAndTop>
+        {
+            private readonly OfflineSorter OuterInstance;
+
+            public PriorityQueueAnonymousInnerClassHelper(OfflineSorter outerInstance, int size)
+                : base(size)
+            {
+                this.OuterInstance = outerInstance;
+            }
+
+            public override bool LessThan(FileAndTop a, FileAndTop b)
+            {
+                return OuterInstance.comparator.Compare(a.Current, b.Current) < 0;
+            }
+        }
+
+        /// <summary>
+        /// Read in a single partition of data </summary>
+        internal int ReadPartition(ByteSequencesReader reader)
+        {
+            long start = DateTime.Now.Millisecond;
+            var scratch = new BytesRef();
+            while ((scratch.Bytes = reader.Read()) != null)
+            {
+                scratch.Length = scratch.Bytes.Length;
+                Buffer.Append(scratch);
+                // Account for the created objects.
+                // (buffer slots do not account to buffer size.)
+                if (RamBufferSize.Bytes < BufferBytesUsed.Get())
+                {
+                    break;
+                }
+            }
+            sortInfo.ReadTime += (DateTime.Now.Millisecond - start);
+            return Buffer.Size();
+        }
+
+        internal class FileAndTop
+        {
+            internal readonly int Fd;
+            internal readonly BytesRef Current;
+
+            internal FileAndTop(int fd, byte[] firstLine)
+            {
+                this.Fd = fd;
+                this.Current = new BytesRef(firstLine);
+            }
+        }
+
 
         /// <summary>
         /// Utility class to emit length-prefixed byte[] entries to an output stream for sorting.
@@ -526,8 +527,8 @@ namespace Lucene.Net.Util
 
             /// <summary>
             /// Constructs a ByteSequencesWriter to the provided File </summary>
-            public ByteSequencesWriter(string filePath)
-                : this(new BinaryWriterDataOutput(new BinaryWriter(new FileStream(filePath, FileMode.Open))))
+            public ByteSequencesWriter(FileInfo file)
+                : this(new BinaryWriterDataOutput(new BinaryWriter(new FileStream(file.FullName, FileMode.OpenOrCreate))))
             {
             }
 
@@ -582,102 +583,102 @@ namespace Lucene.Net.Util
                 }
             }
         }
-//
-//        /// <summary>
-//        /// Utility class to read length-prefixed byte[] entries from an input.
-//        /// Complementary to <seealso cref="ByteSequencesWriter"/>.
-//        /// </summary>
-//        public class ByteSequencesReader : IDisposable
-//        {
-//            internal readonly DataInput inputStream;
-//
-//            /// <summary>
-//            /// Constructs a ByteSequencesReader from the provided File </summary>
-//            public ByteSequencesReader(FileInfo file)
-//                : this(new DataInputStream(new BufferedInputStream(new FileInputStream(file))))
-//            {
-//            }
-//
-//            /// <summary>
-//            /// Constructs a ByteSequencesReader from the provided DataInput </summary>
-//            public ByteSequencesReader(DataInput inputStream)
-//            {
-//                this.inputStream = inputStream;
-//            }
-//
-//            /// <summary>
-//            /// Reads the next entry into the provided <seealso cref="BytesRef"/>. The internal
-//            /// storage is resized if needed.
-//            /// </summary>
-//            /// <returns> Returns <code>false</code> if EOF occurred when trying to read
-//            /// the header of the next sequence. Returns <code>true</code> otherwise. </returns>
-//            /// <exception cref="EOFException"> if the file ends before the full sequence is read. </exception>
-//            public virtual bool Read(BytesRef @ref)
-//            {
-//                short length;
-//                try
-//                {
-//                    length = inputStream.ReadShort();
-//                }
-//                catch (EOFException)
-//                {
-//                    return false;
-//                }
-//
-//                @ref.Grow(length);
-//                @ref.Offset = 0;
-//                @ref.Length = length;
-//                inputStream.ReadFully(@ref.Bytes, 0, length);
-//                return true;
-//            }
-//
-//            /// <summary>
-//            /// Reads the next entry and returns it if successful.
-//            /// </summary>
-//            /// <seealso cref= #read(BytesRef)
-//            /// </seealso>
-//            /// <returns> Returns <code>null</code> if EOF occurred before the next entry
-//            /// could be read. </returns>
-//            /// <exception cref="EOFException"> if the file ends before the full sequence is read. </exception>
-//            public virtual sbyte[] Read()
-//            {
-//                short length;
-//                try
-//                {
-//                    length = inputStream.ReadShort();
-//                }
-//                catch (EOFException e)
-//                {
-//                    return null;
-//                }
-//
-//                Debug.Assert(length >= 0, "Sanity: sequence length < 0: " + length);
-//                sbyte[] result = new sbyte[length];
-//                inputStream.ReadFully(result);
-//                return result;
-//            }
-//
-//            /// <summary>
-//            /// Closes the provided <seealso cref="DataInput"/> if it is <seealso cref="IDisposable"/>.
-//            /// </summary>
-//            public void Dispose()
-//            {
-//                var @is = inputStream as IDisposable;
-//                if (@is != null)
-//                {
-//                    @is.Dispose();
-//                }
-//            }
-//        }
-//
-//        /// <summary>
-//        /// Returns the comparator in use to sort entries </summary>
-//        public IComparer<BytesRef> Comparator
-//        {
-//            get
-//            {
-//                return comparator;
-//            }
-//        }
+
+        /// <summary>
+        /// Utility class to read length-prefixed byte[] entries from an input.
+        /// Complementary to <seealso cref="ByteSequencesWriter"/>.
+        /// </summary>
+        public class ByteSequencesReader : IDisposable
+        {
+            internal readonly DataInput inputStream;
+
+            /// <summary>
+            /// Constructs a ByteSequencesReader from the provided File </summary>
+            public ByteSequencesReader(FileInfo file)
+                : this(new BinaryReaderDataInput(new BinaryReader(new FileStream(file.FullName, FileMode.Open))))
+            {
+            }
+
+            /// <summary>
+            /// Constructs a ByteSequencesReader from the provided DataInput </summary>
+            public ByteSequencesReader(DataInput inputStream)
+            {
+                this.inputStream = inputStream;
+            }
+
+            /// <summary>
+            /// Reads the next entry into the provided <seealso cref="BytesRef"/>. The internal
+            /// storage is resized if needed.
+            /// </summary>
+            /// <returns> Returns <code>false</code> if EOF occurred when trying to read
+            /// the header of the next sequence. Returns <code>true</code> otherwise. </returns>
+            /// <exception cref="EOFException"> if the file ends before the full sequence is read. </exception>
+            public virtual bool Read(BytesRef @ref)
+            {
+                short length;
+                try
+                {
+                    length = inputStream.ReadShort();
+                }
+                catch (Exception)
+                {
+                    return false;
+                }
+
+                @ref.Grow(length);
+                @ref.Offset = 0;
+                @ref.Length = length;
+                inputStream.ReadBytes(@ref.Bytes, 0, length);
+                return true;
+            }
+
+            /// <summary>
+            /// Reads the next entry and returns it if successful.
+            /// </summary>
+            /// <seealso cref= #read(BytesRef)
+            /// </seealso>
+            /// <returns> Returns <code>null</code> if EOF occurred before the next entry
+            /// could be read. </returns>
+            /// <exception cref="EOFException"> if the file ends before the full sequence is read. </exception>
+            public virtual byte[] Read()
+            {
+                short length;
+                try
+                {
+                    length = inputStream.ReadShort();
+                }
+                catch (Exception e)
+                {
+                    return null;
+                }
+
+                Debug.Assert(length >= 0, "Sanity: sequence length < 0: " + length);
+                byte[] result = new byte[length];
+                inputStream.ReadBytes(result, 0, length);
+                return result;
+            }
+
+            /// <summary>
+            /// Closes the provided <seealso cref="DataInput"/> if it is <seealso cref="IDisposable"/>.
+            /// </summary>
+            public void Dispose()
+            {
+                var @is = inputStream as IDisposable;
+                if (@is != null)
+                {
+                    @is.Dispose();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Returns the comparator in use to sort entries </summary>
+        public IComparer<BytesRef> Comparator
+        {
+            get
+            {
+                return comparator;
+            }
+        }
     }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/572ad694/src/Lucene.Net.Tests/Lucene.Net.Tests.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Lucene.Net.Tests.csproj b/src/Lucene.Net.Tests/Lucene.Net.Tests.csproj
index de4bb24..5c148a2 100644
--- a/src/Lucene.Net.Tests/Lucene.Net.Tests.csproj
+++ b/src/Lucene.Net.Tests/Lucene.Net.Tests.csproj
@@ -543,6 +543,7 @@
     <Compile Include="core\Util\TestNumericUtils.cs">
       <SubType>Code</SubType>
     </Compile>
+    <Compile Include="core\Util\TestOfflineSorter.cs" />
     <Compile Include="core\Util\TestOpenBitSet.cs">
       <SubType>Code</SubType>
     </Compile>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/572ad694/src/Lucene.Net.Tests/core/Util/TestOfflineSorter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Util/TestOfflineSorter.cs b/src/Lucene.Net.Tests/core/Util/TestOfflineSorter.cs
index 3f5530c..ffd1fc6 100644
--- a/src/Lucene.Net.Tests/core/Util/TestOfflineSorter.cs
+++ b/src/Lucene.Net.Tests/core/Util/TestOfflineSorter.cs
@@ -3,33 +3,33 @@ using System.Collections.Generic;
 using System.IO;
 using Lucene.Net.Support;
 using NUnit.Framework;
+using Lucene.Net.Attributes;
 
 namespace Lucene.Net.Util
 {
-    using NUnit.Framework;
     /*
-         * Licensed to the Apache Software Foundation (ASF) under one or more
-         * contributor license agreements.  See the NOTICE file distributed with
-         * this work for additional information regarding copyright ownership.
-         * The ASF licenses this file to You under the Apache License, Version 2.0
-         * (the "License"); you may not use this file except in compliance with
-         * the License.  You may obtain a copy of the License at
-         *
-         *     http://www.apache.org/licenses/LICENSE-2.0
-         *
-         * Unless required by applicable law or agreed to in writing, software
-         * distributed under the License is distributed on an "AS IS" BASIS,
-         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-         * See the License for the specific language governing permissions and
-         * limitations under the License.
-         */
+    * Licensed to the Apache Software Foundation (ASF) under one or more
+    * contributor license agreements.  See the NOTICE file distributed with
+    * this work for additional information regarding copyright ownership.
+    * The ASF licenses this file to You under the Apache License, Version 2.0
+    * (the "License"); you may not use this file except in compliance with
+    * the License.  You may obtain a copy of the License at
+    *
+    *     http://www.apache.org/licenses/LICENSE-2.0
+    *
+    * Unless required by applicable law or agreed to in writing, software
+    * distributed under the License is distributed on an "AS IS" BASIS,
+    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    * See the License for the specific language governing permissions and
+    * limitations under the License.
+    */
 
 
     //LUCENE PORT NOTE: The corresponding file was left out of the port due to being experimental on not porting properly
-    /*
-    using BufferSize = Lucene.Net.Util.OfflineSorter.BufferSize;
-    using ByteSequencesWriter = Lucene.Net.Util.OfflineSorter.ByteSequencesWriter;
-    using SortInfo = Lucene.Net.Util.OfflineSorter.SortInfo;
+    
+    //using BufferSize = Lucene.Net.Util.OfflineSorter.BufferSize;
+    //using ByteSequencesWriter = Lucene.Net.Util.OfflineSorter.ByteSequencesWriter;
+    //using SortInfo = Lucene.Net.Util.OfflineSorter.SortInfo;
 
     /// <summary>
     /// Tests for on-disk merge sorting.
@@ -44,66 +44,79 @@ namespace Lucene.Net.Util
         {
             base.SetUp();
             TempDir = CreateTempDir("mergesort");
-            TestUtil.Rm(TempDir);
+            DeleteTestFiles();
             TempDir.Create();
         }
 
         [TearDown]
         public override void TearDown()
         {
+            DeleteTestFiles();
+            base.TearDown();
+        }
+
+        private void DeleteTestFiles()
+        {
             if (TempDir != null)
             {
-                TestUtil.Rm(TempDir);
+                if (Directory.Exists(TempDir.FullName))
+                {
+                    foreach (var file in TempDir.GetFiles())
+                    {
+                        file.Delete();
+                    }
+                    TempDir.Delete();
+                }
             }
-            base.TearDown();
         }
 
         [Test]
         public virtual void TestEmpty()
         {
-            CheckSort(new OfflineSorter(), new sbyte[][] { });
+            CheckSort(new OfflineSorter(), new byte[][] { });
         }
 
         [Test]
         public virtual void TestSingleLine()
         {
-            CheckSort(new OfflineSorter(), new sbyte[][] { "Single line only.".GetBytes(IOUtils.CHARSET_UTF_8) });
+            CheckSort(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.CHARSET_UTF_8) });
         }
 
         [Test]
         public virtual void TestIntermediateMerges()
         {
             // Sort 20 mb worth of data with 1mb buffer, binary merging.
-            OfflineSorter.SortInfo info = CheckSort(new OfflineSorter(OfflineSorter.DEFAULT_COMPARATOR, OfflineSorter.BufferSize.megabytes(1), OfflineSorter.defaultTempDir(), 2), GenerateRandom((int)OfflineSorter.MB * 20));
-            Assert.IsTrue(info.mergeRounds > 10);
+            OfflineSorter.SortInfo info = CheckSort(new OfflineSorter(OfflineSorter.DEFAULT_COMPARATOR, OfflineSorter.BufferSize.Megabytes(1), OfflineSorter.DefaultTempDir(), 2), GenerateRandom((int)OfflineSorter.MB * 20));
+            Assert.IsTrue(info.MergeRounds > 10);
         }
 
         [Test]
         public virtual void TestSmallRandom()
         {
             // Sort 20 mb worth of data with 1mb buffer.
-            OfflineSorter.SortInfo sortInfo = CheckSort(new OfflineSorter(OfflineSorter.DEFAULT_COMPARATOR, OfflineSorter.BufferSize.megabytes(1), OfflineSorter.defaultTempDir(), OfflineSorter.MAX_TEMPFILES), GenerateRandom((int)OfflineSorter.MB * 20));
-            Assert.AreEqual(1, sortInfo.mergeRounds);
+            OfflineSorter.SortInfo sortInfo = CheckSort(new OfflineSorter(OfflineSorter.DEFAULT_COMPARATOR, OfflineSorter.BufferSize.Megabytes(1), OfflineSorter.DefaultTempDir(), OfflineSorter.MAX_TEMPFILES), GenerateRandom((int)OfflineSorter.MB * 20));
+            Assert.AreEqual(1, sortInfo.MergeRounds);
         }
 
         [Test]
+        [Timeout(120000), LongRunningTest]
         public virtual void TestLargerRandom()
         {
             // Sort 100MB worth of data with 15mb buffer.
-            CheckSort(new OfflineSorter(OfflineSorter.DEFAULT_COMPARATOR, OfflineSorter.BufferSize.megabytes(16), OfflineSorter.defaultTempDir(), OfflineSorter.MAX_TEMPFILES), GenerateRandom((int)OfflineSorter.MB * 100));
+            CheckSort(new OfflineSorter(OfflineSorter.DEFAULT_COMPARATOR, OfflineSorter.BufferSize.Megabytes(16), OfflineSorter.DefaultTempDir(), OfflineSorter.MAX_TEMPFILES), GenerateRandom((int)OfflineSorter.MB * 100));
         }
 
-        private sbyte[][] GenerateRandom(int howMuchData)
+        private byte[][] GenerateRandom(int howMuchData)
         {
-            List<sbyte[]> data = new List<sbyte[]>();
+            List<byte[]> data = new List<byte[]>();
             while (howMuchData > 0)
             {
-                sbyte[] current = new sbyte[Random().Next(256)];
+                byte[] current = new byte[Random().Next(256)];
                 Random().NextBytes((byte[])(Array)current);
                 data.Add(current);
                 howMuchData -= current.Length;
             }
-            sbyte[][] bytes = data.ToArray();
+            byte[][] bytes = data.ToArray();
             return bytes;
         }
 
@@ -132,15 +145,15 @@ namespace Lucene.Net.Util
         /// <summary>
         /// Check sorting data on an instance of <seealso cref="OfflineSorter"/>.
         /// </summary>
-        private OfflineSorter.SortInfo CheckSort(OfflineSorter sort, sbyte[][] data)
+        private OfflineSorter.SortInfo CheckSort(OfflineSorter sort, byte[][] data)
         {
-            File unsorted = WriteAll("unsorted", data);
+            FileInfo unsorted = WriteAll("unsorted", data);
 
             Array.Sort(data, unsignedByteOrderComparator);
-            File golden = WriteAll("golden", data);
+            FileInfo golden = WriteAll("golden", data);
 
-            File sorted = new File(TempDir, "sorted");
-            OfflineSorter.SortInfo sortInfo = sort.sort(unsorted, sorted);
+            FileInfo sorted = new FileInfo(Path.Combine(TempDir.FullName, "sorted"));
+            OfflineSorter.SortInfo sortInfo = sort.Sort(unsorted, sorted);
             //System.out.println("Input size [MB]: " + unsorted.Length() / (1024 * 1024));
             //System.out.println(sortInfo);
 
@@ -151,33 +164,40 @@ namespace Lucene.Net.Util
         /// <summary>
         /// Make sure two files are byte-byte identical.
         /// </summary>
-        private void AssertFilesIdentical(File golden, File sorted)
+        private void AssertFilesIdentical(FileInfo golden, FileInfo sorted)
         {
-            Assert.AreEqual(golden.Length(), sorted.Length());
+            Assert.AreEqual(golden.Length, sorted.Length);
 
-            sbyte[] buf1 = new sbyte[64 * 1024];
-            sbyte[] buf2 = new sbyte[64 * 1024];
+            byte[] buf1 = new byte[64 * 1024];
+            byte[] buf2 = new byte[64 * 1024];
             int len;
-            DataInputStream is1 = new DataInputStream(new FileInputStream(golden));
-            DataInputStream is2 = new DataInputStream(new FileInputStream(sorted));
-            while ((len = is1.read(buf1)) > 0)
+            //DataInputStream is1 = new DataInputStream(new FileInputStream(golden));
+            //DataInputStream is2 = new DataInputStream(new FileInputStream(sorted));
+            using (Stream is1 = golden.Open(FileMode.Open, FileAccess.Read, FileShare.Delete))
             {
-                is2.readFully(buf2, 0, len);
-                for (int i = 0; i < len; i++)
+                using (Stream is2 = sorted.Open(FileMode.Open, FileAccess.Read, FileShare.Delete))
                 {
-                    Assert.AreEqual(buf1[i], buf2[i]);
+                    while ((len = is1.Read(buf1, 0, buf1.Length)) > 0)
+                    {
+                        is2.Read(buf2, 0, len);
+                        for (int i = 0; i < len; i++)
+                        {
+                            Assert.AreEqual(buf1[i], buf2[i]);
+                        }
+                    }
+                    //IOUtils.Close(is1, is2);
                 }
             }
-            IOUtils.Close(is1, is2);
         }
 
-        private File WriteAll(string name, sbyte[][] data)
+        private FileInfo WriteAll(string name, byte[][] data)
         {
-            File file = new File(TempDir, name);
+            FileInfo file = new FileInfo(Path.Combine(TempDir.FullName, name));
+            using (file.Create()) { }
             OfflineSorter.ByteSequencesWriter w = new OfflineSorter.ByteSequencesWriter(file);
-            foreach (sbyte[] datum in data)
+            foreach (byte[] datum in data)
             {
-                w.write(datum);
+                w.Write(datum);
             }
             w.Dispose();
             return file;
@@ -189,14 +209,14 @@ namespace Lucene.Net.Util
             int numIters = AtLeast(10000);
             for (int i = 0; i < numIters; i++)
             {
-                OfflineSorter.BufferSize.megabytes(1 + Random().Next(2047));
+                OfflineSorter.BufferSize.Megabytes(1 + Random().Next(2047));
             }
-            OfflineSorter.BufferSize.megabytes(2047);
-            OfflineSorter.BufferSize.megabytes(1);
+            OfflineSorter.BufferSize.Megabytes(2047);
+            OfflineSorter.BufferSize.Megabytes(1);
 
             try
             {
-                OfflineSorter.BufferSize.megabytes(2048);
+                OfflineSorter.BufferSize.Megabytes(2048);
                 Assert.Fail("max mb is 2047");
             }
             catch (System.ArgumentException e)
@@ -205,7 +225,7 @@ namespace Lucene.Net.Util
 
             try
             {
-                OfflineSorter.BufferSize.megabytes(0);
+                OfflineSorter.BufferSize.Megabytes(0);
                 Assert.Fail("min mb is 0.5");
             }
             catch (System.ArgumentException e)
@@ -214,13 +234,12 @@ namespace Lucene.Net.Util
 
             try
             {
-                OfflineSorter.BufferSize.megabytes(-1);
+                OfflineSorter.BufferSize.Megabytes(-1);
                 Assert.Fail("min mb is 0.5");
             }
             catch (System.ArgumentException e)
             {
             }
         }
-    }*/
-
+    }
 }
\ No newline at end of file


[46/50] [abbrv] lucenenet git commit: Fixed math bugs in the Analysis.De.GermanStemmer class that were causing Analysis.De.TestGermanAnalyzer.TestGermanSpecials() and Analysis.De.TestGermanStemFilter.TestStemming() tests to fail

Posted by sy...@apache.org.
Fixed math bugs in the Analysis.De.GermanStemmer class that were causing Analysis.De.TestGermanAnalyzer.TestGermanSpecials() and Analysis.De.TestGermanStemFilter.TestStemming() tests to fail


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/56cdc043
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/56cdc043
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/56cdc043

Branch: refs/heads/analysis-work
Commit: 56cdc043a040d556afccea931fa2e0d24ca24d94
Parents: b04aca6
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Tue Aug 23 11:57:34 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Tue Aug 23 11:57:34 2016 +0700

----------------------------------------------------------------------
 .../Analysis/De/GermanStemmer.cs                          | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56cdc043/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemmer.cs
index 8b260be..2338085 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemmer.cs
@@ -100,15 +100,15 @@ namespace Lucene.Net.Analysis.De
             {
                 if ((buffer.Length + substCount > 5) && buffer.ToString(buffer.Length - 2, buffer.Length - (buffer.Length - 2)).Equals("nd"))
                 {
-                    buffer.Remove(buffer.Length - 2, buffer.Length - buffer.Length - 2);
+                    buffer.Remove(buffer.Length - 2, buffer.Length - (buffer.Length - 2));
                 }
                 else if ((buffer.Length + substCount > 4) && buffer.ToString(buffer.Length - 2, buffer.Length - (buffer.Length - 2)).Equals("em"))
                 {
-                    buffer.Remove(buffer.Length - 2, buffer.Length - buffer.Length - 2);
+                    buffer.Remove(buffer.Length - 2, buffer.Length - (buffer.Length - 2));
                 }
                 else if ((buffer.Length + substCount > 4) && buffer.ToString(buffer.Length - 2, buffer.Length - (buffer.Length - 2)).Equals("er"))
                 {
-                    buffer.Remove(buffer.Length - 2, buffer.Length - buffer.Length - 2);
+                    buffer.Remove(buffer.Length - 2, buffer.Length - (buffer.Length - 2));
                 }
                 else if (buffer[buffer.Length - 1] == 'e')
                 {
@@ -165,7 +165,7 @@ namespace Lucene.Net.Analysis.De
                 {
                     if (buffer.ToString(c, 4).Equals("gege"))
                     {
-                        buffer.Remove(c, c + 2 - c);
+                        buffer.Remove(c, (c + 2) - c);
                         return;
                     }
                 }
@@ -219,7 +219,7 @@ namespace Lucene.Net.Analysis.De
                     if ((c < buffer.Length - 2) && buffer[c] == 's' && buffer[c + 1] == 'c' && buffer[c + 2] == 'h')
                     {
                         buffer[c] = '$';
-                        buffer.Remove(c + 1, c + 3 - c + 1);
+                        buffer.Remove(c + 1, (c + 3) - (c + 1));
                         substCount = +2;
                     }
                     else if (buffer[c] == 'c' && buffer[c + 1] == 'h')


[17/50] [abbrv] lucenenet git commit: Bug (memory leak): Dispose() should cascade to Dispose() to other objects and release its own resources.

Posted by sy...@apache.org.
Bug (memory leak): Dispose() should cascade to Dispose() to other objects and release its own resources.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/dae551ff
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/dae551ff
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/dae551ff

Branch: refs/heads/analysis-work
Commit: dae551ff7eefdcfbec3ae5db2a35940daa5aa049
Parents: 87c185d
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Aug 20 19:15:42 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Aug 20 19:15:42 2016 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Core/Analysis/Tokenizer.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/dae551ff/src/Lucene.Net.Core/Analysis/Tokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/Tokenizer.cs b/src/Lucene.Net.Core/Analysis/Tokenizer.cs
index b0215be..a354310 100644
--- a/src/Lucene.Net.Core/Analysis/Tokenizer.cs
+++ b/src/Lucene.Net.Core/Analysis/Tokenizer.cs
@@ -73,7 +73,7 @@ namespace Lucene.Net.Analysis
         /// </summary>
         public override void Dispose()
         {
-            input.Close();
+            input.Dispose();
             // LUCENE-2387: don't hold onto Reader after close, so
             // GC can reclaim
             InputPending = ILLEGAL_STATE_READER;


[32/50] [abbrv] lucenenet git commit: Fixed "out of range" and "key not found" bugs in Facet.Taxonomy.TestTaxonomyFacetSumValueSource.TestRandom()

Posted by sy...@apache.org.
Fixed "out of range" and "key not found" bugs in Facet.Taxonomy.TestTaxonomyFacetSumValueSource.TestRandom()


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/0fd8d346
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/0fd8d346
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/0fd8d346

Branch: refs/heads/analysis-work
Commit: 0fd8d3465d54362285f83635871528a5ef888ae2
Parents: 5b9b2fe
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Aug 21 10:36:54 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 10:36:54 2016 +0700

----------------------------------------------------------------------
 .../Taxonomy/TestTaxonomyFacetSumValueSource.cs                | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0fd8d346/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetSumValueSource.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetSumValueSource.cs b/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetSumValueSource.cs
index 21ede3c..e05daa9 100644
--- a/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetSumValueSource.cs
+++ b/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetSumValueSource.cs
@@ -541,10 +541,10 @@ namespace Lucene.Net.Facet.Taxonomy
                 Facets facets = new TaxonomyFacetSumValueSource(tr, config, fc, values);
 
                 // Slow, yet hopefully bug-free, faceting:
-                var expectedValues = new List<Dictionary<string, float?>>();
+                var expectedValues = new List<Dictionary<string, float?>>(numDims);
                 for (int i = 0; i < numDims; i++)
                 {
-                    expectedValues[i] = new Dictionary<string, float?>();
+                    expectedValues.Add(new Dictionary<string, float?>());
                 }
 
                 foreach (TestDoc doc in testDocs)
@@ -555,7 +555,7 @@ namespace Lucene.Net.Facet.Taxonomy
                         {
                             if (doc.dims[j] != null)
                             {
-                                float? v = expectedValues[j][doc.dims[j]];
+                                float? v = expectedValues[j].ContainsKey(doc.dims[j]) ? expectedValues[j][doc.dims[j]] : null;
                                 if (v == null)
                                 {
                                     expectedValues[j][doc.dims[j]] = doc.value;


[16/50] [abbrv] lucenenet git commit: Fixed bugs in KStemmer that were causing tests TestKStemmer.TestVocabulary and TestKStemmer.TestRandomStrings to fail.

Posted by sy...@apache.org.
Fixed bugs in KStemmer that were causing tests TestKStemmer.TestVocabulary and TestKStemmer.TestRandomStrings to fail.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/87c185dc
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/87c185dc
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/87c185dc

Branch: refs/heads/analysis-work
Commit: 87c185dc1e567d5d30182e58617b17331ae0dec8
Parents: e4d9f44
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Aug 20 16:47:17 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Aug 20 16:47:17 2016 +0700

----------------------------------------------------------------------
 .../Analysis/En/KStemData1.cs                   | 703 +++++++++++++++++-
 .../Analysis/En/KStemData2.cs                   | 704 ++++++++++++++++++-
 .../Analysis/En/KStemData3.cs                   | 703 +++++++++++++++++-
 .../Analysis/En/KStemData4.cs                   | 703 +++++++++++++++++-
 .../Analysis/En/KStemData5.cs                   | 704 ++++++++++++++++++-
 .../Analysis/En/KStemData6.cs                   | 703 +++++++++++++++++-
 .../Analysis/En/KStemData7.cs                   | 703 +++++++++++++++++-
 .../Analysis/En/KStemData8.cs                   | 602 +++++++++++++++-
 .../Analysis/En/KStemFilter.cs                  |   2 +-
 .../Analysis/En/KStemmer.cs                     | 544 +++++++-------
 .../Lucene.Net.TestFramework.csproj             |   2 +-
 11 files changed, 5815 insertions(+), 258 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c185dc/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData1.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData1.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData1.cs
index 9556024..8995d7a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData1.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData1.cs
@@ -49,6 +49,707 @@ namespace Lucene.Net.Analysis.En
         {
         }
         // KStemData1 ... KStemData8 are created from "head_word_list.txt"
-        internal static string[] data = new string[] { "aback", "abacus", "abandon", "abandoned", "abase", "abash", "abate", "abattoir", "abbess", "abbey", "abbot", "abbreviate", "abbreviation", "abc", "abdicate", "abdomen", "abduct", "abed", "aberrant", "aberration", "abet", "abeyance", "abhor", "abhorrent", "abide", "abiding", "abilities", "ability", "abject", "abjure", "ablative", "ablaut", "ablaze", "able", "ablution", "ablutions", "ably", "abnegation", "abnormal", "abo", "aboard", "abode", "abolish", "abolition", "abominable", "abominate", "abomination", "aboriginal", "aborigine", "abort", "abortion", "abortionist", "abortive", "abound", "about", "above", "aboveboard", "abracadabra", "abrade", "abrasion", "abrasive", "abreast", "abridge", "abridgement", "abridgment", "abroad", "abrogate", "abrupt", "abscess", "abscond", "absence", "absent", "absentee", "absenteeism", "absently", "absinth", "absinthe", "absolute", "absolutely", "absolution", "absolutism", "absolve", "absorb", "a
 bsorbent", "absorbing", "absorption", "abstain", "abstemious", "abstention", "abstinence", "abstract", "abstracted", "abstraction", "abstruse", "absurd", "abundance", "abundant", "abuse", "abusive", "abut", "abutment", "abysmal", "abyss", "acacia", "academic", "academician", "academy", "accede", "accelerate", "acceleration", "accelerator", "accent", "accentuate", "accept", "acceptable", "acceptance", "access", "accessible", "accession", "accessory", "accidence", "accident", "accidental", "acclaim", "acclamation", "acclimatize", "acclivity", "accolade", "accommodate", "accommodating", "accommodation", "accommodations", "accompaniment", "accompanist", "accompany", "accomplice", "accomplish", "accomplished", "accomplishment", "accord", "accordance", "according", "accordingly", "accordion", "accost", "account", "accountable", "accountancy", "accountant", "accoutrements", "accredit", "accretion", "accrue", "accumulate", "accumulation", "accumulative", "accumulator", "accuracy", "accurate
 ", "accursed", "accusation", "accusative", "accuse", "accused", "accustom", "accustomed", "ace", "acerbity", "acetate", "acetic", "acetylene", "ache", "achieve", "achievement", "achoo", "acid", "acidify", "acidity", "acidulated", "acidulous", "acknowledge", "acknowledgement", "acknowledgment", "acme", "acne", "acolyte", "aconite", "acorn", "acoustic", "acoustics", "acquaint", "acquaintance", "acquaintanceship", "acquiesce", "acquiescent", "acquire", "acquisition", "acquisitive", "acquit", "acquittal", "acre", "acreage", "acrid", "acrimony", "acrobat", "acrobatic", "acrobatics", "acronym", "across", "acrostic", "act", "acting", "actinism", "action", "actionable", "activate", "active", "activist", "activity", "actor", "actress", "acts", "actual", "actuality", "actually", "actuary", "actuate", "acuity", "acumen", "acupuncture", "acute", "adage", "adagio", "adam", "adamant", "adapt", "adaptable", "adaptation", "adapter", "adaptor", "adc", "add", "addendum", "adder", "addict", "addiction
 ", "addictive", "addition", "additional", "additive", "addle", "address", "addressee", "adduce", "adenoidal", "adenoids", "adept", "adequate", "adhere", "adherence", "adherent", "adhesion", "adhesive", "adieu", "adipose", "adj", "adjacent", "adjective", "adjoin", "adjourn", "adjudge", "adjudicate", "adjunct", "adjure", "adjust", "adjutant", "adman", "admass", "administer", "administration", "administrative", "administrator", "admirable", "admiral", "admiralty", "admiration", "admire", "admirer", "admissible", "admission", "admit", "admittance", "admitted", "admittedly", "admixture", "admonish", "admonition", "admonitory", "ado", "adobe", "adolescent", "adopt", "adoption", "adoptive", "adorable", "adoration", "adore", "adorn", "adornment", "adrenalin", "adrift", "adroit", "adulate", "adulation", "adult", "adulterate", "adulterer", "adultery", "adumbrate", "adv", "advance", "advanced", "advancement", "advances", "advantage", "advantageous", "advent", "adventist", "adventitious", "adve
 nture", "adventurer", "adventuress", "adventurous", "adverb", "adverbial", "adversary", "adverse", "adversity", "advert", "advertise", "advertisement", "advertising", "advice", "advisable", "advise", "advisedly", "adviser", "advisor", "advisory", "advocacy", "advocate", "adz", "adze", "aegis", "aeon", "aerate", "aerial", "aerie", "aerobatic", "aerobatics", "aerodrome", "aerodynamic", "aerodynamics", "aeronautics", "aeroplane", "aerosol", "aerospace", "aertex", "aery", "aesthete", "aesthetic", "aesthetics", "aether", "aethereal", "aetiology", "afar", "affable", "affair", "affect", "affectation", "affected", "affecting", "affection", "affectionate", "affiance", "affidavit", "affiliate", "affiliation", "affinity", "affirm", "affirmative", "affix", "afflict", "affliction", "affluent", "afford", "afforest", "affray", "affricate", "affront", "aficionado", "afield", "afire", "aflame", "afloat", "afoot", "aforesaid", "aforethought", "afraid", "afresh", "afrikaans", "afrikaner", "afro", "aft
 ", "after", "afterbirth", "aftercare", "aftereffect", "afterglow", "afterlife", "aftermath", "afternoon", "afternoons", "afters", "aftershave", "aftertaste", "afterthought", "afterwards", "again", "against", "agape", "agate", "age", "ageing", "ageless", "agency", "agenda", "agent", "agglomerate", "agglutination", "agglutinative", "aggrandisement", "aggrandizement", "aggravate", "aggravation", "aggregate", "aggregation", "aggression", "aggressive", "aggressor", "aggrieved", "aggro", "aghast", "agile", "agitate", "agitation", "agitator", "aglow", "agnostic", "ago", "agog", "agonise", "agonised", "agonising", "agonize", "agonized", "agonizing", "agony", "agoraphobia", "agoraphobic", "agrarian", "agree", "agreeable", "agreeably", "agreement", "agriculture", "agronomy", "aground", "ague", "aha", "ahead", "ahem", "ahoy", "aid", "ail", "aileron", "ailment", "aim", "aimless", "air", "airbase", "airbed", "airbladder", "airborne", "airbrake", "airbrick", "airbus", "aircraft", "aircraftman", "
 aircrew", "aircushion", "airdrop", "airedale", "airfield", "airflow", "airforce", "airgun", "airhole", "airhostess", "airily", "airing", "airlane", "airless", "airletter", "airlift", "airline", "airliner", "airlock", "airmail", "airman", "airplane", "airpocket", "airport", "airs", "airshaft", "airship", "airsick", "airspace", "airspeed", "airstrip", "airtight", "airway", "airwoman", "airworthy", "airy", "aisle", "aitch", "ajar", "akimbo", "akin", "alabaster", "alack", "alacrity", "alarm", "alarmist", "alas", "albatross", "albeit", "albino", "album", "albumen", "alchemist", "alchemy", "alcohol", "alcoholic", "alcoholism", "alcove", "alder", "alderman", "ale", "alehouse", "alert", "alfalfa", "alfresco", "algae", "algebra", "algorithm", "alias", "alibi", "alien", "alienate", "alienation", "alienist", "alight", "align", "alignment", "alike", "alimentary", "alimony", "aline", "alinement", "alive", "alkali", "alkaline", "all", "allah", "allay", "allegation", "allege", "allegedly", "allegi
 ance", "allegorical", "allegory", "allegretto", "allegro", "alleluia", "allergic", "allergy", "alleviate", "alley", "alleyway", "alliance", "allied", "alligator", "alliteration", "alliterative", "allocate", "allocation", "allopathy", "allot", "allotment", "allow", "allowable", "allowance", "alloy", "allspice", "allude", "allure", "allurement", "allusion", "alluvial", "alluvium", "ally", "almanac", "almanack", "almighty", "almond", "almoner", "almost", "alms", "aloe", "aloft", "alone", "along", "alongside", "aloof", "alopecia", "aloud", "alpaca", "alpenhorn", "alpenstock", "alpha", "alphabet", "alphabetical", "alpine", "already", "alright", "alsatian", "also", "altar", "altarpiece", "alter", "alteration", "altercation", "alternate", "alternative", "alternator", "although", "altimeter", "altitude", "alto", "altogether", "altruism", "altruist", "alum", "aluminium", "alumna", "alumnus", "alveolar", "always", "alyssum", "amalgam", "amalgamate", "amanuensis", "amass", "amateur", "amateuri
 sh", "amatory", "amaze", "amazing", "amazon", "ambassador", "ambassadorial", "amber", "ambergris", "ambidextrous", "ambience", "ambient", "ambiguous", "ambit", "ambition", "ambitious", "ambivalent", "amble", "ambrosia", "ambulance", "ambush", "ame", "ameba", "ameliorate", "amen", "amenable", "amend", "amendment", "amends", "amenity", "americanise", "americanism", "americanize", "amethyst", "amiable", "amicable", "amid", "amidships", "amir", "amiss", "amity", "ammeter", "ammo", "ammonia", "ammonite", "ammunition", "amnesia", "amnesty", "amoeba", "amoebic", "amok", "among", "amoral", "amorous", "amorphous", "amortise", "amortize", "amount", "amour", "amp", "amperage", "ampersand", "amphetamine", "amphibian", "amphibious", "amphitheater", "amphitheatre", "amphora", "ample", "amplifier", "amplify", "amplitude", "ampoule", "amputate", "amputee", "amuck", "amulet", "amuse", "amusement", "anachronism", "anaconda", "anaemia", "anaemic", "anaesthesia", "anaesthetic", "anaesthetist", "anagram
 ", "anal", "analgesia", "analgesic", "analog", "analogize", "analogous", "analogue", "analogy", "analyse", "analysis", "analyst", "analytic", "anapaest", "anarchic", "anarchism", "anarchist", "anarchy", "anathema", "anathematize", "anatomical", "anatomist", "anatomy", "ancestor", "ancestral", "ancestry", "anchor", "anchorage", "anchorite", "anchovy", "ancient", "ancients", "ancillary", "and", "andante", "andiron", "androgynous", "anecdotal", "anecdote", "anemia", "anemometer", "anemone", "anesthesia", "anesthetise", "anesthetize", "anew", "angel", "angelica", "angelus", "anger", "angle", "anglican", "anglicise", "anglicism", "anglicize", "angling", "anglophile", "anglophilia", "anglophobe", "anglophobia", "angora", "angostura", "angry", "angst", "anguish", "anguished", "angular", "aniline", "animadversion", "animadvert", "animal", "animalcule", "animalism", "animate", "animation", "animism", "animosity", "animus", "anis", "anise", "aniseed", "ankle", "anklet", "annals", "anneal", "a
 nnex", "annexation", "annexe", "annihilate", "anniversary", "annotate", "annotation", "announce", "announcement", "announcer", "annoy", "annoyance", "annual", "annuity", "annul", "annular", "annunciation", "anode", "anodyne", "anoint", "anomalous", "anomaly", "anon", "anonymity", "anonymous", "anopheles", "anorak", "anorexia", "another", "answer", "answerable", "ant", "antacid", "antagonism", "antagonist", "antagonize", "antarctic", "ante", "anteater", "antecedence", "antecedent", "antecedents", "antechamber", "antedate", "antediluvian", "antelope", "antenatal", "antenna", "antepenultimate", "anterior", "anteroom", "anthem", "anther", "anthill", "anthology", "anthracite", "anthrax", "anthropocentric", "anthropoid", "anthropologist", "anthropology", "anthropomorphic", "anthropomorphism", "anthropophagous", "anthropophagy", "antiaircraft", "antibiotic", "antibody", "antic", "anticipate", "anticipation", "anticipatory", "anticlerical", "anticlimax", "anticlockwise", "antics", "anticycl
 one", "antidote", "antifreeze", "antigen", "antihero", "antihistamine", "antiknock", "antilogarithm", "antimacassar", "antimatter", "antimony", "antipathetic", "antipathy", "antipersonnel", "antipodal", "antipodes", "antiquarian", "antiquary", "antiquated", "antique", "antiquity", "antirrhinum", "antiseptic", "antisocial", "antithesis", "antithetic", "antitoxin", "antler", "antonym", "anus", "anvil", "anxiety", "anxious", "any", "anybody", "anyhow", "anyplace", "anyroad", "anything", "anyway", "anywhere", "aorta", "apace", "apanage", "apart", "apartheid", "apartment", "apartments", "apathetic", "apathy", "ape", "aperient", "aperitif", "aperture", "apex", "aphasia", "aphasic", "aphid", "aphorism", "aphoristic", "aphrodisiac", "apiarist", "apiary", "apices", "apiculture", "apiece", "apish", "aplomb", "apocalypse", "apocalyptic", "apocrypha", "apocryphal", "apogee", "apologetic", "apologetics", "apologia", "apologise", "apologist", "apologize", "apology", "apophthegm", "apoplectic", "a
 poplexy", "apostasy", "apostate", "apostatise", "apostatize", "apostle", "apostolic", "apostrophe", "apostrophize", "apothecary", "apothegm", "apotheosis", "appal", "appall", "appalling", "appanage", "apparatus", "apparel", "apparent", "apparently", "apparition", "appeal", "appealing", "appear", "appearance", "appearances", "appease", "appeasement", "appellant", "appellate", "appellation", "append", "appendage", "appendectomy", "appendicitis", "appendix", "appertain", "appetite", "appetizer", "appetizing", "applaud", "applause", "apple", "applejack", "appliance", "applicable", "applicant", "application", "applied", "apply", "appoint", "appointment", "appointments", "apportion", "apposite", "apposition", "appraisal", "appraise", "appreciable", "appreciate", "appreciation", "appreciative", "apprehend", "apprehension", "apprehensive", "apprentice", "apprenticeship", "apprise", "appro", "approach", "approachable", "approbation", "approbatory", "appropriate", "appropriation", "approval",
  "approve", "approx", "approximate", "approximation", "appurtenance", "apricot", "april", "apron", "apropos", "apse", "apt", "aptitude", "aqualung", "aquamarine", "aquaplane", "aquarium", "aquatic", "aquatint", "aqueduct", "aqueous", "aquiline", "arab", "arabesque", "arabic", "arable", "arachnid", "arak", "arbiter", "arbitrary", "arbitrate", "arbitration", "arbitrator", "arbor", "arboreal", "arboretum", "arbour", "arc", "arcade", "arcadia", "arcane", "arch", "archaeology", "archaic", "archaism", "archangel", "archbishop", "archbishopric", "archdeacon", "archdeaconry", "archdiocese", "archduke", "archeology", "archer", "archery", "archetype", "archimandrite", "archipelago", "architect", "architecture", "archive", "archway", "arctic", "ardent", "ardor", "ardour", "arduous", "are", "area", "areca", "arena", "argent", "argon", "argot", "arguable", "argue", "argument", "argumentative", "aria", "arid", "aries", "aright", "arise", "aristocracy", "aristocrat", "aristocratic", "arithmetic", 
 "arithmetician", "ark", "arm", "armada", "armadillo", "armament", "armature", "armband", "armchair", "armed", "armful", "armhole", "armistice", "armlet", "armor", "armorer", "armorial", "armory", "armour", "armoured", "armourer", "armoury", "armpit", "arms", "army", "aroma", "aromatic", "arose", "around", "arouse", "arpeggio", "arquebus", "arrack", "arraign", "arrange", "arrangement", "arrant", "arras", "array", "arrears", "arrest", "arrival", "arrive", "arrogance", "arrogant", "arrogate", "arrow", "arrowhead", "arrowroot", "arse", "arsenal", "arsenic", "arson", "art", "artefact", "arterial", "arteriosclerosis", "artery", "artful", "arthritis", "artichoke", "article", "articles", "articulate", "articulated", "articulateness", "articulation", "artifact", "artifice", "artificer", "artificial", "artillery", "artisan", "artist", "artiste", "artistic", "artistry", "artless", "arts", "arty", "arum", "asbestos", "ascend", "ascendancy", "ascendant", "ascendency", "ascendent", "ascension", "
 ascent", "ascertain", "ascetic", "ascribe", "ascription", "asepsis", "aseptic", "asexual", "ash", "ashamed", "ashbin", "ashcan", "ashen", "ashes", "ashore", "ashtray", "ashy", "aside", "asinine", "ask", "askance", "askew", "aslant", "asleep", "asp", "asparagus", "aspect", "aspectual", "aspen", "asperity", "aspersion", "asphalt", "asphodel", "asphyxia", "asphyxiate", "aspic", "aspidistra", "aspirant", "aspirate", "aspiration", "aspire", "aspirin", "ass", "assagai", "assail", "assailant", "assassin", "assassinate", "assault", "assay", "assegai", "assemblage", "assemble", "assembly", "assemblyman", "assent", "assert", "assertion", "assertive", "assess", "assessment", "assessor", "asset", "asseverate", "assiduity", "assiduous", "assign", "assignation", "assignment", "assimilate", "assimilation", "assist", "assistance", "assistant", "assize", "assizes", "associate", "association", "assonance", "assort", "assorted", "assortment", "asst", "assuage", "assume", "assumption", "assurance", "as
 sure", "assured", "aster", "asterisk", "astern", "asteroid", "asthma", "astigmatic", "astigmatism", "astir", "astonish", "astonishment", "astound", "astrakhan", "astral", "astray", "astride", "astringent", "astrolabe", "astrologer", "astrology", "astronaut", "astronautics", "astronomer", "astronomical", "astronomy", "astrophysics", "astute", "asunder", "asylum", "asymmetric", "atavism", "atchoo", "ate", "atelier", "atheism", "atheist", "athlete", "athletic", "athletics", "athwart", "atishoo", "atlas", "atmosphere", "atmospheric", "atmospherics", "atoll", "atom", "atomic", "atomise", "atomize", "atonal", "atonality", "atone", "atop", "atrocious", "atrocity", "atrophy", "attach", "attachment", "attack", "attain", "attainder", "attainment", "attar", "attempt", "attend", "attendance", "attendant", "attention", "attentive", "attenuate", "attest", "attestation", "attested", "attic", "attire", "attitude", "attitudinise", "attitudinize", "attorney", "attract", "attraction", "attractive", "a
 ttributable", "attribute", "attribution", "attributive", "attrition", "attune", "atypical", "aubergine", "aubrietia", "auburn", "auction", "auctioneer", "audacious", "audacity", "audible", "audience", "audio", "audiometer", "audit", "audition", "auditor", "auditorium", "auditory", "auger", "aught", "augment", "augmentation", "augur", "augury", "august", "auk", "aunt", "aura", "aural", "aureole", "auricle", "auricular", "auriferous", "aurora", "auscultation", "auspices", "auspicious", "aussie", "austere", "austerity", "australasian", "autarchy", "autarky", "authentic", "authenticate", "authenticity", "author", "authoress", "authorisation", "authorise", "authoritarian", "authoritative", "authority", "authorization", "authorize", "authorship", "autism", "autistic", "auto", "autobahn", "autobiographical", "autobiography", "autocracy", "autocrat", "autoeroticism", "autograph", "automat", "automate", "automatic", "automation", "automatism", "automaton", "automobile", "autonomous", "autono
 my", "autopsy", "autostrada", "autosuggestion", "autumn", "autumnal", "auxiliary", "avail", "available", "avalanche", "avarice", "avaricious", "avatar", "avaunt", "avenge", "avenue", "aver", "average", "averse", "aversion", "aversive", "avert", "aviary", "aviation", "aviator", "avid", "avocado", "avocation", "avocet", "avoid", "avoidance", "avoirdupois", "avow", "avowal", "avowed", "avuncular", "await", "awake", "awaken", "awakening", "award", "aware", "awash", "away", "awe", "awesome", "awestruck", "awful", "awfully", "awhile", "awkward", "awl", "awning", "awoke", "awoken", "awry", "axe", "axiom", "axiomatic", "axis", "axle", "axolotl", "ayah", "aye", "azalea", "azimuth", "azure", "baa", "babble", "babbler", "babe", "babel", "baboo", "baboon", "babu", "baby", "babyhood", "babyish", "baccalaureate", "baccara", "baccarat", "bacchanal", "baccy", "bachelor", "bacillus", "back", "backache", "backbench", "backbite", "backbone", "backbreaking", "backchat", "backcloth", "backcomb", "backda
 te", "backdrop", "backer", "backfire", "backgammon", "background", "backhand", "backhanded", "backhander", "backing", "backlash", "backlog", "backmost", "backpedal", "backside", "backslide", "backspace", "backstage", "backstairs", "backstay", "backstroke", "backtrack", "backup", "backward", "backwards", "backwash", "backwater", "backwoods", "backwoodsman", "backyard", "bacon", "bacteria", "bacteriology", "bactrian", "bad", "bade", "badge", "badger", "badinage", "badly", "badminton", "baffle", "baffling", "bag", "bagatelle", "bagful", "baggage", "baggy", "bagpipes", "bags", "bah", "bail", "bailey", "bailiff", "bairn", "bait", "baize", "bake", "bakelite", "baker", "bakery", "baksheesh", "balaclava", "balalaika", "balance", "balanced", "balcony", "bald", "balderdash", "balding", "baldly", "baldric", "bale", "baleful", "balk", "ball", "ballad", "ballade", "ballast", "ballcock", "ballerina", "ballet", "ballistic", "ballistics", "ballocks", "balloon", "ballooning", "balloonist", "ballot",
  "ballpoint", "ballroom", "balls", "bally", "ballyhoo", "balm", "balmy", "baloney", "balsa", "balsam", "balustrade", "bamboo", "bamboozle", "ban", "banal", "banana", "band", "bandage", "bandana", "bandanna", "bandbox", "bandeau", "bandit", "banditry", "bandmaster", "bandoleer", "bandolier", "bandsman", "bandstand", "bandwagon", "bandy", "bane", "baneful", "bang", "banger", "bangle", "banian", "banish", "banister", "banjo", "bank", "bankbook", "banker", "banking", "bankrupt", "bankruptcy", "banner", "bannock", "banns", "banquet", "banshee", "bantam", "bantamweight", "banter", "banyan", "baobab", "baptise", "baptism", "baptist", "baptize", "bar", "barb", "barbarian", "barbaric", "barbarise", "barbarism", "barbarize", "barbarous", "barbecue", "barbed", "barbel", "barber", "barbican", "barbiturate", "barcarole", "barcarolle", "bard", "bare", "bareback", "barebacked", "barefaced", "barefoot", "bareheaded", "barelegged", "barely", "bargain", "barge", "bargee", "baritone", "barium", "bark"
 , "barker", "barley", "barleycorn", "barmaid", "barman", "barmy", "barn", "barnacle", "barnstorm", "barnyard", "barograph", "barometer", "baron", "baroness", "baronet", "baronetcy", "baronial", "barony", "baroque", "barque", "barrack", "barracks", "barracuda", "barrage", "barred", "barrel", "barren", "barricade", "barricades", "barrier", "barring", "barrister", "barrow", "bartender", "barter", "basalt", "base", "baseball", "baseboard", "baseless", "baseline", "basement", "bases", "bash", "bashful", "basic", "basically", "basics", "basil", "basilica", "basilisk", "basin", "basis", "bask", "basket", "basketball", "basketful", "basketry", "basketwork", "bass", "basset", "bassinet", "bassoon", "bast", "bastard", "bastardise", "bastardize", "bastardy", "baste", "bastinado", "bastion", "bat", "batch", "bated", "bath", "bathing", "bathos", "bathrobe", "bathroom", "baths", "bathtub", "bathysphere", "batik", "batiste", "batman", "baton", "bats", "batsman", "battalion", "batten", "batter", "b
 attery", "battle", "battleax", "battleaxe", "battlefield", "battlements", "battleship", "batty", "bauble", "baulk", "bauxite", "bawd", "bawdy", "bawl", "bay", "bayonet", "bayou", "bazaar", "bazooka", "bbc", "beach", "beachcomber", "beachhead", "beachwear", "beacon", "bead", "beading", "beadle", "beady", "beagle", "beagling", "beak", "beaker", "beam", "bean", "beanpole", "beanstalk", "bear", "bearable", "beard", "bearded", "bearer", "bearing", "bearings", "bearish", "bearskin", "beast", "beastly", "beat", "beaten", "beater", "beatific", "beatification", "beatify", "beating", "beatitude", "beatitudes", "beatnik", "beau", "beaujolais", "beaut", "beauteous", "beautician", "beautiful", "beautify", "beauty", "beaver", "bebop", "becalmed", "because", "beck", "beckon", "become", "becoming", "bed", "bedaub", "bedbug", "bedclothes", "bedding", "bedeck", "bedevil", "bedewed", "bedfellow", "bedimmed", "bedlam", "bedouin", "bedpan", "bedpost", "bedraggled", "bedridden", "bedrock", "bedroom", "be
 dside", "bedsore", "bedspread", "bedstead", "bedtime", "bee", "beech", "beef", "beefcake", "beefeater", "beefsteak", "beefy", "beehive", "beeline", "been", "beer", "beery", "beeswax", "beet", "beetle", "beetling", "beetroot", "beeves", "befall", "befit", "befitting", "before", "beforehand", "befriend", "befuddle", "beg", "beget", "beggar", "beggarly", "beggary", "begin", "beginner", "beginning", "begone", "begonia", "begorra", "begot", "begotten", "begrudge", "beguile", "begum", "begun", "behalf", "behave", "behavior", "behaviorism", "behaviour", "behaviourism", "behead", "behemoth", "behest", "behind", "behindhand", "behold", "beholden", "behove", "beige", "being", "belabor", "belabour", "belated", "belay", "belch", "beleaguer", "belfry", "belie", "belief", "believable", "believe", "believer", "belittle", "bell", "belladonna", "bellboy", "belle", "bellflower", "bellicose", "belligerency", "belligerent", "bellow", "bellows", "belly", "bellyache", "bellyful", "belong", "belongings", 
 "beloved", "below", "belt", "belted", "belting", "beltway", "bemoan", "bemused", "ben", "bench", "bencher", "bend", "bended", "bends", "beneath", "benedictine", "benediction", "benedictus", "benefaction", "benefactor", "benefice", "beneficent", "beneficial", "beneficiary", "benefit", "benevolence", "benevolent", "benighted", "benign", "benignity", "bent", "benumbed", "benzedrine", "benzene", "benzine", "bequeath", "bequest", "berate", "bereave", "bereaved", "bereavement", "bereft", "beret", "beriberi", "berk", "berry", "berserk", "berth", "beryl", "beseech", "beseem", "beset", "besetting", "beside", "besides", "besiege", "besmear", "besmirch", "besom", "besotted", "besought", "bespattered", "bespeak", "bespoke", "best", "bestial", "bestiality", "bestiary", "bestir", "bestow", "bestrew", "bestride", "bet", "beta", "betake", "betel", "bethel", "bethink", "betide", "betimes", "betoken", "betray", "betrayal", "betroth", "betrothal", "betrothed", "better", "betterment", "betters", "betto
 r", "between", "betwixt", "bevel", "beverage", "bevy", "bewail", "beware", "bewilder", "bewitch", "bey", "beyond", "bezique", "bhang", "bias", "bib", "bible", "biblical", "bibliographer", "bibliography", "bibliophile", "bibulous", "bicarb", "bicarbonate", "bicentenary", "bicentennial", "biceps", "bicker", "bicycle", "bid", "biddable", "bidding", "bide", "bidet", "biennial", "bier", "biff", "bifocals", "bifurcate", "big", "bigamist", "bigamous", "bigamy", "bighead", "bight", "bigot", "bigoted", "bigotry", "bigwig", "bijou", "bike", "bikini", "bilabial", "bilateral", "bilberry", "bile", "bilge", "bilingual", "bilious", "bilk", "bill", "billboard", "billet", "billfold", "billhook", "billiard", "billiards", "billion", "billow", "billposter", "billy", "biltong", "bimetallic", "bimetallism", "bimonthly", "bin", "binary", "bind", "binder", "bindery", "binding", "bindweed", "binge", "bingo", "binnacle", "binocular", "binoculars", "binomial", "biochemistry", "biodegradable", "biographer", "b
 iographical", "biography", "biological", "biology", "biomedical", "bionic", "biosphere", "biotechnology", "bipartisan", "bipartite", "biped", "biplane", "birch", "bird", "birdie", "birdlime", "birdseed", "biretta", "biro", "birth", "birthday", "birthmark", "birthplace", "birthrate", "birthright", "biscuit", "bisect", "bisexual", "bishop", "bishopric", "bismuth", "bison", "bisque", "bistro", "bit", "bitch", "bitchy", "bite", "biting", "bitter", "bittern", "bitters", "bittersweet", "bitty", "bitumen", "bituminous", "bivalve", "bivouac", "biweekly", "bizarre", "blab", "blabber", "blabbermouth", "black", "blackamoor", "blackball", "blackberry", "blackbird", "blackboard", "blackcurrant", "blacken", "blackguard", "blackhead", "blacking", "blackjack", "blackleg", "blacklist", "blackly", "blackmail", "blackout", "blackshirt", "blacksmith", "blackthorn", "bladder", "blade", "blaeberry", "blah", "blame", "blameless", "blameworthy", "blanch", "blancmange", "bland", "blandishments", "blank", "b
 lanket", "blare", "blarney", "blaspheme", "blasphemous", "blasphemy", "blast", "blasted", "blatant", "blather", "blaze", "blazer", "blazes", "blazing", "blazon", "blazonry", "bleach", "bleachers", "bleak", "bleary", "bleat", "bleed", "bleeder", "bleeding", "bleep", "blemish", "blench", "blend", "blender", "bless", "blessed", "blessing", "blether", "blew", "blight", "blighter", "blimey", "blimp", "blind", "blinder", "blinders", "blindfold", "blink", "blinkered", "blinkers", "blinking", "blip", "bliss", "blister", "blistering", "blithe", "blithering", "blitz", "blizzard", "bloated", "bloater", "blob", "bloc", "block", "blockade", "blockage", "blockbuster", "blockhead", "blockhouse", "bloke", "blond", "blood", "bloodbath", "bloodcurdling", "bloodhound", "bloodless", "bloodletting", "bloodshed", "bloodshot", "bloodstain", "bloodstock", "bloodstream", "bloodsucker", "bloodthirsty", "bloody", "bloom", "bloomer", "bloomers", "blooming", "blossom", "blot", "blotch", "blotter", "blotto", "bl
 ouse", "blow", "blower", "blowfly", "blowgun", "blowhard", "blowhole", "blowlamp", "blown", "blowout", "blowpipe", "blowsy", "blowy", "blowzy", "blubber", "bludgeon", "blue", "bluebag", "bluebeard", "bluebell", "blueberry", "bluebird", "bluebottle", "bluecoat", "bluefish", "bluejacket", "blueprint", "blues", "bluestocking", "bluff", "blunder", "blunderbuss", "blunt", "bluntly", "blur", "blurb", "blurt", "blush", "bluster", "blustery", "boa", "boar", "board", "boarder", "boarding", "boardinghouse", "boardroom", "boards", "boardwalk", "boast", "boaster", "boastful", "boat", "boater", "boathouse", "boatman", "boatswain", "bob", "bobbin", "bobby", "bobcat", "bobolink", "bobsleigh", "bobtail", "bobtailed", "bock", "bod", "bode", "bodice", "bodily", "boding", "bodkin", "body", "bodyguard", "bodywork", "boer", "boffin", "bog", "bogey", "boggle", "boggy", "bogie", "bogus", "bohemian", "boil", "boiler", "boisterous", "bold", "boldface", "boldfaced", "bole", "bolero", "boll", "bollard", "boll
 ocks", "boloney", "bolshevik", "bolshevism", "bolshy", "bolster", "bolt", "bolthole", "bomb", "bombard", "bombardier", "bombardment", "bombast", "bomber", "bombproof", "bombshell", "bombsight", "bombsite", "bonanza", "bonbon", "bond", "bondage", "bonded", "bondholder", "bonds", "bone", "boned", "bonehead", "boner", "bonesetter", "boneshaker", "bonfire", "bongo", "bonhomie", "bonito", "bonkers", "bonnet", "bonny", "bonsai", "bonus", "bony", "bonzer", "boo", "boob", "boobs", "booby", "boodle", "boohoo", "book", "bookable", "bookbindery", "bookbinding", "bookcase", "bookend", "booking", "bookish", "bookkeeping", "booklet", "bookmaker", "bookmark", "bookmobile", "bookplate", "books", "bookseller", "bookshop", "bookstall", "bookwork", "bookworm", "boom", "boomerang", "boon", "boor", "boost", "booster", "boot", "bootblack", "booted", "bootee", "booth", "bootlace", "bootleg", "bootless", "boots", "bootstraps", "booty", "booze", "boozer", "boozy", "bop", "bopper", "boracic", "borage", "bora
 x", "bordeaux", "bordello", "border", "borderer", "borderland", "borderline", "bore", "borealis", "borehole", "borer", "born", "borne", "boron", "borough", "borrow", "borrowing", "borscht", "borshcht", "borstal", "borzoi", "bosh", "bosom", "bosomy", "boss", "bossy", "bosun", "botanical", "botanise", "botanist", "botanize", "botany", "botch", "both", "bother", "botheration", "bothersome", "bottle", "bottleful", "bottleneck", "bottom", "bottomless", "botulism", "boudoir", "bouffant", "bougainvillaea", "bougainvillea", "bough", "bought", "bouillabaisse", "bouillon", "boulder", "boulevard", "bounce", "bouncer", "bouncing", "bouncy", "bound", "boundary", "bounden", "bounder", "boundless", "bounds", "bounteous", "bountiful", "bounty", "bouquet", "bourbon", "bourgeois", "bourgeoisie", "bourn", "bourne", "bourse", "bout", "boutique", "bouzouki", "bovine", "bovril", "bovver", "bow", "bowdlerise", "bowdlerize", "bowed", "bowel", "bowels", "bower", "bowerbird", "bowing", "bowl", "bowler", "bow
 lful", "bowline", "bowling", "bowls", "bowman", "bowser", "bowshot", "bowsprit", "bowwow", "box", "boxer", "boxful", "boxing", "boxwood", "boy", "boycott", "boyfriend", "boyhood", "boyish", "boys", "bra", "brace", "bracelet", "bracelets", "braces", "bracing", "bracken", "bracket", "brackish", "bract", "bradawl", "brae", "brag", "braggadocio", "braggart", "brahman", "braid", "braille", "brain", "brainchild", "brainless", "brainpan", "brains", "brainstorm", "brainwash", "brainwashing", "brainwave", "brainy", "braise", "brake", "bramble", "bran", "branch", "brand", "brandish", "brandy", "brash", "brass", "brasserie", "brassiere", "brassy", "brat", "bravado", "brave", "bravo", "bravura", "brawl", "brawn", "brawny", "bray", "brazen", "brazier", "bre", "breach", "bread", "breadbasket", "breadboard", "breadcrumb", "breaded", "breadfruit", "breadline", "breadth", "breadthways", "breadwinner", "break", "breakage", "breakaway", "breakdown", "breaker", "breakfast", "breakneck", "breakout", "br
 eakthrough", "breakup", "breakwater", "bream", "breast", "breastbone", "breastplate", "breaststroke", "breastwork", "breath", "breathalyse", "breathalyser", "breathe", "breather", "breathing", "breathless", "breathtaking", "breathy", "breech", "breeches", "breed", "breeder", "breeding", "breeze", "breezeblock", "breezy", "brethren", "breve", "brevet", "breviary", "brevity", "brew", "brewer", "brewery", "briar", "bribe", "bribery", "brick", "brickbat", "brickfield", "bricklayer", "brickwork", "bridal", "bride", "bridegroom", "bridesmaid", "bridge", "bridgehead", "bridgework", "bridle", "brie", "brief", "briefcase", "briefing", "briefs", "brier", "brig", "brigade", "brigadier", "brigand", "brigandage", "brigantine", "bright", "brighten", "brill", "brilliancy", "brilliant", "brilliantine", "brim", "brimful", "brimfull", "brimstone", "brindled", "brine", "bring", "brink", "brinkmanship", "brioche", "briquet", "briquette", "brisk", "brisket", "bristle", "bristly", "bristols", "brit", "br
 itches", "britisher", "briton", "brittle", "broach", "broad", "broadcast", "broadcasting", "broadcloth", "broaden", "broadloom", "broadminded", "broadsheet", "broadside", "broadsword", "broadways", "brocade", "broccoli", "brochure", "brogue", "broil", "broiler", "broke", "broken", "broker", "brolly", "bromide", "bromine", "bronchial", "bronchitis", "bronco", "brontosaurus", "bronze", "brooch", "brood", "broody", "brook", "broom", "broomstick", "broth", "brothel", "brother", "brotherhood", "brougham", "brought", "brouhaha", "brow", "browbeat", "brown", "brownie", "brownstone", "browse", "brucellosis", "bruin", "bruise", "bruiser", "bruising", "bruit", "brunch", "brunet", "brunette", "brunt", "brush", "brushwood", "brushwork", "brusque", "brutal", "brutalise", "brutality", "brutalize", "brute", "brutish", "bubble", "bubbly", "buccaneer", "buck", "buckboard", "bucked", "bucket", "buckle", "buckler", "buckram", "buckshee", "buckshot", "buckskin", "bucktooth", "buckwheat", "bucolic", "bu
 d", "buddhism", "budding", "buddy", "budge", "budgerigar", "budget", "budgetary", "buff", "buffalo", "buffer", "buffet", "buffoon", "buffoonery", "bug", "bugaboo", "bugbear", "bugger", "buggered", "buggery", "buggy", "bughouse", "bugle", "bugrake", "buhl", "build", "builder", "building", "buildup", "bulb", "bulbous", "bulbul", "bulge", "bulk", "bulkhead", "bulky", "bull", "bulldog", "bulldoze", "bulldozer", "bullet", "bulletin", "bulletproof", "bullfight", "bullfighting", "bullfinch", "bullfrog", "bullheaded", "bullion", "bullnecked", "bullock", "bullring", "bullshit", "bully", "bullyboy", "bulrush", "bulwark", "bum", "bumble", "bumblebee", "bumboat", "bumf", "bummer", "bump", "bumper", "bumph", "bumpkin", "bumptious", "bumpy", "bun", "bunch", "bundle", "bung", "bungalow", "bunghole", "bungle", "bunion", "bunk", "bunker", "bunkered", "bunkhouse", "bunkum", "bunny", "bunting", "buoy", "buoyancy", "bur", "burberry", "burble", "burden", "burdensome", "burdock", "bureau", "bureaucracy",
  "bureaucrat", "bureaucratic", "burg", "burgeon", "burgess", "burgh", "burgher", "burglar", "burglary", "burgle", "burgomaster", "burgundy", "burial", "burlap", "burlesque", "burly", "burn", "burner", "burning", "burnish", "burnous", "burnouse", "burnt", "burp", "burr", "burro", "burrow", "bursar", "bursary", "burst", "burthen", "burton", "bury", "bus", "busby", "bush", "bushbaby", "bushed", "bushel", "bushwhack", "bushy", "business", "businesslike", "businessman", "busk", "busker", "busman", "bust", "bustard", "buster", "bustle", "busy", "busybody", "but", "butane", "butch", "butcher", "butchery", "butler", "butt", "butter", "buttercup", "butterfingers", "butterfly", "buttermilk", "butterscotch", "buttery", "buttock", "buttocks", "button", "buttonhole", "buttonhook", "buttons", "buttress", "buxom", "buy", "buyer", "buzz", "buzzard", "buzzer", "bye", "byelaw", "bygone", "bygones", "bylaw", "bypass", "byplay", "byre", "bystander", "byway", "byways", "byword", "byzantine", "cab", "cab
 al", "cabaret", "cabbage", "cabbie", "cabby", "cabdriver", "caber", "cabin", "cabinet", "cable", "cablegram", "caboodle", "caboose", "cabriolet", "cacao", "cache", "cachet", "cachou", "cackle", "cacophony", "cactus", "cad", "cadaver", "cadaverous", "caddie", "caddy", "cadence", "cadenza", "cadet", "cadge", "cadi", "cadmium", "cadre", "caerphilly", "caesura", "cafeteria", "caffeine", "caftan", "cage", "cagey", "cahoots", "caiman", "caique", "cairn", "caisson", "cajole", "cake", "calabash", "calaboose", "calamitous", "calamity", "calcify", "calcination", "calcine", "calcium", "calculable", "calculate", "calculating", "calculation", "calculator", "calculus", "caldron", "calendar", "calender", "calends", "calf", "calfskin", "caliber", "calibrate", "calibration", "calibre", "calico", "caliper", "calipers", "caliph", "caliphate", "calisthenic", "calisthenics", "calk", "call", "calla", "callboy", "caller", "calligraphy", "calling", "calliper", "callipers", "callisthenic", "callisthenics", 
 "callous", "callow", "callus", "calm", "calomel", "calorie", "calorific", "calumniate", "calumny", "calvary", "calve", "calves", "calvinism", "calypso", "calyx", "cam", "camaraderie", "camber", "cambric", "came", "camel", "camelhair", "camellia", "camembert", "cameo", "camera", "cameraman", "camisole", "camomile", "camouflage", "camp", "campaign", "campanile", "campanology", "campanula", "camper", "campfire", "campground", "camphor", "camphorated", "campion", "campsite", "campus", "camshaft", "can", "canal", "canalise", "canalize", "canard", "canary", "canasta", "cancan", "cancel", "cancellation", "cancer", "cancerous", "candela", "candelabrum", "candid", "candidate", "candidature", "candidly", "candied", "candle", "candlelight", "candlemas", "candlepower", "candlestick", "candlewick", "candor", "candour", "candy", "candyfloss", "candytuft", "cane", "canine", "canis", "canister", "canker", "canna", "cannabis", "canned", "cannelloni", "cannery", "cannibal", "cannibalise", "cannibalis
 m", "cannibalize", "cannon", "cannonade", "cannonball", "cannot", "canny", "canoe", "canon", "canonical", "canonicals", "canonise", "canonize", "canoodle", "canopy", "canst", "cant", "cantab", "cantabrigian", "cantaloup", "cantaloupe", "cantankerous", "cantata", "canteen", "canter", "canticle", "cantilever", "canto", "canton", "cantonment", "cantor", "canvas", "canvass", "canyon", "cap", "capabilities", "capability", "capable", "capacious", "capacity", "caparison", "cape", "caper", "capillarity", "capillary", "capital", "capitalisation", "capitalise", "capitalism", "capitalist", "capitalization", "capitalize", "capitals", "capitation", "capitol", "capitulate", "capitulation", "capitulations", "capon", "capriccio", "caprice", "capricious", "capricorn", "capsicum", "capsize", "capstan", "capsule", "captain", "caption", "captious", "captivate", "captive", "captivity", "captor", "capture", "car", "carafe", "caramel", "carapace", "carat", "caravan", "caravanning", "caravanserai", "carawa
 y", "carbide", "carbine", "carbohydrate", "carbolic", "carbon", "carbonated", "carbonation", "carboniferous", "carbonise", "carbonize", "carborundum", "carboy", "carbuncle", "carburetor", "carburettor", "carcase", "carcass", "carcinogen", "card", "cardamom", "cardboard", "cardiac", "cardigan", "cardinal", "cardpunch", "cards", "cardsharp", "care", "careen", "career", "careerist", "carefree", "careful", "careless", "caress", "caret", "caretaker", "careworn", "cargo", "caribou", "caricature", "caries", "carillon", "carious", "carmelite", "carmine", "carnage", "carnal", "carnation", "carnelian", "carnival", "carnivore", "carnivorous", "carob", "carol", "carotid", "carousal", "carouse", "carousel", "carp", "carpal", "carpenter", "carpentry", "carpet", "carpetbag", "carpetbagger", "carpeting", "carport", "carpus", "carriage", "carriageway", "carrier", "carrion", "carrot", "carroty", "carrousel", "carry", "carryall", "carrycot", "carryout", "carsick", "cart", "cartage", "cartel", "carter"
 , "carthorse", "cartilage", "cartilaginous", "cartographer", "cartography", "carton", "cartoon", "cartridge", "cartwheel", "carve", "carver", "carving", "caryatid", "cascade", "cascara", "case", "casebook", "casein", "casework" };
+        internal static string[] data = new string[] {
+            "aback","abacus","abandon","abandoned","abase",
+            "abash","abate","abattoir","abbess","abbey",
+            "abbot","abbreviate","abbreviation","abc","abdicate",
+            "abdomen","abduct","abed","aberrant","aberration",
+            "abet","abeyance","abhor","abhorrent","abide",
+            "abiding","abilities","ability","abject","abjure",
+            "ablative","ablaut","ablaze","able","ablution",
+            "ablutions","ably","abnegation","abnormal","abo",
+            "aboard","abode","abolish","abolition","abominable",
+            "abominate","abomination","aboriginal","aborigine","abort",
+            "abortion","abortionist","abortive","abound","about",
+            "above","aboveboard","abracadabra","abrade","abrasion",
+            "abrasive","abreast","abridge","abridgement","abridgment",
+            "abroad","abrogate","abrupt","abscess","abscond",
+            "absence","absent","absentee","absenteeism","absently",
+            "absinth","absinthe","absolute","absolutely","absolution",
+            "absolutism","absolve","absorb","absorbent","absorbing",
+            "absorption","abstain","abstemious","abstention","abstinence",
+            "abstract","abstracted","abstraction","abstruse","absurd",
+            "abundance","abundant","abuse","abusive","abut",
+            "abutment","abysmal","abyss","acacia","academic",
+            "academician","academy","accede","accelerate","acceleration",
+            "accelerator","accent","accentuate","accept","acceptable",
+            "acceptance","access","accessible","accession","accessory",
+            "accidence","accident","accidental","acclaim","acclamation",
+            "acclimatize","acclivity","accolade","accommodate","accommodating",
+            "accommodation","accommodations","accompaniment","accompanist","accompany",
+            "accomplice","accomplish","accomplished","accomplishment","accord",
+            "accordance","according","accordingly","accordion","accost",
+            "account","accountable","accountancy","accountant","accoutrements",
+            "accredit","accretion","accrue","accumulate","accumulation",
+            "accumulative","accumulator","accuracy","accurate","accursed",
+            "accusation","accusative","accuse","accused","accustom",
+            "accustomed","ace","acerbity","acetate","acetic",
+            "acetylene","ache","achieve","achievement","achoo",
+            "acid","acidify","acidity","acidulated","acidulous",
+            "acknowledge","acknowledgement","acknowledgment","acme","acne",
+            "acolyte","aconite","acorn","acoustic","acoustics",
+            "acquaint","acquaintance","acquaintanceship","acquiesce","acquiescent",
+            "acquire","acquisition","acquisitive","acquit","acquittal",
+            "acre","acreage","acrid","acrimony","acrobat",
+            "acrobatic","acrobatics","acronym","across","acrostic",
+            "act","acting","actinism","action","actionable",
+            "activate","active","activist","activity","actor",
+            "actress","acts","actual","actuality","actually",
+            "actuary","actuate","acuity","acumen","acupuncture",
+            "acute","adage","adagio","adam","adamant",
+            "adapt","adaptable","adaptation","adapter","adaptor",
+            "adc","add","addendum","adder","addict",
+            "addiction","addictive","addition","additional","additive",
+            "addle","address","addressee","adduce","adenoidal",
+            "adenoids","adept","adequate","adhere","adherence",
+            "adherent","adhesion","adhesive","adieu","adipose",
+            "adj","adjacent","adjective","adjoin","adjourn",
+            "adjudge","adjudicate","adjunct","adjure","adjust",
+            "adjutant","adman","admass","administer","administration",
+            "administrative","administrator","admirable","admiral","admiralty",
+            "admiration","admire","admirer","admissible","admission",
+            "admit","admittance","admitted","admittedly","admixture",
+            "admonish","admonition","admonitory","ado","adobe",
+            "adolescent","adopt","adoption","adoptive","adorable",
+            "adoration","adore","adorn","adornment","adrenalin",
+            "adrift","adroit","adulate","adulation","adult",
+            "adulterate","adulterer","adultery","adumbrate","adv",
+            "advance","advanced","advancement","advances","advantage",
+            "advantageous","advent","adventist","adventitious","adventure",
+            "adventurer","adventuress","adventurous","adverb","adverbial",
+            "adversary","adverse","adversity","advert","advertise",
+            "advertisement","advertising","advice","advisable","advise",
+            "advisedly","adviser","advisor","advisory","advocacy",
+            "advocate","adz","adze","aegis","aeon",
+            "aerate","aerial","aerie","aerobatic","aerobatics",
+            "aerodrome","aerodynamic","aerodynamics","aeronautics","aeroplane",
+            "aerosol","aerospace","aertex","aery","aesthete",
+            "aesthetic","aesthetics","aether","aethereal","aetiology",
+            "afar","affable","affair","affect","affectation",
+            "affected","affecting","affection","affectionate","affiance",
+            "affidavit","affiliate","affiliation","affinity","affirm",
+            "affirmative","affix","afflict","affliction","affluent",
+            "afford","afforest","affray","affricate","affront",
+            "aficionado","afield","afire","aflame","afloat",
+            "afoot","aforesaid","aforethought","afraid","afresh",
+            "afrikaans","afrikaner","afro","aft","after",
+            "afterbirth","aftercare","aftereffect","afterglow","afterlife",
+            "aftermath","afternoon","afternoons","afters","aftershave",
+            "aftertaste","afterthought","afterwards","again","against",
+            "agape","agate","age","ageing","ageless",
+            "agency","agenda","agent","agglomerate","agglutination",
+            "agglutinative","aggrandisement","aggrandizement","aggravate","aggravation",
+            "aggregate","aggregation","aggression","aggressive","aggressor",
+            "aggrieved","aggro","aghast","agile","agitate",
+            "agitation","agitator","aglow","agnostic","ago",
+            "agog","agonise","agonised","agonising","agonize",
+            "agonized","agonizing","agony","agoraphobia","agoraphobic",
+            "agrarian","agree","agreeable","agreeably","agreement",
+            "agriculture","agronomy","aground","ague","aha",
+            "ahead","ahem","ahoy","aid","ail",
+            "aileron","ailment","aim","aimless","air",
+            "airbase","airbed","airbladder","airborne","airbrake",
+            "airbrick","airbus","aircraft","aircraftman","aircrew",
+            "aircushion","airdrop","airedale","airfield","airflow",
+            "airforce","airgun","airhole","airhostess","airily",
+            "airing","airlane","airless","airletter","airlift",
+            "airline","airliner","airlock","airmail","airman",
+            "airplane","airpocket","airport","airs","airshaft",
+            "airship","airsick","airspace","airspeed","airstrip",
+            "airtight","airway","airwoman","airworthy","airy",
+            "aisle","aitch","ajar","akimbo","akin",
+            "alabaster","alack","alacrity","alarm","alarmist",
+            "alas","albatross","albeit","albino","album",
+            "albumen","alchemist","alchemy","alcohol","alcoholic",
+            "alcoholism","alcove","alder","alderman","ale",
+            "alehouse","alert","alfalfa","alfresco","algae",
+            "algebra","algorithm","alias","alibi","alien",
+            "alienate","alienation","alienist","alight","align",
+            "alignment","alike","alimentary","alimony","aline",
+            "alinement","alive","alkali","alkaline","all",
+            "allah","allay","allegation","allege","allegedly",
+            "allegiance","allegorical","allegory","allegretto","allegro",
+            "alleluia","allergic","allergy","alleviate","alley",
+            "alleyway","alliance","allied","alligator","alliteration",
+            "alliterative","allocate","allocation","allopathy","allot",
+            "allotment","allow","allowable","allowance","alloy",
+            "allspice","allude","allure","allurement","allusion",
+            "alluvial","alluvium","ally","almanac","almanack",
+            "almighty","almond","almoner","almost","alms",
+            "aloe","aloft","alone","along","alongside",
+            "aloof","alopecia","aloud","alpaca","alpenhorn",
+            "alpenstock","alpha","alphabet","alphabetical","alpine",
+            "already","alright","alsatian","also","altar",
+            "altarpiece","alter","alteration","altercation","alternate",
+            "alternative","alternator","although","altimeter","altitude",
+            "alto","altogether","altruism","altruist","alum",
+            "aluminium","alumna","alumnus","alveolar","always",
+            "alyssum","amalgam","amalgamate","amanuensis","amass",
+            "amateur","amateurish","amatory","amaze","amazing",
+            "amazon","ambassador","ambassadorial","amber","ambergris",
+            "ambidextrous","ambience","ambient","ambiguous","ambit",
+            "ambition","ambitious","ambivalent","amble","ambrosia",
+            "ambulance","ambush","ame","ameba","ameliorate",
+            "amen","amenable","amend","amendment","amends",
+            "amenity","americanise","americanism","americanize","amethyst",
+            "amiable","amicable","amid","amidships","amir",
+            "amiss","amity","ammeter","ammo","ammonia",
+            "ammonite","ammunition","amnesia","amnesty","amoeba",
+            "amoebic","amok","among","amoral","amorous",
+            "amorphous","amortise","amortize","amount","amour",
+            "amp","amperage","ampersand","amphetamine","amphibian",
+            "amphibious","amphitheater","amphitheatre","amphora","ample",
+            "amplifier","amplify","amplitude","ampoule","amputate",
+            "amputee","amuck","amulet","amuse","amusement",
+            "anachronism","anaconda","anaemia","anaemic","anaesthesia",
+            "anaesthetic","anaesthetist","anagram","anal","analgesia",
+            "analgesic","analog","analogize","analogous","analogue",
+            "analogy","analyse","analysis","analyst","analytic",
+            "anapaest","anarchic","anarchism","anarchist","anarchy",
+            "anathema","anathematize","anatomical","anatomist","anatomy",
+            "ancestor","ancestral","ancestry","anchor","anchorage",
+            "anchorite","anchovy","ancient","ancients","ancillary",
+            "and","andante","andiron","androgynous","anecdotal",
+            "anecdote","anemia","anemometer","anemone","anesthesia",
+            "anesthetise","anesthetize","anew","angel","angelica",
+            "angelus","anger","angle","anglican","anglicise",
+            "anglicism","anglicize","angling","anglophile","anglophilia",
+            "anglophobe","anglophobia","angora","angostura","angry",
+            "angst","anguish","anguished","angular","aniline",
+            "animadversion","animadvert","animal","animalcule","animalism",
+            "animate","animation","animism","animosity","animus",
+            "anis","anise","aniseed","ankle","anklet",
+            "annals","anneal","annex","annexation","annexe",
+            "annihilate","anniversary","annotate","annotation","announce",
+            "announcement","announcer","annoy","annoyance","annual",
+            "annuity","annul","annular","annunciation","anode",
+            "anodyne","anoint","anomalous","anomaly","anon",
+            "anonymity","anonymous","anopheles","anorak","anorexia",
+            "another","answer","answerable","ant","antacid",
+            "antagonism","antagonist","antagonize","antarctic","ante",
+            "anteater","antecedence","antecedent","antecedents","antechamber",
+            "antedate","antediluvian","antelope","antenatal","antenna",
+            "antepenultimate","anterior","anteroom","anthem","anther",
+            "anthill","anthology","anthracite","anthrax","anthropocentric",
+            "anthropoid","anthropologist","anthropology","anthropomorphic","anthropomorphism",
+            "anthropophagous","anthropophagy","antiaircraft","antibiotic","antibody",
+            "antic","anticipate","anticipation","anticipatory","anticlerical",
+            "anticlimax","anticlockwise","antics","anticyclone","antidote",
+            "antifreeze","antigen","antihero","antihistamine","antiknock",
+            "antilogarithm","antimacassar","antimatter","antimony","antipathetic",
+            "antipathy","antipersonnel","antipodal","antipodes","antiquarian",
+            "antiquary","antiquated","antique","antiquity","antirrhinum",
+            "antiseptic","antisocial","antithesis","antithetic","antitoxin",
+            "antler","antonym","anus","anvil","anxiety",
+            "anxious","any","anybody","anyhow","anyplace",
+            "anyroad","anything","anyway","anywhere","aorta",
+            "apace","apanage","apart","apartheid","apartment",
+            "apartments","apathetic","apathy","ape","aperient",
+            "aperitif","aperture","apex","aphasia","aphasic",
+            "aphid","aphorism","aphoristic","aphrodisiac","apiarist",
+            "apiary","apices","apiculture","apiece","apish",
+            "aplomb","apocalypse","apocalyptic","apocrypha","apocryphal",
+            "apogee","apologetic","apologetics","apologia","apologise",
+            "apologist","apologize","apology","apophthegm","apoplectic",
+            "apoplexy","apostasy","apostate","apostatise","apostatize",
+            "apostle","apostolic","apostrophe","apostrophize","apothecary",
+            "apothegm","apotheosis","appal","appall","appalling",
+            "appanage","apparatus","apparel","apparent","apparently",
+            "apparition","appeal","appealing","appear","appearance",
+            "appearances","appease","appeasement","appellant","appellate",
+            "appellation","append","appendage","appendectomy","appendicitis",
+            "appendix","appertain","appetite","appetizer","appetizing",
+            "applaud","applause","apple","applejack","appliance",
+            "applicable","applicant","application","applied","apply",
+            "appoint","appointment","appointments","apportion","apposite",
+            "apposition","appraisal","appraise","appreciable","appreciate",
+            "appreciation","appreciative","apprehend","apprehension","apprehensive",
+            "apprentice","apprenticeship","apprise","appro","approach",
+            "approachable","approbation","approbatory","appropriate","appropriation",
+            "approval","approve","approx","approximate","approximation",
+            "appurtenance","apricot","april","apron","apropos",
+            "apse","apt","aptitude","aqualung","aquamarine",
+            "aquaplane","aquarium","aquatic","aquatint","aqueduct",
+            "aqueous","aquiline","arab","arabesque","arabic",
+            "arable","arachnid","arak","arbiter","arbitrary",
+            "arbitrate","arbitration","arbitrator","arbor","arboreal",
+            "arboretum","arbour","arc","arcade","arcadia",
+            "arcane","arch","archaeology","archaic","archaism",
+            "archangel","archbishop","archbishopric","archdeacon","archdeaconry",
+            "archdiocese","archduke","archeology","archer","archery",
+            "archetype","archimandrite","archipelago","architect","architecture",
+            "archive","archway","arctic","ardent","ardor",
+            "ardour","arduous","are","area","areca",
+            "arena","argent","argon","argot","arguable",
+            "argue","argument","argumentative","aria","arid",
+            "aries","aright","arise","aristocracy","aristocrat",
+            "aristocratic","arithmetic","arithmetician","ark","arm",
+            "armada","armadillo","armament","armature","armband",
+            "armchair","armed","armful","armhole","armistice",
+            "armlet","armor","armorer","armorial","armory",
+            "armour","armoured","armourer","armoury","armpit",
+            "arms","army","aroma","aromatic","arose",
+            "around","arouse","arpeggio","arquebus","arrack",
+            "arraign","arrange","arrangement","arrant","arras",
+            "array","arrears","arrest","arrival","arrive",
+            "arrogance","arrogant","arrogate","arrow","arrowhead",
+            "arrowroot","arse","arsenal","arsenic","arson",
+            "art","artefact","arterial","arteriosclerosis","artery",
+            "artful","arthritis","artichoke","article","articles",
+            "articulate","articulated","articulateness","articulation","artifact",
+            "artifice","artificer","artificial","artillery","artisan",
+            "artist","artiste","artistic","artistry","artless",
+            "arts","arty","arum","asbestos","ascend",
+            "ascendancy","ascendant","ascendency","ascendent","ascension",
+            "ascent","ascertain","ascetic","ascribe","ascription",
+            "asepsis","aseptic","asexual","ash","ashamed",
+            "ashbin","ashcan","ashen","ashes","ashore",
+            "ashtray","ashy","aside","asinine","ask",
+            "askance","askew","aslant","asleep","asp",
+            "asparagus","aspect","aspectual","aspen","asperity",
+            "aspersion","asphalt","asphodel","asphyxia","asphyxiate",
+            "aspic","aspidistra","aspirant","aspirate","aspiration",
+            "aspire","aspirin","ass","assagai","assail",
+            "assailant","assassin","assassinate","assault","assay",
+            "assegai","assemblage","assemble","assembly","assemblyman",
+            "assent","assert","assertion","assertive","assess",
+            "assessment","assessor","asset","asseverate","assiduity",
+            "assiduous","assign","assignation","assignment","assimilate",
+            "assimilation","assist","assistance","assistant","assize",
+            "assizes","associate","association","assonance","assort",
+            "assorted","assortment","asst","assuage","assume",
+            "assumption","assurance","assure","assured","aster",
+            "asterisk","astern","asteroid","asthma","astigmatic",
+            "astigmatism","astir","astonish","astonishment","astound",
+            "astrakhan","astral","astray","astride","astringent",
+            "astrolabe","astrologer","astrology","astronaut","astronautics",
+            "astronomer","astronomical","astronomy","astrophysics","astute",
+            "asunder","asylum","asymmetric","atavism","atchoo",
+            "ate","atelier","atheism","atheist","athlete",
+            "athletic","athletics","athwart","atishoo","atlas",
+            "atmosphere","atmospheric","atmospherics","atoll","atom",
+            "atomic","atomise","atomize","atonal","atonality",
+            "atone","atop","atrocious","atrocity","atrophy",
+            "attach","attachment","attack","attain","attainder",
+            "attainment","attar","attempt","attend","attendance",
+            "attendant","attention","attentive","attenuate","attest",
+            "attestation","attested","attic","attire","attitude",
+            "attitudinise","attitudinize","attorney","attract","attraction",
+            "attractive","attributable","attribute","attribution","attributive",
+            "attrition","attune","atypical","aubergine","aubrietia",
+            "auburn","auction","auctioneer","audacious","audacity",
+            "audible","audience","audio","audiometer","audit",
+            "audition","auditor","auditorium","auditory","auger",
+            "aught","augment","augmentation","augur","augury",
+            "august","auk","aunt","aura","aural",
+            "aureole","auricle","auricular","auriferous","aurora",
+            "auscultation","auspices","auspicious","aussie","austere",
+            "austerity","australasian","autarchy","autarky","authentic",
+            "authenticate","authenticity","author","authoress","authorisation",
+            "authorise","authoritarian","authoritative","authority","authorization",
+            "authorize","authorship","autism","autistic","auto",
+            "autobahn","autobiographical","autobiography","autocracy","autocrat",
+            "autoeroticism","autograph","automat","automate","automatic",
+            "automation","automatism","automaton","automobile","autonomous",
+            "autonomy","autopsy","autostrada","autosuggestion","autumn",
+            "autumnal","auxiliary","avail","available","avalanche",
+            "avarice","avaricious","avatar","avaunt","avenge",
+            "avenue","aver","average","averse","aversion",
+            "aversive","avert","aviary","aviation","aviator",
+            "avid","avocado","avocation","avocet","avoid",
+            "avoidance","avoirdupois","avow","avowal","avowed",
+            "avuncular","await","awake","awaken","awakening",
+            "award","aware","awash","away","awe",
+            "awesome","awestruck","awful","awfully","awhile",
+            "awkward","awl","awning","awoke","awoken",
+            "awry","axe","axiom","axiomatic","axis",
+            "axle","axolotl","ayah","aye","azalea",
+            "azimuth","azure","baa","babble","babbler",
+            "babe","babel","baboo","baboon","babu",
+            "baby","babyhood","babyish","baccalaureate","baccara",
+            "baccarat","bacchanal","baccy","bachelor","bacillus",
+            "back","backache","backbench","backbite","backbone",
+            "backbreaking","backchat","backcloth","backcomb","backdate",
+            "backdrop","backer","backfire","backgammon","background",
+            "backhand","backhanded","backhander","backing","backlash",
+            "backlog","backmost","backpedal","backside","backslide",
+            "backspace","backstage","backstairs","backstay","backstroke",
+            "backtrack","backup","backward","backwards","backwash",
+            "backwater","backwoods","backwoodsman","backyard","bacon",
+            "bacteria","bacteriology","bactrian","bad","bade",
+            "badge","badger","badinage","badly","badminton",
+            "baffle","baffling","bag","bagatelle","bagful",
+            "baggage","baggy","bagpipes","bags","bah",
+            "bail","bailey","bailiff","bairn","bait",
+            "baize","bake","bakelite","baker","bakery",
+            "baksheesh","balaclava","balalaika","balance","balanced",
+            "balcony","bald","balderdash","balding","baldly",
+            "baldric","bale","baleful","balk","ball",
+            "ballad","ballade","ballast","ballcock","ballerina",
+            "ballet","ballistic","ballistics","ballocks","balloon",
+            "ballooning","balloonist","ballot","ballpoint","ballroom",
+            "balls","bally","ballyhoo","balm","balmy",
+            "baloney","balsa","balsam","balustrade","bamboo",
+            "bamboozle","ban","banal","banana","band",
+            "bandage","bandana","bandanna","bandbox","bandeau",
+            "bandit","banditry","bandmaster","bandoleer","bandolier",
+            "bandsman","bandstand","bandwagon","bandy","bane",
+            "baneful","bang","banger","bangle","banian",
+            "banish","banister","banjo","bank","bankbook",
+            "banker","banking","bankrupt","bankruptcy","banner",
+            "bannock","banns","banquet","banshee","bantam",
+            "bantamweight","banter","banyan","baobab","baptise",
+            "baptism","baptist","baptize","bar","barb",
+            "barbarian","barbaric","barbarise","barbarism","barbarize",
+            "barbarous","barbecue","barbed","barbel","barber",
+            "barbican","barbiturate","barcarole","barcarolle","bard",
+            "bare","bareback","barebacked","barefaced","barefoot",
+            "bareheaded","barelegged","barely","bargain","barge",
+            "bargee","baritone","barium","bark","barker",
+            "barley","barleycorn","barmaid","barman","barmy",
+            "barn","barnacle","barnstorm","barnyard","barograph",
+            "barometer","baron","baroness","baronet","baronetcy",
+            "baronial","barony","baroque","barque","barrack",
+            "barracks","barracuda","barrage","barred","barrel",
+            "barren","barricade","barricades","barrier","barring",
+            "barrister","barrow","bartender","barter","basalt",
+            "base","baseball","baseboard","baseless","baseline",
+            "basement","bases","bash","bashful","basic",
+            "basically","basics","basil","basilica","basilisk",
+            "basin","basis","bask","basket","basketball",
+            "basketful","basketry","basketwork","bass","basset",
+            "bassinet","bassoon","bast","bastard","bastardise",
+            "bastardize","bastardy","baste","bastinado","bastion",
+            "bat","batch","bated","bath","bathing",
+            "bathos","bathrobe","bathroom","baths","bathtub",
+            "bathysphere","batik","batiste","batman","baton",
+            "bats","batsman","battalion","batten","batter",
+            "battery","battle","battleax","battleaxe","battlefield",
+            "battlements","battleship","batty","bauble","baulk",
+            "bauxite","bawd","bawdy","bawl","bay",
+            "bayonet","bayou","bazaar","bazooka","bbc",
+            "beach","beachcomber","beachhead","beachwear","beacon",
+            "bead","beading","beadle","beady","beagle",
+            "beagling","beak","beaker","beam","bean",
+            "beanpole","beanstalk","bear","bearable","beard",
+            "bearded","bearer","bearing","bearings","bearish",
+            "bearskin","beast","beastly","beat","beaten",
+            "beater","beatific","beatification","beatify","beating",
+            "beatitude","beatitudes","beatnik","beau","beaujolais",
+            "beaut","beauteous","beautician","beautiful","beautify",
+            "beauty","beaver","bebop","becalmed","because",
+            "beck","beckon","become","becoming","bed",
+            "bedaub","bedbug","bedclothes","bedding","bedeck",
+            "bedevil","bedewed","bedfellow","bedimmed","bedlam",
+            "bedouin","bedpan","bedpost","bedraggled","bedridden",
+            "bedrock","bedroom","bedside","bedsore","bedspread",
+            "bedstead","bedtime","bee","beech","beef",
+            "beefcake","beefeater","beefsteak","beefy","beehive",
+            "beeline","been","beer","beery","beeswax",
+            "beet","beetle","beetling","beetroot","beeves",
+            "befall","befit","befitting","before","beforehand",
+            "befriend","befuddle","beg","beget","beggar",
+            "beggarly","beggary","begin","beginner","beginning",
+            "begone","begonia","begorra","begot","begotten",
+            "begrudge","beguile","begum","begun","behalf",
+            "behave","behavior","behaviorism","behaviour","behaviourism",
+            "behead","behemoth","behest","behind","behindhand",
+            "behold","beholden","behove","beige","being",
+            "belabor","belabour","belated","belay","belch",
+            "beleaguer","belfry","belie","belief","believable",
+            "believe","believer","belittle","bell","belladonna",
+            "bellboy","belle","bellflower","bellicose","belligerency",
+            "belligerent","bellow","bellows","belly","bellyache",
+            "bellyful","belong","belongings","beloved","below",
+            "belt","belted","belting","beltway","bemoan",
+            "bemused","ben","bench","bencher","bend",
+            "bended","bends","beneath","benedictine","benediction",
+            "benedictus","benefaction","benefactor","benefice","beneficent",
+            "beneficial","beneficiary","benefit","benevolence","benevolent",
+            "benighted","benign","benignity","bent","benumbed",
+            "benzedrine","benzene","benzine","bequeath","bequest",
+            "berate","bereave","bereaved","bereavement","bereft",
+            "beret","beriberi","berk","berry","berserk",
+            "berth","beryl","beseech","beseem","beset",
+            "besetting","beside","besides","besiege","besmear",
+            "besmirch","besom","besotted","besought","bespattered",
+            "bespeak","bespoke","best","bestial","bestiality",
+            "bestiary","bestir","bestow","bestrew","bestride",
+            "bet","beta","betake","betel","bethel",
+            "bethink","betide","betimes","betoken","betray",
+            "betrayal","betroth","betrothal","betrothed","better",
+            "betterment","betters","bettor","between","betwixt",
+            "bevel","beverage","bevy","bewail","beware",
+            "bewilder","bewitch","bey","beyond","bezique",
+            "bhang","bias","bib","bible","biblical",
+            "bibliographer","bibliography","bibliophile","bibulous","bicarb",
+            "bicarbonate","bicentenary","bicentennial","biceps","bicker",
+            "bicycle","bid","biddable","bidding","bide",
+            "bidet","biennial","bier","biff","bifocals",
+            "bifurcate","big","bigamist","bigamous","bigamy",
+            "bighead","bight","bigot","bigoted","bigotry",
+            "bigwig","bijou","bike","bikini","bilabial",
+            "bilateral","bilberry","bile","bilge","bilingual",
+            "bilious","bilk","bill","billboard","billet",
+            "billfold","billhook","billiard","billiards","billion",
+            "billow","billposter","billy","biltong","bimetallic",
+            "bimetallism","bimonthly","bin","binary","bind",
+            "binder","bindery","binding","bindweed","binge",
+            "bingo","binnacle","binocular","binoculars","binomial",
+            "biochemistry","biodegradable","biographer","biographical","biography",
+            "biological","biology","biomedical","bionic","biosphere",
+            "biotechnology","bipartisan","bipartite","biped","biplane",
+            "birch","bird","birdie","birdlime","birdseed",
+            "biretta","biro","birth","birthday","birthmark",
+            "birthplace","birthrate","birthright","biscuit","bisect",
+            "bisexual","bishop","bishopric","bismuth","bison",
+            "bisque","bistro","bit","bitch","bitchy",
+            "bite","biting","bitter","bittern","bitters",
+            "bittersweet","bitty","bitumen","bituminous","bivalve",
+            "bivouac","biweekly","bizarre","blab","blabber",
+            "blabbermouth","black","blackamoor","blackball","blackberry",
+            "blackbird","blackboard","blackcurrant","blacken","blackguard",
+            "blackhead","blacking","blackjack","blackleg","blacklist",
+            "blackly","blackmail","blackout","blackshirt","blacksmith",
+            "blackthorn","bladder","blade","blaeberry","blah",
+            "blame","blameless","blameworthy","blanch","blancmange",
+            "bland","blandishments","blank","blanket","blare",
+            "blarney","blaspheme","blasphemous","blasphemy","blast",
+            "blasted","blatant","blather","blaze","blazer",
+            "blazes","blazing","blazon","blazonry","bleach",
+            "bleachers","bleak","bleary","bleat","bleed",
+            "bleeder","bleeding","bleep","blemish","blench",
+            "blend","blender","bless","blessed","blessing",
+            "blether","blew","blight","blighter","blimey",
+            "blimp","blind","blinder","blinders","blindfold",
+            "blink","blinkered","blinkers","blinking","blip",
+            "bliss","blister","blistering","blithe","blithering",
+            "blitz","blizzard","bloated","bloater","blob",
+            "bloc","block","blockade","blockage","blockbuster",
+            "blockhead","blockhouse","bloke","blond","blood",
+            "bloodbath","bloodcurdling","bloodhound","bloodless","bloodletting",
+            "bloodshed","bloodshot","bloodstain","bloodstock","bloodstream",
+            "bloodsucker","bloodthirsty","bloody","bloom","bloomer",
+            "bloomers","blooming","blossom","blot","blotch",
+            "blotter","blotto","blouse","blow","blower",
+            "blowfly","blowgun","blowhard","blowhole","blowlamp",
+            "blown","blowout","blowpipe","blowsy","blowy",
+            "blowzy","blubber","bludgeon","blue","bluebag",
+            "bluebeard","bluebell","blueberry","bluebird","bluebottle",
+            "bluecoat","bluefish","bluejacket","blueprint","blues",
+            "bluestocking","bluff","blunder","blunderbuss","blunt",
+            "bluntly","blur","blurb","blurt","blush",
+            "bluster","blustery","boa","boar","board",
+            "boarder","boarding","boardinghouse","boardroom","boards",
+            "boardwalk","boast","boaster","boastful","boat",
+            "boater","boathouse","boatman","boatswain","bob",
+            "bobbin","bobby","bobcat","bobolink","bobsleigh",
+            "bobtail","bobtailed","bock","bod","bode",
+            "bodice","bodily","boding","bodkin","body",
+            "bodyguard","bodywork","boer","boffin","bog",
+            "bogey","boggle","boggy","bogie","bogus",
+            "bohemian","boil","boiler","boisterous","bold",
+            "boldface","boldfaced","bole","bolero","boll",
+            "bollard","bollocks","boloney","bolshevik","bolshevism",
+            "bolshy","bolster","bolt","bolthole","bomb",
+            "bombard","bombardier","bombardment","bombast","bomber",
+            "bombproof","bombshell","bombsight","bombsite","bonanza",
+            "bonbon","bond","bondage","bonded","bondholder",
+            "bonds","bone","boned","bonehead","boner",
+            "bonesetter","boneshaker","bonfire","bongo","bonhomie",
+            "bonito","bonkers","bonnet","bonny","bonsai",
+            "bonus","bony","bonzer","boo","boob",
+            "boobs","booby","boodle","boohoo","book",
+            "bookable","bookbindery","bookbinding","bookcase","bookend",
+            "booking","bookish","bookkeeping","booklet","bookmaker",
+            "bookmark","bookmobile","bookplate","books","bookseller",
+            "bookshop","bookstall","bookwork","bookworm","boom",
+            "boomerang","boon","boor","boost","booster",
+            "boot","bootblack","booted","bootee","booth",
+            "bootlace","bootleg","bootless","boots","bootstraps",
+            "booty","booze","boozer","boozy","bop",
+            "bopper","boracic","borage","borax","bordeaux",
+            "bordello","border","borderer","borderland","borderline",
+            "bore","borealis","borehole","borer","born",
+            "borne","boron","borough","borrow","borrowing",
+            "borscht","borshcht","borstal","borzoi","bosh",
+            "bosom","bosomy","boss","bossy","bosun",
+            "botanical","botanise","botanist","botanize","botany",
+            "botch","both","bother","botheration","bothersome",
+            "bottle","bottleful","bottleneck","bottom","bottomless",
+            "botulism","boudoir","bouffant","bougainvillaea","bougainvillea",
+            "bough","bought","bouillabaisse","bouillon","boulder",
+            "boulevard","bounce","bouncer","bouncing","bouncy",
+            "bound","boundary","bounden","bounder","boundless",
+            "bounds","bounteous","bountiful","bounty","bouquet",
+            "bourbon","bourgeois","bourgeoisie","bourn","bourne",
+            "bourse","bout","boutique","bouzouki","bovine",
+            "bovril","bovver","bow","bowdlerise","bowdlerize",
+            "bowed","bowel","bowels","bower","bowerbird",
+            "bowing","bowl","bowler","bowlful","bowline",
+            "bowling","bowls","bowman","bowser","bowshot",
+            "bowsprit","bowwow","box","boxer","boxful",
+            "boxing","boxwood","boy","boycott","boyfriend",
+            "boyhood","boyish","boys","bra","brace",
+            "bracelet","bracelets","braces","bracing","bracken",
+            "bracket","brackish","bract","bradawl","brae",
+            "brag","braggadocio","braggart","brahman","braid",
+            "braille","brain","brainchild","brainless","brainpan",
+            "brains","brainstorm","brainwash","brainwashing","brainwave",
+            "brainy","braise","brake","bramble","bran",
+            "branch","brand","brandish","brandy","brash",
+            "brass","brasserie","brassiere","brassy","brat",
+            "bravado","brave","bravo","bravura","brawl",
+            "brawn","brawny","bray","brazen","brazier",
+            "bre","breach","bread","breadbasket","breadboard",
+            "breadcrumb","breaded","breadfruit","breadline","breadth",
+            "breadthways","breadwinner","break","breakage","breakaway",
+            "breakdown","breaker","breakfast","breakneck","breakout",
+            "breakthrough","breakup","breakwater","bream","breast",
+            "breastbone","breastplate","breaststroke","breastwork","breath",
+            "breathalyse","breathalyser","breathe","breather","breathing",
+            "breathless","breathtaking","breathy","breech","breeches",
+            "breed","breeder","breeding","breeze","breezeblock",
+            "breezy","brethren","breve","brevet","breviary",
+            "brevity","brew","brewer","brewery","briar",
+            "bribe","bribery","brick","brickbat","brickfield",
+            "bricklayer","brickwork","bridal","bride","bridegroom",
+            "bridesmaid","bridge","bridgehead","bridgework","bridle",
+            "brie","brief","briefcase","briefing","briefs",
+            "brier","brig","brigade","brigadier","brigand",
+            "brigandage","brigantine","bright","brighten","brill",
+            "brilliancy","brilliant","brilliantine","brim","brimful",
+            "brimfull","brimstone","brindled","brine","bring",
+            "brink","brinkmanship","brioche","briquet","briquette",
+            "brisk","brisket","bristle","bristly","bristols",
+            "brit","britches","britisher","briton","brittle",
+            "broach","broad","broadcast","broadcasting","broadcloth",
+            "broaden","broadloom","broadminded","broadsheet","broadside",
+            "broadsword","broadways","brocade","broccoli","brochure",
+            "brogue","broil","broiler","broke","broken",
+            "broker","brolly","bromide","bromine","bronchial",
+            "bronchitis","bronco","brontosaurus","bronze","brooch",
+            "brood","broody","brook","broom","broomstick",
+            "broth","brothel","brother","brotherhood","brougham",
+            "brought","brouhaha","brow","browbeat","brown",
+            "brownie","brownstone","browse","brucellosis","bruin",
+            "bruise","bruiser","bruising","bruit","brunch",
+            "brunet","brunette","brunt","brush","brushwood",
+            "brushwork","brusque","brutal","brutalise","brutality",
+            "brutalize","brute","brutish","bubble","bubbly",
+            "buccaneer","buck","buckboard","bucked","bucket",
+            "buckle","buckler","buckram","buckshee","buckshot",
+            "buckskin","bucktooth","buckwheat","bucolic","bud",
+            "buddhism","budding","buddy","budge","budgerigar",
+            "budget","budgetary","buff","buffalo","buffer",
+            "buffet","buffoon","buffoonery","bug","bugaboo",
+            "bugbear","bugger","buggered","buggery","buggy",
+            "bughouse","bugle","bugrake","buhl","build",
+            "builder","building","buildup","bulb","bulbous",
+            "bulbul","bulge","bulk","bulkhead","bulky",
+            "bull","bulldog","bulldoze","bulldozer","bullet",
+            "bulletin","bulletproof","bullfight","bullfighting","bullfinch",
+            "bullfrog","bullheaded","bullion","bullnecked","bullock",
+            "bullring","bullshit","bully","bullyboy","bulrush",
+            "bulwark","bum","bumble","bumblebee","bumboat",
+            "bumf","bummer","bump","bumper","bumph",
+            "bumpkin","bumptious","bumpy","bun","bunch",
+            "bundle","bung","bungalow","bunghole","bungle",
+            "bunion","bunk","bunker","bunkered","bunkhouse",
+            "bunkum","bunny","bunting","buoy","buoyancy",
+            "bur","burberry","burble","burden","burdensome",
+            "burdock","bureau","bureaucracy","bureaucrat","bureaucratic",
+            "burg","burgeon","burgess","burgh","burgher",
+            "burglar","burglary","burgle","burgomaster","burgundy",
+            "burial","burlap","burlesque","burly","burn",
+            "burner","burning","burnish","burnous","burnouse",
+            "burnt","burp","burr","burro","burrow",
+            "bursar","bursary","burst","burthen","burton",
+            "bury","bus","busby","bush","bushbaby",
+            "bushed","bushel","bushwhack","bushy","business",
+            "businesslike","businessman","busk","busker","busman",
+            "bust","bustard","buster","bustle","busy",
+            "busybody","but","butane","butch","butcher",
+            "butchery","butler","butt","butter","buttercup",
+            "butterfingers","butterfly","buttermilk","butterscotch","buttery",
+            "buttock","buttocks","button","buttonhole","buttonhook",
+            "buttons","buttress","buxom","buy","buyer",
+            "buzz","buzzard","buzzer","bye","byelaw",
+            "bygone","bygones","bylaw","bypass","byplay",
+            "byre","bystander","byway","byways","byword",
+            "byzantine","cab","cabal","cabaret","cabbage",
+            "cabbie","cabby","cabdriver","caber","cabin",
+            "cabinet","cable","cablegram","caboodle","caboose",
+            "cabriolet","cacao","cache","cachet","cachou",
+            "cackle","cacophony","cactus","cad","cadaver",
+            "cadaverous","caddie","caddy","cadence","cadenza",
+            "cadet","cadge","cadi","cadmium","cadre",
+            "caerphilly","caesura","cafeteria","caffeine","caftan",
+            "cage","cagey","cahoots","caiman","caique",
+            "cairn","caisson","cajole","cake","calabash",
+            "calaboose","calamitous","calamity","calcify","calcination",
+            "calcine","calcium","calculable","calculate","calculating",
+            "calculation","calculator","calculus","caldron","calendar",
+            "calender","calends","calf","calfskin","caliber",
+            "calibrate","calibration","calibre","calico","caliper",
+            "calipers","caliph","caliphate","calisthenic","calisthenics",
+            "calk","call","calla","callboy","caller",
+            "calligraphy","calling","calliper","callipers","callisthenic",
+            "callisthenics","callous","callow","callus","calm",
+            "calomel","calorie","calorific","calumniate","calumny",
+            "calvary","calve","calves","calvinism","calypso",
+            "calyx","cam","camaraderie","camber","cambric",
+            "came","camel","camelhair","camellia","camembert",
+            "cameo","camera","cameraman","camisole","camomile",
+            "camouflage","camp","campaign","campanile","campanology",
+            "campanula","camper","campfire","campground","camphor",
+            "camphorated","campion","campsite","campus","camshaft",
+            "can","canal","canalise","canalize","canard",
+            "canary","canasta","cancan","cancel","cancellation",
+            "cancer","cancerous","candela","candelabrum","candid",
+            "candidate","candidature","candidly","candied","candle",
+            "candlelight","candlemas","candlepower","candlestick","candlewick",
+            "candor","candour","candy","candyfloss","candytuft",
+            "cane","canine","canis","canister","canker",
+            "canna","cannabis","canned","cannelloni","cannery",
+            "cannibal","cannibalise","cannibalism","cannibalize","cannon",
+            "cannonade","cannonball","cannot","canny","canoe",
+            "canon","canonical","canonicals","canonise","canonize",
+            "canoodle","canopy","canst","cant","cantab",
+            "cantabrigian","cantaloup","cantaloupe","cantankerous","cantata",
+            "canteen","canter","canticle","cantilever","canto",
+            "canton","cantonment","cantor","canvas","canvass",
+            "canyon","cap","capabilities","capability","capable",
+            "capacious","capacity","caparison","cape","caper",
+            "capillarity","capillary","capital","capitalisation","capitalise",
+            "capitalism","capitalist","capitalization","capitalize","capitals",
+            "capitation","capitol","capitulate","capitulation","capitulations",
+            "capon","capriccio","caprice","capricious","capricorn",
+            "capsicum","capsize","capstan","capsule","captain",
+            "caption","captious","captivate","captive","captivity",
+            "captor","capture","car","carafe","caramel",
+            "carapace","carat","caravan","caravanning","caravanserai",
+            "caraway","carbide","carbine","carbohydrate","carbolic",
+            "carbon","carbonated","carbonation","carboniferous","carbonise",
+            "carbonize","carborundum","carboy","carbuncle","carburetor",
+            "carburettor","carcase","carcass","carcinogen","card",
+            "cardamom","cardboard","cardiac","cardigan","cardinal",
+            "cardpunch","cards","cardsharp","care","careen",
+            "career","careerist","carefree","careful","careless",
+            "caress","caret","caretaker","careworn","cargo",
+            "caribou","caricature","caries","carillon","carious",
+            "carmelite","carmine","carnage","carnal","carnation",
+            "carnelian","carnival","carnivore","carnivorous","carob",
+            "carol","carotid","carousal","carouse","carousel",
+            "carp","carpal","carpenter","carpentry","carpet",
+            "carpetbag","carpetbagger","carpeting","carport","carpus",
+            "carriage","carriageway","carrier","carrion","carrot",
+            "carroty","carrousel","carry","carryall","carrycot",
+            "carryout","carsick","cart","cartage","cartel",
+            "carter","carthorse","cartilage","cartilaginous","cartographer",
+            "cartography","carton","cartoon","cartridge","cartwheel",
+            "carve","carver","carving","caryatid","cascade",
+            "cascara","case","casebook","casein","casework",
+        };
     }
 }
\ No newline at end of file


[21/50] [abbrv] lucenenet git commit: Fixed some "key not found" bugs in the Join.TestJoinUtil class.

Posted by sy...@apache.org.
Fixed some "key not found" bugs in the Join.TestJoinUtil class.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/22663d7a
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/22663d7a
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/22663d7a

Branch: refs/heads/analysis-work
Commit: 22663d7ae0642af5f3adc59618413b55d48acab3
Parents: 406f88a
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Aug 21 07:43:11 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 07:43:11 2016 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Tests.Join/TestJoinUtil.cs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/22663d7a/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Join/TestJoinUtil.cs b/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
index 81513c7..7e20840 100644
--- a/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
+++ b/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
@@ -796,7 +796,7 @@ namespace Lucene.Net.Tests.Join
                 while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                 {
                     docTermOrds.LookupOrd(ord, joinValue);
-                    var joinScore = JoinValueToJoinScores[joinValue];
+                    var joinScore = JoinValueToJoinScores.ContainsKey(joinValue) ? JoinValueToJoinScores[joinValue] : null;
                     if (joinScore == null)
                     {
                         JoinValueToJoinScores[BytesRef.DeepCopyOf(joinValue)] = joinScore = new JoinScore();
@@ -855,7 +855,7 @@ namespace Lucene.Net.Tests.Join
                     return;
                 }
 
-                var joinScore = JoinValueToJoinScores[joinValue];
+                var joinScore = JoinValueToJoinScores.ContainsKey(joinValue) ? JoinValueToJoinScores[joinValue] : null;
                 if (joinScore == null)
                 {
                     JoinValueToJoinScores[BytesRef.DeepCopyOf(joinValue)] = joinScore = new JoinScore();
@@ -912,7 +912,7 @@ namespace Lucene.Net.Tests.Join
                 while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                 {
                     docTermOrds.LookupOrd(ord, scratch);
-                    JoinScore joinScore = _joinValueToJoinScores[scratch];
+                    JoinScore joinScore = _joinValueToJoinScores.ContainsKey(scratch) ? _joinValueToJoinScores[scratch] : null;
                     if (joinScore == null)
                     {
                         continue;


[43/50] [abbrv] lucenenet git commit: Merge branch 'analysis-hunspell-dic' into analysis-work-2

Posted by sy...@apache.org.
Merge branch 'analysis-hunspell-dic' into analysis-work-2


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/f5d76c0b
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/f5d76c0b
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/f5d76c0b

Branch: refs/heads/analysis-work
Commit: f5d76c0bd42e2215a1c2cc914bc0b73208661290
Parents: 87c1d60 4011a39
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Tue Aug 23 03:01:07 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Tue Aug 23 03:01:07 2016 +0700

----------------------------------------------------------------------
 .gitignore                                      |   1 +
 .../Analysis/Hunspell/Dictionary.cs             |  51 +++-
 .../Analysis/Hunspell/ISO8859_14Decoder.cs      |   2 +-
 .../Analysis/Hunspell/TestAllDictionaries.cs    | 228 +++++++++++----
 .../Analysis/Hunspell/TestAllDictionaries2.cs   | 287 ++++++++++++++-----
 .../Lucene.Net.Tests.Analysis.Common.csproj     |   2 +
 6 files changed, 444 insertions(+), 127 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f5d76c0b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
----------------------------------------------------------------------


[48/50] [abbrv] lucenenet git commit: Fix for CharTokenizer.IsTokenChar() to revert the parameter back to int as was intended. A char cannot represent a surrogate pair, which makes it impossible to use IsTokenChar() with surrogate pairs.

Posted by sy...@apache.org.
Fix for CharTokenizer.IsTokenChar() to revert the parameter back to int as was intended. A char cannot represent a surrogate pair, which makes it impossible to use IsTokenChar() with surrogate pairs.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/053d3efc
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/053d3efc
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/053d3efc

Branch: refs/heads/analysis-work
Commit: 053d3efcb647dac4c681ddf3999eda18b3964b11
Parents: c36a0bd
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Tue Aug 23 14:37:12 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Tue Aug 23 16:05:50 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Ar/ArabicLetterTokenizer.cs                        | 5 +++--
 src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs | 2 +-
 .../Analysis/Core/WhitespaceTokenizer.cs                        | 4 ++--
 src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs    | 2 +-
 .../Analysis/Ru/RussianLetterTokenizer.cs                       | 5 +++--
 src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs   | 4 ++--
 .../Analysis/Util/TestCharTokenizers.cs                         | 2 +-
 7 files changed, 13 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/053d3efc/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
index 9e36d25..5fa5827 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
@@ -1,4 +1,5 @@
 \ufeffusing Lucene.Net.Analysis.Core;
+using Lucene.Net.Support;
 using Lucene.Net.Util;
 using System;
 using System.Globalization;
@@ -74,9 +75,9 @@ namespace Lucene.Net.Analysis.Ar
 	  /// <summary>
 	  /// Allows for Letter category or NonspacingMark category </summary>
 	  /// <seealso cref= org.apache.lucene.analysis.core.LetterTokenizer#isTokenChar(int) </seealso>
-	  protected override bool IsTokenChar(char c)
+	  protected override bool IsTokenChar(int c)
 	  {
-            return base.IsTokenChar(c) || char.GetUnicodeCategory((char)c) == UnicodeCategory.NonSpacingMark;
+            return base.IsTokenChar(c) || Character.GetType(c) == UnicodeCategory.NonSpacingMark;
         }
     }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/053d3efc/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
index 9a0b57d..9d3dc2b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LetterTokenizer.cs
@@ -75,7 +75,7 @@ namespace Lucene.Net.Analysis.Core
         /// Collects only characters which satisfy
         /// <seealso cref="Character#isLetter(int)"/>.
         /// </summary>
-        protected override bool IsTokenChar(char c)
+        protected override bool IsTokenChar(int c)
         {
             return Character.IsLetter(c);
         }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/053d3efc/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
index 1567daf..5ccdbbf 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/WhitespaceTokenizer.cs
@@ -69,9 +69,9 @@ namespace Lucene.Net.Analysis.Core
         /// Collects only characters which do not satisfy
         /// <seealso cref="Character#isWhitespace(int)"/>.
         /// </summary>
-        protected override bool IsTokenChar(char c)
+        protected override bool IsTokenChar(int c)
         {
-            return !char.IsWhiteSpace(c);
+            return !char.IsWhiteSpace((char)c);
         }
     }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/053d3efc/src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs
index 5117267..2de7baa 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs
@@ -41,7 +41,7 @@ namespace Lucene.Net.Analysis.In
         {
         }
 
-        protected override bool IsTokenChar(char c) // LUCENENET TODO: Change parameter back to int (for codepoint) rather than a single char since this could contain surrogate pairs
+        protected override bool IsTokenChar(int c)
         {
             UnicodeCategory category = Character.GetType(c);
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/053d3efc/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs
index e48c33f..15db0f7 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs
@@ -1,4 +1,5 @@
 \ufeffusing Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
 using Lucene.Net.Util;
 using System;
 using System.IO;
@@ -73,9 +74,9 @@ namespace Lucene.Net.Analysis.Ru
         /// Collects only characters which satisfy
         /// <seealso cref="Character#isLetter(int)"/>.
         /// </summary>
-        protected override bool IsTokenChar(char c)
+        protected override bool IsTokenChar(int c)
         {
-            return char.IsLetter(c) || (c >= DIGIT_0 && c <= DIGIT_9);
+            return Character.IsLetter(c) || (c >= DIGIT_0 && c <= DIGIT_9);
         }
     }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/053d3efc/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
index b4ea553..14047ca 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
@@ -120,7 +120,7 @@ namespace Lucene.Net.Analysis.Util
         /// predicate. Codepoints for which this is false are used to define token
         /// boundaries and are not included in tokens.
         /// </summary>
-        protected abstract bool IsTokenChar(char c);
+        protected abstract bool IsTokenChar(int c);
 
         /// <summary>
         /// Called on each token character to normalize it before it is added to the
@@ -166,7 +166,7 @@ namespace Lucene.Net.Analysis.Util
                 int charCount = Character.CharCount(c);
                 bufferIndex += charCount;
 
-                if (IsTokenChar((char)c)) // if it's a token char
+                if (IsTokenChar(c)) // if it's a token char
                 {
                     if (length == 0) // start of token
                     {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/053d3efc/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs
index d452d83..40ae0bb 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs
@@ -273,7 +273,7 @@ namespace Lucene.Net.Tests.Analysis.Common.Analysis.Util
                 {
                 }
 
-                protected override bool IsTokenChar(char c)
+                protected override bool IsTokenChar(int c)
                 {
                     if (char.IsNumber((char)c))
                     {


[12/50] [abbrv] lucenenet git commit: Fixed bugs in KStemmer that were causing tests TestKStemmer.TestVocabulary and TestKStemmer.TestRandomStrings to fail.

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c185dc/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData5.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData5.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData5.cs
index 518a5fc..4162a7c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData5.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData5.cs
@@ -47,6 +47,708 @@ namespace Lucene.Net.Analysis.En
         private KStemData5()
         {
         }
-        internal static string[] data = new string[] { "lock", "locker", "locket", "lockjaw", "locknut", "lockout", "locks", "locksmith", "lockstitch", "lockup", "loco", "locomotion", "locomotive", "locum", "locus", "locust", "locution", "lode", "lodestar", "lodestone", "lodge", "lodgement", "lodger", "lodging", "lodgings", "lodgment", "loess", "loft", "lofted", "lofty", "log", "loganberry", "logarithm", "logarithmic", "logbook", "logger", "loggerheads", "loggia", "logic", "logical", "logically", "logician", "logistic", "logistics", "logjam", "logrolling", "loin", "loincloth", "loins", "loiter", "loll", "lollipop", "lollop", "lolly", "lone", "lonely", "loner", "lonesome", "long", "longboat", "longbow", "longevity", "longhaired", "longhand", "longheaded", "longhop", "longing", "longish", "longitude", "longitudinal", "longship", "longshoreman", "longsighted", "longstanding", "longstop", "longsuffering", "longueur", "longways", "longwearing", "longwinded", "longwise", "loo", "loofa", "
 loofah", "look", "looker", "lookout", "looks", "loom", "loon", "loony", "loop", "loophole", "loose", "loosebox", "loosen", "loot", "lop", "lope", "loppings", "loquacious", "loquat", "lord", "lordly", "lords", "lordship", "lore", "lorgnette", "lorn", "lorry", "lose", "loser", "loss", "lost", "lot", "loth", "lotion", "lottery", "lotto", "lotus", "loud", "loudhailer", "loudmouth", "loudspeaker", "lough", "lounge", "lounger", "lour", "louse", "lousy", "lout", "louver", "louvre", "lovable", "love", "loveable", "lovebird", "lovechild", "loveless", "lovelorn", "lovely", "lovemaking", "lover", "lovers", "lovesick", "lovey", "loving", "low", "lowborn", "lowbred", "lowbrow", "lowdown", "lower", "lowermost", "lowland", "lowlander", "lowly", "loyal", "loyalist", "loyalty", "lozenge", "lsd", "ltd", "lubber", "lubricant", "lubricate", "lubricator", "lubricious", "lucerne", "lucid", "luck", "luckless", "lucky", "lucrative", "lucre", "ludicrous", "ludo", "luff", "lug", "luggage", "lugger", "lughole
 ", "lugsail", "lugubrious", "lugworm", "lukewarm", "lull", "lullaby", "lumbago", "lumbar", "lumber", "lumberjack", "lumberman", "lumberyard", "luminary", "luminous", "lumme", "lummox", "lummy", "lump", "lumpish", "lumpy", "lunacy", "lunar", "lunate", "lunatic", "lunch", "lunchtime", "lung", "lunge", "lungfish", "lungpower", "lupin", "lurch", "lure", "lurgy", "lurid", "lurk", "luscious", "lush", "lust", "luster", "lustful", "lustre", "lustrous", "lusty", "lutanist", "lute", "lutenist", "luv", "luxuriant", "luxuriate", "luxurious", "luxury", "lychee", "lychgate", "lye", "lymph", "lymphatic", "lynch", "lynx", "lyre", "lyrebird", "lyric", "lyrical", "lyricism", "lyricist", "lyrics", "mac", "macabre", "macadam", "macadamise", "macadamize", "macaroni", "macaroon", "macaw", "mace", "macerate", "mach", "machete", "machiavellian", "machination", "machine", "machinegun", "machinery", "machinist", "mackerel", "mackintosh", "macrobiotic", "macrocosm", "mad", "madam", "madame", "madcap", "madden
 ", "maddening", "madder", "made", "madeira", "mademoiselle", "madhouse", "madly", "madman", "madness", "madonna", "madrigal", "maelstrom", "maenad", "maestro", "mafia", "mag", "magazine", "magenta", "maggot", "maggoty", "magi", "magic", "magical", "magician", "magisterial", "magistracy", "magistrate", "magma", "magnanimity", "magnanimous", "magnate", "magnesia", "magnesium", "magnet", "magnetic", "magnetise", "magnetism", "magnetize", "magneto", "magnificat", "magnification", "magnificent", "magnifier", "magnify", "magniloquent", "magnitude", "magnolia", "magnum", "magpie", "magus", "maharaja", "maharajah", "maharanee", "maharani", "mahatma", "mahlstick", "mahogany", "mahout", "maid", "maiden", "maidenhair", "maidenhead", "maidenhood", "maidenly", "maidservant", "mail", "mailbag", "mailbox", "maim", "main", "mainland", "mainline", "mainly", "mainmast", "mains", "mainsail", "mainspring", "mainstay", "mainstream", "maintain", "maintenance", "maisonette", "maisonnette", "maize", "majes
 tic", "majesty", "majolica", "major", "majordomo", "majorette", "majority", "make", "maker", "makeshift", "making", "makings", "malachite", "maladjusted", "maladministration", "maladroit", "malady", "malaise", "malapropism", "malapropos", "malaria", "malarial", "malay", "malcontent", "malcontented", "male", "malediction", "malefactor", "maleficent", "malevolent", "malfeasance", "malformation", "malformed", "malfunction", "malice", "malicious", "malign", "malignancy", "malignant", "malignity", "malinger", "mall", "mallard", "malleable", "mallet", "mallow", "malmsey", "malnutrition", "malodorous", "malpractice", "malt", "malthusian", "maltreat", "maltster", "mama", "mamba", "mambo", "mamma", "mammal", "mammary", "mammon", "mammoth", "mammy", "man", "manacle", "manage", "manageable", "management", "manager", "manageress", "managerial", "manatee", "mandarin", "mandate", "mandatory", "mandible", "mandolin", "mandrake", "mandrill", "maneuver", "maneuverable", "manful", "manganese", "mange
 ", "manger", "mangle", "mango", "mangosteen", "mangrove", "mangy", "manhandle", "manhole", "manhood", "manhour", "mania", "maniac", "maniacal", "manic", "manicure", "manicurist", "manifest", "manifestation", "manifesto", "manifold", "manikin", "manila", "manilla", "manipulate", "manipulation", "mankind", "manly", "manna", "manned", "mannequin", "manner", "mannered", "mannerism", "mannerly", "manners", "mannikin", "mannish", "manoeuverable", "manoeuvre", "manometer", "manor", "manorial", "manpower", "mansard", "manse", "manservant", "mansion", "mansions", "manslaughter", "mantelpiece", "mantelshelf", "mantilla", "mantis", "mantle", "mantrap", "manual", "manufacture", "manufacturer", "manumit", "manure", "manuscript", "manx", "many", "maoism", "maori", "map", "maple", "mapping", "maquis", "mar", "marabou", "marabout", "maraschino", "marathon", "maraud", "marble", "marbled", "marbles", "marc", "marcasite", "march", "marchioness", "margarine", "margin", "marginal", "marguerite", "marigo
 ld", "marihuana", "marijuana", "marimba", "marina", "marinade", "marinate", "marine", "mariner", "marionette", "marital", "maritime", "marjoram", "mark", "markdown", "marked", "marker", "market", "marketeer", "marketer", "marketing", "marketplace", "marking", "marksman", "marksmanship", "markup", "marl", "marlinespike", "marmalade", "marmoreal", "marmoset", "marmot", "marocain", "maroon", "marquee", "marquess", "marquetry", "marquis", "marriage", "marriageable", "married", "marrow", "marrowbone", "marrowfat", "marry", "mars", "marsala", "marseillaise", "marsh", "marshal", "marshmallow", "marshy", "marsupial", "mart", "marten", "martial", "martian", "martin", "martinet", "martini", "martinmas", "martyr", "martyrdom", "marvel", "marvellous", "marvelous", "marxism", "marzipan", "mascara", "mascot", "masculine", "masculinity", "maser", "mash", "mashie", "mask", "masked", "masochism", "mason", "masonic", "masonry", "masque", "masquerade", "mass", "massacre", "massage", "masses", "masseur
 ", "massif", "massive", "massy", "mast", "mastectomy", "master", "masterful", "masterly", "mastermind", "masterpiece", "mastership", "masterstroke", "mastery", "masthead", "mastic", "masticate", "mastiff", "mastitis", "mastodon", "mastoid", "mastoiditis", "masturbate", "mat", "matador", "match", "matchbox", "matching", "matchless", "matchlock", "matchmaker", "matchstick", "matchwood", "mate", "material", "materialise", "materialism", "materialist", "materialize", "maternal", "maternity", "matey", "mathematician", "mathematics", "matins", "matriarch", "matriarchy", "matricide", "matriculate", "matrimony", "matrix", "matron", "matronly", "matt", "matter", "matting", "mattins", "mattock", "mattress", "maturation", "mature", "maturity", "maudlin", "maul", "maulstick", "maunder", "mausoleum", "mauve", "maverick", "maw", "mawkish", "maxi", "maxim", "maximal", "maximise", "maximize", "maximum", "may", "maybe", "maybeetle", "mayday", "mayfly", "mayhem", "mayonnaise", "mayor", "mayoralty", "
 mayoress", "maypole", "mayst", "maze", "mazed", "mazurka", "mccarthyism", "mead", "meadow", "meadowsweet", "meager", "meagre", "meal", "mealie", "mealtime", "mealy", "mealybug", "mean", "meander", "meanderings", "meaning", "meaningful", "meaningless", "means", "meant", "meantime", "meanwhile", "measles", "measly", "measurable", "measure", "measured", "measureless", "measurement", "meat", "meatball", "meaty", "mecca", "mechanic", "mechanical", "mechanics", "mechanise", "mechanism", "mechanistic", "mechanize", "medal", "medalist", "medallion", "medallist", "meddle", "meddlesome", "media", "mediaeval", "medial", "median", "mediate", "medic", "medical", "medicament", "medicare", "medicate", "medication", "medicinal", "medicine", "medico", "medieval", "mediocre", "mediocrity", "meditate", "meditation", "meditative", "mediterranean", "medium", "medlar", "medley", "meed", "meek", "meerschaum", "meet", "meeting", "meetinghouse", "megadeath", "megahertz", "megalith", "megalithic", "megaloman
 ia", "megalomaniac", "megaphone", "megaton", "megrim", "meiosis", "melancholia", "melancholic", "melancholy", "meld", "melee", "meliorate", "meliorism", "mellifluous", "mellow", "melodic", "melodious", "melodrama", "melodramatic", "melody", "melon", "melt", "melting", "member", "membership", "membrane", "membranous", "memento", "memo", "memoir", "memoirs", "memorabilia", "memorable", "memorandum", "memorial", "memorise", "memorize", "memory", "memsahib", "men", "menace", "menagerie", "mend", "mendacious", "mendacity", "mendelian", "mendicant", "mending", "menfolk", "menial", "meningitis", "meniscus", "menopause", "menses", "menstrual", "menstruate", "mensurable", "mensuration", "mental", "mentality", "menthol", "mentholated", "mention", "mentor", "menu", "meow", "mephistopheles", "mercantile", "mercenary", "mercer", "mercerise", "mercerize", "merchandise", "merchant", "merchantman", "merciful", "merciless", "mercurial", "mercury", "mercy", "mere", "merely", "meretricious", "merge", 
 "merger", "meridian", "meridional", "meringue", "merino", "merit", "meritocracy", "meritorious", "mermaid", "merman", "merriment", "merry", "merrymaking", "mesa", "mescalin", "mescaline", "mesdames", "mesdemoiselles", "meseems", "mesh", "mesmeric", "mesmerise", "mesmerism", "mesmerist", "mesmerize", "mess", "message", "messenger", "messiah", "messianic", "messieurs", "messmate", "messrs", "messuage", "messy", "mestizo", "met", "metabolic", "metabolise", "metabolism", "metabolize", "metacarpal", "metal", "metalanguage", "metallic", "metallurgist", "metallurgy", "metalwork", "metamorphose", "metamorphosis", "metaphor", "metaphorical", "metaphysics", "metatarsal", "mete", "metempsychosis", "meteor", "meteoric", "meteorite", "meteoroid", "meteorologist", "meteorology", "meter", "methane", "methinks", "method", "methodical", "methodism", "methodology", "meths", "methuselah", "meticulous", "metre", "metric", "metrical", "metrication", "metricise", "metricize", "metro", "metronome", "metro
 polis", "metropolitan", "mettle", "mettlesome", "mew", "mews", "mezzanine", "mezzo", "mezzotint", "miaow", "miasma", "mica", "mice", "michaelmas", "mick", "mickey", "microbe", "microbiologist", "microbiology", "microcosm", "microelectronics", "microfiche", "microfilm", "micromesh", "micrometer", "micron", "microorganism", "microphone", "microscope", "microscopic", "microsecond", "microwave", "mid", "midair", "midcourse", "midday", "midden", "middle", "middlebrow", "middleman", "middleweight", "middling", "midge", "midget", "midi", "midland", "midlands", "midmost", "midnight", "midpoint", "midriff", "midshipman", "midships", "midst", "midsummer", "midway", "midweek", "midwest", "midwicket", "midwife", "midwifery", "mien", "miffed", "might", "mightily", "mighty", "mignonette", "migraine", "migrant", "migrate", "migration", "migratory", "mikado", "mike", "milady", "mild", "mildew", "mildly", "mile", "mileage", "mileometer", "miler", "milestone", "milieu", "militancy", "militant", "mili
 tarise", "militarism", "militarize", "military", "militate", "militia", "militiaman", "milk", "milker", "milkmaid", "milkman", "milksop", "milkweed", "milky", "mill", "millboard", "milldam", "millenarian", "millenium", "millepede", "miller", "millet", "millibar", "milligram", "milligramme", "milliliter", "millilitre", "millimeter", "millimetre", "milliner", "millinery", "million", "millionaire", "millipede", "millpond", "millrace", "millstone", "millwheel", "millwright", "milometer", "milord", "milt", "mime", "mimeograph", "mimetic", "mimic", "mimicry", "mimosa", "min", "minaret", "minatory", "mince", "mincemeat", "mincer", "mincingly", "mind", "minded", "mindful", "mindless", "mine", "minefield", "minelayer", "miner", "mineral", "mineralogist", "mineralogy", "minestrone", "minesweeper", "mingle", "mingy", "mini", "miniature", "miniaturist", "minibus", "minim", "minimal", "minimise", "minimize", "minimum", "mining", "minion", "minister", "ministerial", "ministrant", "ministration", 
 "ministry", "miniver", "mink", "minnow", "minor", "minority", "minotaur", "minster", "minstrel", "minstrelsy", "mint", "minuet", "minus", "minuscule", "minute", "minutely", "minuteman", "minutes", "minutia", "minx", "miracle", "miraculous", "mirage", "mire", "mirror", "mirth", "miry", "misadventure", "misadvise", "misalliance", "misanthrope", "misanthropy", "misapplication", "misapply", "misapprehend", "misapprehension", "misappropriate", "misbegotten", "misbehave", "misbehaved", "misbehavior", "misbehaviour", "miscalculate", "miscall", "miscarry", "miscast", "miscegenation", "miscellaneous", "miscellany", "mischance", "mischief", "mischievous", "misconceive", "misconception", "misconduct", "misconstruction", "misconstrue", "miscount", "miscreant", "miscue", "misdate", "misdeal", "misdeed", "misdemeanor", "misdemeanour", "misdirect", "misdoing", "miser", "miserable", "miserably", "miserly", "misery", "misfire", "misfit", "misfortune", "misgiving", "misgovern", "misguide", "misguided
 ", "mishandle", "mishap", "mishear", "mishit", "mishmash", "misinform", "misinterpret", "misjudge", "misjudgement", "misjudgment", "mislay", "mislead", "mismanage", "mismatch", "misname", "misnomer", "misogynist", "misogyny", "misplace", "misprint", "mispronounce", "mispronunciation", "misquote", "misread", "misreport", "misrepresent", "misrule", "miss", "missal", "misshapen", "missile", "missing", "mission", "missionary", "missis", "missive", "misspell", "misspend", "misstate", "misstatement", "missus", "missy", "mist", "mistake", "mistaken", "mister", "mistime", "mistletoe", "mistral", "mistranslate", "mistress", "mistrial", "mistrust", "mistrustful", "mists", "misty", "misunderstand", "misunderstanding", "misuse", "mite", "miter", "mitigate", "mitosis", "mitre", "mitt", "mitten", "mix", "mixed", "mixer", "mixture", "mizen", "mizzen", "mizzenmast", "mizzle", "mnemonic", "mnemonics", "moa", "moan", "moat", "moated", "mob", "mobile", "mobilisation", "mobilise", "mobility", "mobiliza
 tion", "mobilize", "mobster", "moccasin", "mocha", "mock", "mockers", "mockery", "mockingbird", "modal", "mode", "model", "moderate", "moderately", "moderation", "moderations", "moderato", "moderator", "modern", "modernise", "modernism", "modernistic", "modernity", "modernize", "modest", "modesty", "modicum", "modification", "modifier", "modify", "modish", "mods", "modular", "modulate", "modulation", "module", "moggy", "mogul", "moh", "mohair", "mohammedan", "mohammedanism", "moiety", "moist", "moisten", "moisture", "moisturise", "moisturize", "moke", "molar", "molasses", "mold", "molder", "molding", "moldy", "mole", "molecular", "molecule", "molehill", "moleskin", "molest", "moll", "mollify", "mollusc", "mollusk", "mollycoddle", "molt", "molten", "molto", "molybdenum", "mom", "moment", "momentarily", "momentary", "momentous", "moments", "momentum", "momma", "mommy", "monarch", "monarchic", "monarchism", "monarchist", "monarchy", "monastery", "monastic", "monasticism", "monaural", "
 monday", "monetary", "money", "moneybags", "moneybox", "moneychanger", "moneyed", "moneylender", "moneymaker", "moneys", "monger", "mongol", "mongolism", "mongoose", "mongrel", "monies", "monitor", "monk", "monkey", "mono", "monochrome", "monocle", "monogamous", "monogamy", "monogram", "monograph", "monolith", "monolithic", "monolog", "monologue", "monomania", "monomaniac", "mononucleosis", "monophonic", "monophthong", "monoplane", "monopolise", "monopolist", "monopolize", "monopoly", "monorail", "monosyllabic", "monosyllable", "monotheism", "monotone", "monotonous", "monotony", "monotype", "monoxide", "monsieur", "monsignor", "monsoon", "monster", "monstrance", "monstrosity", "monstrous", "montage", "month", "monthly", "monument", "monumental", "monumentally", "moo", "mooch", "moocow", "mood", "moody", "moon", "moonbeam", "mooncalf", "moonlight", "moonlit", "moonshine", "moonstone", "moonstruck", "moony", "moor", "moorhen", "moorings", "moorish", "moorland", "moose", "moot", "mop",
  "mope", "moped", "moppet", "moquette", "moraine", "moral", "morale", "moralise", "moralist", "moralistic", "morality", "moralize", "morally", "morals", "morass", "moratorium", "morbid", "morbidity", "mordant", "more", "morello", "moreover", "mores", "moresque", "morganatic", "morgue", "moribund", "mormon", "mormonism", "morn", "morning", "mornings", "morocco", "moron", "moronic", "morose", "morpheme", "morphemics", "morpheus", "morphine", "morphology", "morrow", "morsel", "mortal", "mortality", "mortally", "mortar", "mortarboard", "mortgage", "mortgagee", "mortgagor", "mortice", "mortician", "mortification", "mortify", "mortise", "mortuary", "mosaic", "moselle", "mosey", "moslem", "mosque", "mosquito", "moss", "mossy", "most", "mostly", "mote", "motel", "motet", "moth", "mothball", "mothballs", "mother", "motherhood", "motherly", "mothproof", "motif", "motion", "motionless", "motions", "motivate", "motivation", "motive", "motley", "motocross", "motor", "motorbike", "motorboat", "mo
 torcade", "motorcar", "motorcycle", "motorcyclist", "motoring", "motorise", "motorist", "motorize", "motorman", "motorway", "mottled", "motto", "mould", "moulder", "moulding", "mouldy", "moult", "mound", "mount", "mountain", "mountaineer", "mountaineering", "mountainous", "mountainside", "mountaintop", "mountebank", "mountie", "mourn", "mourner", "mournful", "mourning", "mouse", "mouser", "mousetrap", "moussaka", "mousse", "moustache", "mousy", "mouth", "mouthful", "mouthorgan", "mouthpiece", "mouthwash", "movable", "move", "moveable", "movement", "movements", "mover", "movie", "movies", "moving", "mow", "mower", "mpg", "mph", "mra", "mrs", "msc", "much", "muchness", "mucilage", "muck", "muckheap", "muckrake", "mucky", "mucous", "mucus", "mud", "muddle", "muddy", "mudflat", "mudguard", "mudpack", "mudslinger", "muesli", "muezzin", "muff", "muffin", "muffle", "muffler", "mufti", "mug", "mugger", "muggins", "muggy", "mugwump", "muhammadan", "muhammadanism", "mulatto", "mulberry", "mul
 ch", "mulct", "mule", "muleteer", "mulish", "mull", "mullah", "mullet", "mulligatawny", "mullion", "mullioned", "multifarious", "multiform", "multilateral", "multilingual", "multimillionaire", "multiple", "multiplex", "multiplication", "multiplicity", "multiply", "multiracial", "multistorey", "multitude", "multitudinous", "mum", "mumble", "mummer", "mummery", "mummify", "mumming", "mummy", "mumps", "munch", "mundane", "municipal", "municipality", "munificence", "munificent", "muniments", "munition", "munitions", "mural", "murder", "murderous", "murk", "murky", "murmur", "murphy", "murrain", "muscatel", "muscle", "muscled", "muscleman", "muscovite", "muscular", "muse", "museum", "mush", "mushroom", "mushy", "music", "musical", "musically", "musician", "musicianship", "musk", "musket", "musketeer", "musketry", "muskmelon", "muskrat", "musky", "muslim", "muslin", "musquash", "muss", "mussel", "must", "mustache", "mustachio", "mustang", "mustard", "muster", "musty", "mutable", "mutant",
  "mutation", "mute", "muted", "mutilate", "mutilation", "mutineer", "mutinous", "mutiny", "mutt", "mutter", "mutton", "muttonchops", "mutual", "mutuality", "muzak", "muzzle", "muzzy", "mycology", "myelitis", "myna", "mynah", "myopia", "myriad", "myrrh", "myrtle", "myself", "mysterious", "mystery", "mystic", "mystical", "mysticism", "mystification", "mystify", "mystique", "myth", "mythical", "mythological", "mythologist", "mythology", "myxomatosis", "nab", "nabob", "nacelle", "nacre", "nadir", "nag", "naiad", "nail", "nailbrush", "naive", "naivete", "naivety", "naked", "name", "namedrop", "nameless", "namely", "nameplate", "namesake", "nanny", "nap", "napalm", "naphtha", "naphthalene", "napkin", "nappy", "narc", "narcissism", "narcissus", "narcotic", "nark", "narky", "narrate", "narration", "narrative", "narrator", "narrow", "narrowly", "narrows", "narwhal", "nasal", "nasalise", "nasalize", "nascent", "nasturtium", "nasty", "natal", "nation", "national", "nationalise", "nationalism",
  "nationalist", "nationalistic", "nationality", "nationalize", "nationwide", "native", "nativity", "nato", "natter", "natty", "natural", "naturalise", "naturalism", "naturalist", "naturalistic", "naturalize", "naturally", "naturalness", "nature", "naturism", "naturopath", "naught", "naughty", "nausea", "nauseate", "nauseous", "nautch", "nautical", "nautilus", "naval", "nave", "navel", "navigable", "navigate", "navigation", "navigator", "navvy", "navy", "nay", "nazi", "nco", "neanderthal", "neapolitan", "near", "nearby", "nearly", "nearside", "nearsighted", "neat", "nebula", "nebular", "nebulous", "necessaries", "necessarily", "necessary", "necessitate", "necessitous", "necessity", "neck", "neckband", "neckerchief", "necklace", "necklet", "neckline", "necktie", "neckwear", "necromancer", "necromancy", "necrophilia", "necrophiliac", "necropolis", "nectar", "nectarine", "need", "needful", "needle", "needless", "needlessly", "needlewoman", "needlework", "needs", "needy", "nefarious", "n
 egate", "negative", "neglect", "neglectful", "negligee", "negligence", "negligent", "negligible", "negotiable", "negotiate", "negotiation", "negress", "negro", "negus", "neigh", "neighbor", "neighborhood", "neighboring", "neighborly", "neighbour", "neighbourhood", "neighbouring", "neighbourly", "neither", "nelson", "nemesis", "neoclassical", "neocolonialism", "neolithic", "neologism", "neon", "neonate", "neophyte", "neoplasm", "nephew", "nephritis", "nepotism", "neptune", "nereid", "nerve", "nerveless", "nerves", "nervous", "nervy", "ness", "nest", "nesting", "nestle", "nestling", "nestor", "net", "netball", "nether", "nethermost", "nets", "nett", "netting", "nettle", "network", "neural", "neuralgia", "neurasthenia", "neurasthenic", "neuritis", "neurologist", "neurology", "neurosis", "neurotic", "neuter", "neutral", "neutralise", "neutrality", "neutralize", "neutralizer", "neutron", "never", "nevermore", "nevertheless", "new", "newborn", "newcomer", "newel", "newfangled", "newfoundl
 and", "newly", "newlywed", "newmarket", "news", "newsagent", "newsboy", "newscast", "newscaster", "newsletter", "newsmonger", "newspaper", "newsprint", "newsreel", "newsroom", "newssheet", "newsstand", "newsvendor", "newsworthy", "newsy", "newt", "newtonian", "next", "nexus", "nhs", "niacin", "nib", "nibble", "niblick", "nibs", "nice", "nicely", "nicety", "niche", "nick", "nickel", "nicker", "nicknack", "nickname", "nicotine", "niece", "niff", "nifty", "niggard", "niggardly", "nigger", "niggle", "niggling", "nigh", "night", "nightcap", "nightclothes", "nightclub", "nightdress", "nightfall", "nighthawk", "nightingale", "nightjar", "nightlife", "nightlight", "nightline", "nightlong", "nightly", "nightmare", "nights", "nightshade", "nightshirt", "nightstick", "nighttime", "nihilism", "nilotic", "nimble", "nimbus", "nimrod", "nincompoop", "nine", "ninepin", "ninepins", "nines", "nineteen", "ninety", "ninny", "ninth", "nip", "nipper", "nippers", "nipping", "nipple", "nippy", "nirvana", "
 nisi", "nit", "niter", "nitpick", "nitpicking", "nitrate", "nitre", "nitric", "nitrochalk", "nitrogen", "nitroglycerin", "nitroglycerine", "nitrous", "nitwit", "nix", "nob", "nobble", "nobility", "noble", "nobleman", "nobly", "nobody", "nocturnal", "nocturne", "nod", "nodal", "noddle", "nodular", "nodule", "noel", "noes", "nog", "noggin", "nohow", "noise", "noisome", "noisy", "nomad", "nomadic", "nomenclature", "nominal", "nominate", "nomination", "nominative", "nominee", "nonage", "nonagenarian", "nonaggression", "nonaligned", "nonalignment", "nonassertive", "nonce", "nonchalance", "nonchalant", "noncombatant", "noncommittal", "nonconductor", "nonconformist", "nonconformity", "noncontributory", "nondescript", "none", "nonentity", "nonesuch", "nonetheless", "nonfiction", "nonflammable", "nonintervention", "nonobservance", "nonpareil", "nonpayment", "nonplus", "nonproliferation", "nonresident", "nonrestrictive", "nonsense", "nonsensical", "nonskid", "nonsmoker", "nonstandard", "nonst
 arter", "nonstick", "nonstop", "nonunion", "nonverbal", "nonviolence", "nonviolent", "nonwhite", "noodle", "nook", "noon", "noonday", "noose", "nope", "nor", "nordic", "norm", "normal", "normalise", "normality", "normalize", "normally", "norman", "normative", "north", "northbound", "northeast", "northeaster", "northeasterly", "northeastern", "northeastward", "northeastwards", "northerly", "northern", "northerner", "northernmost", "northward", "northwards", "northwest", "northwester", "northwesterly", "northwestern", "northwestward", "northwestwards", "nos", "nose", "nosebag", "nosebleed", "nosecone", "nosedive", "nosegay", "nosey", "nosh", "nostalgia", "nostril", "nostrum", "nosy", "not", "notability", "notable", "notably", "notarise", "notarize", "notary", "notation", "notch", "note", "notebook", "notecase", "noted", "notepaper", "noteworthy", "nothing", "nothingness", "notice", "noticeable", "notifiable", "notification", "notify", "notion", "notional", "notions", "notoriety", "not
 orious", "notwithstanding", "nougat", "nought", "noun", "nourish", "nourishment", "nous", "nova", "novel", "novelette", "novelettish", "novelist", "novella", "novelty", "november", "novice", "noviciate", "novitiate", "novocaine", "now", "nowadays", "nowhere", "nowise", "noxious", "nozzle", "nth", "nuance", "nub", "nubile", "nuclear", "nucleus", "nude", "nudge", "nudism", "nudity", "nugatory", "nugget", "nuisance", "null", "nullah", "nullify", "nullity", "numb", "number", "numberless", "numberplate", "numbers", "numbly", "numbskull", "numeracy", "numeral", "numerate", "numeration", "numerator", "numerical", "numerology", "numerous", "numinous", "numismatic", "numismatics", "numskull", "nun", "nuncio", "nunnery", "nuptial", "nuptials", "nurse", "nurseling", "nursemaid", "nursery", "nurseryman", "nursing", "nursling", "nurture", "nut", "nutcase", "nutcracker", "nuthouse", "nutmeg", "nutria", "nutrient", "nutriment", "nutrition", "nutritious", "nutritive", "nuts", "nutshell", "nutty", "
 nuzzle", "nylon", "nylons", "nymph", "nymphet", "nymphomania", "nymphomaniac", "oaf", "oak", "oaken", "oakum", "oap", "oar", "oarlock", "oarsman", "oarsmanship", "oasis", "oat", "oatcake", "oath", "oatmeal", "oats", "obbligato", "obdurate", "obeah", "obedient", "obeisance", "obelisk", "obese", "obey", "obfuscate", "obituary", "object", "objection", "objectionable", "objective", "objector", "oblation", "obligate", "obligation", "obligatory", "oblige", "obliging", "oblique", "obliterate", "oblivion", "oblivious", "oblong", "obloquy", "obnoxious", "oboe", "oboist", "obscene", "obscenity", "obscurantism", "obscure", "obscurity", "obsequies", "obsequious", "observable", "observance", "observant", "observation", "observations", "observatory", "observe", "observer", "observing", "obsess", "obsession", "obsessional", "obsessive", "obsidian", "obsolescent", "obsolete", "obstacle", "obstetrician", "obstetrics", "obstinate", "obstreperous", "obstruct", "obstruction", "obstructionism", "obstruc
 tive", "obtain", "obtainable", "obtrude", "obtrusive", "obtuse", "obverse", "obviate", "obvious", "obviously", "ocarina", "occasion", "occasional", "occident", "occidental", "occult", "occupancy", "occupant", "occupation", "occupational", "occupier", "occupy", "occur", "occurrence", "ocean", "oceangoing", "oceanography", "ocelot", "ocher", "ochre", "octagon", "octane", "octave", "octavo", "octet", "october", "octogenarian", "octopus", "octosyllabic", "ocular", "oculist", "odalisque", "odd", "oddball", "oddity", "oddly", "oddment", "odds", "ode", "odious", "odium", "odor", "odoriferous", "odorous", "odour", "odyssey", "oecumenical", "oecumenicalism", "oesophagus", "oestrogen", "off", "offal", "offbeat", "offence", "offend", "offender", "offense", "offensive", "offer", "offering", "offertory", "offhand", "office", "officeholder", "officer", "offices", "official", "officialdom", "officialese", "officially", "officiate", "officious", "offing", "offish", "offprint", "offset", "offshoot",
  "offshore", "offside", "offspring", "offstage", "oft", "often", "ogle", "ogre", "ohm", "oho", "oil", "oilcake", "oilcan", "oilcloth", "oiled", "oilfield", "oilman", "oilrig", "oils", "oilskin", "oilskins", "oily", "oink", "ointment", "okapi", "okay", "okra", "old", "olden", "oldish", "oldster", "oleaginous", "oleander", "oleograph", "olfactory", "oligarch", "oligarchy", "olive", "olympiad", "olympian", "olympic", "ombudsman", "omega", "omelet", "omelette", "omen", "ominous", "omission", "omit", "omnibus", "omnipotent", "omnipresent", "omniscient", "omnivorous", "once", "oncoming", "one", "onerous", "oneself", "onetime", "ongoing", "onion", "onlooker", "only", "onomatopoeia", "onrush", "onset", "onshore", "onside", "onslaught", "onto", "ontology", "onus", "onward", "onwards", "onyx", "oodles", "oof", "oomph", "oops", "ooze", "opacity", "opal", "opalescent", "opaque", "ope", "open", "opencast", "opener", "openhearted", "opening", "openly", "openwork", "opera", "operable", "operate", 
 "operation", "operational", "operative", "operator", "operetta", "ophthalmia", "ophthalmic", "ophthalmology", "ophthalmoscope", "opiate", "opine", "opinion", "opinionated", "opium", "opossum", "opponent", "opportune", "opportunism", "opportunity", "oppose", "opposite", "opposition", "oppress", "oppression", "oppressive", "oppressor", "opprobrious", "opprobrium", "ops", "opt", "optative", "optic", "optical", "optician", "optics", "optimism", "optimum", "option", "optional", "opulence", "opulent", "opus", "oracle", "oracular", "oral", "orange", "orangeade", "orangeman", "orangutang", "oration", "orator", "oratorical", "oratorio", "oratory", "orb", "orbit", "orchard", "orchestra", "orchestral", "orchestrate", "orchid", "ordain", "ordeal", "order", "ordered", "orderly", "orders", "ordinal", "ordinance", "ordinand", "ordinarily", "ordinary", "ordinate", "ordination", "ordnance", "ordure", "ore", "oregano", "organ", "organdie", "organdy", "organic", "organisation", "organise", "organised"
 , "organism", "organist", "organization", "organize", "organized", "orgasm", "orgiastic", "orgy", "orient", "oriental", "orientalist", "orientate", "orientation", "orifice", "origin", "original", "originality", "originally", "originate", "oriole", "orison", "orlon", "ormolu", "ornament", "ornamental", "ornamentation", "ornate", "ornery", "ornithology", "orotund", "orphan", "orphanage", "orrery", "orrisroot", "orthodontic", "orthodontics", "orthodox", "orthodoxy", "orthography", "orthopaedic", "orthopaedics", "orthopedic", "orthopedics", "ortolan", "oryx", "oscar", "oscillate", "oscillation", "oscillator", "oscillograph", "oscilloscope", "osculation", "osier", "osmosis", "osprey", "osseous", "ossification", "ossify", "ostensible", "ostentation", "osteoarthritis", "osteopath", "osteopathy", "ostler", "ostracise", "ostracize", "ostrich", "other", "otherwise", "otherworldly", "otiose", "otter", "ottoman", "oubliette", "ouch", "ought", "ounce", "our", "ours", "ourselves", "ousel", "oust"
 , "out", "outback", "outbalance", "outbid", "outbound", "outbrave", "outbreak", "outbuilding", "outburst", "outcast", "outcaste", "outclass", "outcome", "outcrop", "outcry", "outdated", "outdistance", "outdo", "outdoor", "outdoors", "outer", "outermost", "outface", "outfall", "outfield", "outfight", "outfit", "outflank", "outflow", "outfox", "outgeneral", "outgoing", "outgoings", "outgrow", "outgrowth", "outhouse", "outing", "outlandish", "outlast", "outlaw", "outlay", "outlet", "outline", "outlive", "outlook", "outlying", "outmaneuver", "outmanoeuvre", "outmarch", "outmatch", "outmoded", "outmost", "outnumber", "outpatient", "outplay", "outpoint", "outpost", "outpourings", "output", "outrage", "outrageous", "outrange", "outrank", "outride", "outrider", "outrigger", "outright", "outrival", "outrun", "outsell", "outset", "outshine", "outside", "outsider", "outsize", "outskirts", "outsmart", "outspoken", "outspread", "outstanding", "outstay", "outstretched", "outstrip", "outtalk", "ou
 tvote", "outward", "outwardly", "outwards", "outwear", "outweigh", "outwit", "outwork", "outworn", "ouzel", "ouzo", "ova", "oval", "ovarian", "ovary", "ovation", "oven", "ovenware", "over", "overact", "overage", "overall", "overalls", "overarch", "overarm", "overawe", "overbalance", "overbear", "overbearing", "overbid", "overblown", "overboard", "overburden", "overcall", "overcapitalise", "overcapitalize", "overcast", "overcharge", "overcloud", "overcoat", "overcome", "overcompensate", "overcrop", "overcrowd", "overdevelop", "overdo", "overdone", "overdose", "overdraft", "overdraw", "overdrawn", "overdress", "overdrive", "overdue", "overestimate", "overexpose", "overflow", "overfly", "overgrown", "overgrowth", "overhand", "overhang", "overhaul", "overhead", "overheads", "overhear", "overjoyed", "overkill", "overland", "overlap", "overlay", "overleaf", "overleap", "overload", "overlong", "overlook", "overlord", "overly", "overman", "overmaster", "overmuch", "overnight", "overpass", "
 overpay", "overplay", "overpopulated", "overpopulation", "overpower", "overpowering", "overprint", "overrate", "overreach", "override", "overriding", "overrule", "overrun", "overseas", "oversee", "overseer", "oversell", "oversexed", "overshadow", "overshoe", "overshoot", "overside", "oversight", "oversimplify", "oversleep", "overspill", "overstate", "overstatement", "overstay", "oversteer", "overstep", "overstock", "overstrung", "overstuffed", "oversubscribed", "overt", "overtake", "overtax", "overthrow", "overtime", "overtone", "overtones", "overtop", "overtrump", "overture", "overtures", "overturn", "overweening", "overweight", "overwhelm", "overwhelming", "overwork", "overwrought", "oviduct", "oviparous", "ovoid", "ovulate", "ovum", "owe", "owl", "owlet", "owlish", "own", "owner", "ownership", "oxbridge", "oxcart", "oxeye", "oxide", "oxidise", "oxidize", "oxon", "oxonian", "oxtail", "oxyacetylene", "oxygen", "oxygenate", "oyez", "oyster", "oystercatcher", "ozone", "pabulum", "pac
 e", "pacemaker", "pacesetter", "pachyderm", "pacific", "pacifier", "pacifism", "pacifist", "pacify", "pack", "package", "packed", "packer", "packet", "packing", "packsaddle", "pact", "pad", "padding", "paddle", "paddock", "paddy", "padlock", "padre", "paean", "paederast", "paederasty", "paediatrician", "paediatrics", "paella", "paeony", "pagan", "paganism", "page", "pageant", "pageantry", "pagination", "pagoda", "paid", "pail", "paillasse", "pain", "pained", "painful", "painkiller", "painless", "pains", "painstaking", "paint", "paintbrush", "painter", "painting", "paints", "paintwork", "pair", "paisley", "pajama", "pajamas", "pal", "palace", "paladin", "palais", "palakeen", "palanquin", "palatable", "palatal", "palatalize", "palate", "palatial", "palatinate", "palaver", "pale", "paleface", "paleography", "paleolithic", "paleontology", "palette", "palfrey", "palimpsest", "palindrome", "paling", "palings", "palisade", "palish", "pall", "palladian", "pallbearer", "pallet", "palliasse",
  "palliate", "palliation", "palliative", "pallid", "pallor", "pally", "palm", "palmer", "palmetto", "palmist", "palmistry", "palmy", "palomino", "palpable", "palpate", "palpitate", "palpitation", "palsied", "palsy", "palter", "paltry", "pampas", "pamper", "pamphlet", "pamphleteer", "pan", "panacea", "panache", "panama", "panatela", "panatella", "pancake", "panchromatic", "pancreas", "panda", "pandemic", "pandemonium", "pander", "pandit", "panegyric", "panel", "paneling", "panelist", "panelling", "panellist", "pang", "panhandle", "panic", "panicky", "panjabi", "panjandrum", "pannier", "pannikin", "panoplied", "panoply", "panorama", "panpipes", "pansy", "pant", "pantaloon", "pantaloons", "pantechnicon", "pantheism", "pantheon", "panther", "panties", "pantile", "panto", "pantograph", "pantomime", "pantry", "pants", "panty", "panzer", "pap", "papa", "papacy", "papadum", "papal", "papaya", "paper", "paperback", "paperboy", "paperhanger", "papers", "paperweight", "paperwork", "papery", "p
 apist", "papoose", "pappy", "paprika", "papyrus", "par", "parable", "parabola", "parachute", "parachutist", "paraclete", "parade", "paradigm", "paradigmatic", "paradise", "paradisiacal", "paradox", "paraffin", "paragon", "paragraph", "parakeet", "parallel", "parallelism", "parallelogram", "paralyse", "paralysis", "paralytic", "paralyze", "paramilitary", "paramount", "paramountcy", "paramour", "paranoia", "paranoiac", "paranoid", "parapet", "paraphernalia", "paraphrase", "paraplegia", "paraplegic", "paraquat", "paras", "parasite", "parasitic", "parasol", "parathyroid", "paratrooper", "paratroops", "paratyphoid", "parboil", "parcel", "parch", "parchment", "pard", "pardon", "pardonable", "pardonably", "pardoner", "pare", "parent", "parentage", "parental", "parenthesis", "parenthetic", "parenthood", "parer", "parhelion", "pariah", "paring", "parish", "parishioner", "parisian", "parity", "park", "parka", "parkin", "parking", "parkland", "parky", "parlance", "parley", "parliament", "parli
 amentarian", "parliamentary", "parlor", "parlour", "parlous", "parmesan", "parochial", "parodist", "parody", "parole", "paroxysm", "parquet", "parr", "parricide", "parrot", "parry", "parse", "parsee", "parsi", "parsimonious", "parsimony", "parsley", "parsnip", "parson", "parsonage", "part", "partake", "parterre", "parthenogenesis", "partial", "partiality", "partially", "participant", "participate", "participation", "participial", "participle", "particle", "particular", "particularise", "particularity", "particularize", "particularly", "particulars", "parting", "partisan", "partita", "partition", "partitive", "partizan", "partly", "partner", "partnership", "partook", "partridge", "parts", "parturition", "party", "parvenu", "paschal", "pasha", "pass", "passable", "passage", "passageway", "passbook", "passenger", "passerby", "passim", "passing", "passion", "passionate", "passionately", "passionflower", "passive", "passivity", "passivize", "passkey", "passover", "passport", "password", 
 "past", "pasta", "paste", "pasteboard", "pastel", "pastern", "pasteurise", "pasteurize", "pastiche", "pastille", "pastime", "pasting", "pastor", "pastoral", "pastorale", "pastorate", "pastrami", "pastry", "pasturage", "pasture", "pasty", "pat", "patch", "patchouli", "patchwork", "patchy", "patella", "patent", "patentee", "patently", "pater", "paterfamilias", "paternal", "paternalism", "paternity", "paternoster", "path", "pathan", "pathetic", "pathfinder", "pathological", "pathologist", "pathology", "pathos", "pathway", "patience", "patient", "patina", "patio", "patisserie", "patois", "patrial", "patriarch", "patriarchal", "patriarchate", "patriarchy", "patrician", "patricide", "patrimony", "patriot", "patriotic", "patriotism", "patrol", "patrolman", "patron", "patronage", "patroness", "patronise", "patronize", "patronymic", "patten", "patter", "pattern", "patty", "paucity", "paunch", "paunchy", "pauper", "pauperise", "pauperism", "pauperize", "pause", "pavan", "pavane", "pave", "pav
 ed", "pavement", "pavilion", "paving", "paw", "pawky", "pawl", "pawn", "pawnbroker", "pawnshop", "pawpaw", "pay", "payable", "payday", "payee", "payer", "payload", "paymaster", "payment", "paynim", "payoff", "payola", "payroll", "pea", "peace", "peaceable", "peaceful", "peacekeeping", "peacemaker", "peacetime", "peach", "peachick", "peacock", "peafowl", "peahen", "peak", "peaked", "peaky", "peal", "peanut", "peanuts", "pear", "pearl", "pearly", "pearmain", "peasant", "peasantry", "peashooter", "peat", "pebble", "pebbledash", "pebbly", "pecan", "peccadillo", "peccary", "peck", "pecker", "peckish", "pectic", "pectin", "pectoral", "peculate", "peculiar", "peculiarity", "peculiarly", "pecuniary", "pedagogue", "pedagogy", "pedal" };
+        internal static string[] data = new string[] {
+            "lock","locker","locket","lockjaw","locknut",
+            "lockout","locks","locksmith","lockstitch","lockup",
+            "loco","locomotion","locomotive","locum","locus",
+            "locust","locution","lode","lodestar","lodestone",
+            "lodge","lodgement","lodger","lodging","lodgings",
+            "lodgment","loess","loft","lofted","lofty",
+            "log","loganberry","logarithm","logarithmic","logbook",
+            "logger","loggerheads","loggia","logic","logical",
+            "logically","logician","logistic","logistics","logjam",
+            "logrolling","loin","loincloth","loins","loiter",
+            "loll","lollipop","lollop","lolly","lone",
+            "lonely","loner","lonesome","long","longboat",
+            "longbow","longevity","longhaired","longhand","longheaded",
+            "longhop","longing","longish","longitude","longitudinal",
+            "longship","longshoreman","longsighted","longstanding","longstop",
+            "longsuffering","longueur","longways","longwearing","longwinded",
+            "longwise","loo","loofa","loofah","look",
+            "looker","lookout","looks","loom","loon",
+            "loony","loop","loophole","loose","loosebox",
+            "loosen","loot","lop","lope","loppings",
+            "loquacious","loquat","lord","lordly","lords",
+            "lordship","lore","lorgnette","lorn","lorry",
+            "lose","loser","loss","lost","lot",
+            "loth","lotion","lottery","lotto","lotus",
+            "loud","loudhailer","loudmouth","loudspeaker","lough",
+            "lounge","lounger","lour","louse","lousy",
+            "lout","louver","louvre","lovable","love",
+            "loveable","lovebird","lovechild","loveless","lovelorn",
+            "lovely","lovemaking","lover","lovers","lovesick",
+            "lovey","loving","low","lowborn","lowbred",
+            "lowbrow","lowdown","lower","lowermost","lowland",
+            "lowlander","lowly","loyal","loyalist","loyalty",
+            "lozenge","lsd","ltd","lubber","lubricant",
+            "lubricate","lubricator","lubricious","lucerne","lucid",
+            "luck","luckless","lucky","lucrative","lucre",
+            "ludicrous","ludo","luff","lug","luggage",
+            "lugger","lughole","lugsail","lugubrious","lugworm",
+            "lukewarm","lull","lullaby","lumbago","lumbar",
+            "lumber","lumberjack","lumberman","lumberyard","luminary",
+            "luminous","lumme","lummox","lummy","lump",
+            "lumpish","lumpy","lunacy","lunar","lunate",
+            "lunatic","lunch","lunchtime","lung","lunge",
+            "lungfish","lungpower","lupin","lurch","lure",
+            "lurgy","lurid","lurk","luscious","lush",
+            "lust","luster","lustful","lustre","lustrous",
+            "lusty","lutanist","lute","lutenist","luv",
+            "luxuriant","luxuriate","luxurious","luxury","lychee",
+            "lychgate","lye","lymph","lymphatic","lynch",
+            "lynx","lyre","lyrebird","lyric","lyrical",
+            "lyricism","lyricist","lyrics","mac","macabre",
+            "macadam","macadamise","macadamize","macaroni","macaroon",
+            "macaw","mace","macerate","mach","machete",
+            "machiavellian","machination","machine","machinegun","machinery",
+            "machinist","mackerel","mackintosh","macrobiotic","macrocosm",
+            "mad","madam","madame","madcap","madden",
+            "maddening","madder","made","madeira","mademoiselle",
+            "madhouse","madly","madman","madness","madonna",
+            "madrigal","maelstrom","maenad","maestro","mafia",
+            "mag","magazine","magenta","maggot","maggoty",
+            "magi","magic","magical","magician","magisterial",
+            "magistracy","magistrate","magma","magnanimity","magnanimous",
+            "magnate","magnesia","magnesium","magnet","magnetic",
+            "magnetise","magnetism","magnetize","magneto","magnificat",
+            "magnification","magnificent","magnifier","magnify","magniloquent",
+            "magnitude","magnolia","magnum","magpie","magus",
+            "maharaja","maharajah","maharanee","maharani","mahatma",
+            "mahlstick","mahogany","mahout","maid","maiden",
+            "maidenhair","maidenhead","maidenhood","maidenly","maidservant",
+            "mail","mailbag","mailbox","maim","main",
+            "mainland","mainline","mainly","mainmast","mains",
+            "mainsail","mainspring","mainstay","mainstream","maintain",
+            "maintenance","maisonette","maisonnette","maize","majestic",
+            "majesty","majolica","major","majordomo","majorette",
+            "majority","make","maker","makeshift","making",
+            "makings","malachite","maladjusted","maladministration","maladroit",
+            "malady","malaise","malapropism","malapropos","malaria",
+            "malarial","malay","malcontent","malcontented","male",
+            "malediction","malefactor","maleficent","malevolent","malfeasance",
+            "malformation","malformed","malfunction","malice","malicious",
+            "malign","malignancy","malignant","malignity","malinger",
+            "mall","mallard","malleable","mallet","mallow",
+            "malmsey","malnutrition","malodorous","malpractice","malt",
+            "malthusian","maltreat","maltster","mama","mamba",
+            "mambo","mamma","mammal","mammary","mammon",
+            "mammoth","mammy","man","manacle","manage",
+            "manageable","management","manager","manageress","managerial",
+            "manatee","mandarin","mandate","mandatory","mandible",
+            "mandolin","mandrake","mandrill","maneuver","maneuverable",
+            "manful","manganese","mange","manger","mangle",
+            "mango","mangosteen","mangrove","mangy","manhandle",
+            "manhole","manhood","manhour","mania","maniac",
+            "maniacal","manic","manicure","manicurist","manifest",
+            "manifestation","manifesto","manifold","manikin","manila",
+            "manilla","manipulate","manipulation","mankind","manly",
+            "manna","manned","mannequin","manner","mannered",
+            "mannerism","mannerly","manners","mannikin","mannish",
+            "manoeuverable","manoeuvre","manometer","manor","manorial",
+            "manpower","mansard","manse","manservant","mansion",
+            "mansions","manslaughter","mantelpiece","mantelshelf","mantilla",
+            "mantis","mantle","mantrap","manual","manufacture",
+            "manufacturer","manumit","manure","manuscript","manx",
+            "many","maoism","maori","map","maple",
+            "mapping","maquis","mar","marabou","marabout",
+            "maraschino","marathon","maraud","marble","marbled",
+            "marbles","marc","marcasite","march","marchioness",
+            "margarine","margin","marginal","marguerite","marigold",
+            "marihuana","marijuana","marimba","marina","marinade",
+            "marinate","marine","mariner","marionette","marital",
+            "maritime","marjoram","mark","markdown","marked",
+            "marker","market","marketeer","marketer","marketing",
+            "marketplace","marking","marksman","marksmanship","markup",
+            "marl","marlinespike","marmalade","marmoreal","marmoset",
+            "marmot","marocain","maroon","marquee","marquess",
+            "marquetry","marquis","marriage","marriageable","married",
+            "marrow","marrowbone","marrowfat","marry","mars",
+            "marsala","marseillaise","marsh","marshal","marshmallow",
+            "marshy","marsupial","mart","marten","martial",
+            "martian","martin","martinet","martini","martinmas",
+            "martyr","martyrdom","marvel","marvellous","marvelous",
+            "marxism","marzipan","mascara","mascot","masculine",
+            "masculinity","maser","mash","mashie","mask",
+            "masked","masochism","mason","masonic","masonry",
+            "masque","masquerade","mass","massacre","massage",
+            "masses","masseur","massif","massive","massy",
+            "mast","mastectomy","master","masterful","masterly",
+            "mastermind","masterpiece","mastership","masterstroke","mastery",
+            "masthead","mastic","masticate","mastiff","mastitis",
+            "mastodon","mastoid","mastoiditis","masturbate","mat",
+            "matador","match","matchbox","matching","matchless",
+            "matchlock","matchmaker","matchstick","matchwood","mate",
+            "material","materialise","materialism","materialist","materialize",
+            "maternal","maternity","matey","mathematician","mathematics",
+            "matins","matriarch","matriarchy","matricide","matriculate",
+            "matrimony","matrix","matron","matronly","matt",
+            "matter","matting","mattins","mattock","mattress",
+            "maturation","mature","maturity","maudlin","maul",
+            "maulstick","maunder","mausoleum","mauve","maverick",
+            "maw","mawkish","maxi","maxim","maximal",
+            "maximise","maximize","maximum","may","maybe",
+            "maybeetle","mayday","mayfly","mayhem","mayonnaise",
+            "mayor","mayoralty","mayoress","maypole","mayst",
+            "maze","mazed","mazurka","mccarthyism","mead",
+            "meadow","meadowsweet","meager","meagre","meal",
+            "mealie","mealtime","mealy","mealybug","mean",
+            "meander","meanderings","meaning","meaningful","meaningless",
+            "means","meant","meantime","meanwhile","measles",
+            "measly","measurable","measure","measured","measureless",
+            "measurement","meat","meatball","meaty","mecca",
+            "mechanic","mechanical","mechanics","mechanise","mechanism",
+            "mechanistic","mechanize","medal","medalist","medallion",
+            "medallist","meddle","meddlesome","media","mediaeval",
+            "medial","median","mediate","medic","medical",
+            "medicament","medicare","medicate","medication","medicinal",
+            "medicine","medico","medieval","mediocre","mediocrity",
+            "meditate","meditation","meditative","mediterranean","medium",
+            "medlar","medley","meed","meek","meerschaum",
+            "meet","meeting","meetinghouse","megadeath","megahertz",
+            "megalith","megalithic","megalomania","megalomaniac","megaphone",
+            "megaton","megrim","meiosis","melancholia","melancholic",
+            "melancholy","meld","melee","meliorate","meliorism",
+            "mellifluous","mellow","melodic","melodious","melodrama",
+            "melodramatic","melody","melon","melt","melting",
+            "member","membership","membrane","membranous","memento",
+            "memo","memoir","memoirs","memorabilia","memorable",
+            "memorandum","memorial","memorise","memorize","memory",
+            "memsahib","men","menace","menagerie","mend",
+            "mendacious","mendacity","mendelian","mendicant","mending",
+            "menfolk","menial","meningitis","meniscus","menopause",
+            "menses","menstrual","menstruate","mensurable","mensuration",
+            "mental","mentality","menthol","mentholated","mention",
+            "mentor","menu","meow","mephistopheles","mercantile",
+            "mercenary","mercer","mercerise","mercerize","merchandise",
+            "merchant","merchantman","merciful","merciless","mercurial",
+            "mercury","mercy","mere","merely","meretricious",
+            "merge","merger","meridian","meridional","meringue",
+            "merino","merit","meritocracy","meritorious","mermaid",
+            "merman","merriment","merry","merrymaking","mesa",
+            "mescalin","mescaline","mesdames","mesdemoiselles","meseems",
+            "mesh","mesmeric","mesmerise","mesmerism","mesmerist",
+            "mesmerize","mess","message","messenger","messiah",
+            "messianic","messieurs","messmate","messrs","messuage",
+            "messy","mestizo","met","metabolic","metabolise",
+            "metabolism","metabolize","metacarpal","metal","metalanguage",
+            "metallic","metallurgist","metallurgy","metalwork","metamorphose",
+            "metamorphosis","metaphor","metaphorical","metaphysics","metatarsal",
+            "mete","metempsychosis","meteor","meteoric","meteorite",
+            "meteoroid","meteorologist","meteorology","meter","methane",
+            "methinks","method","methodical","methodism","methodology",
+            "meths","methuselah","meticulous","metre","metric",
+            "metrical","metrication","metricise","metricize","metro",
+            "metronome","metropolis","metropolitan","mettle","mettlesome",
+            "mew","mews","mezzanine","mezzo","mezzotint",
+            "miaow","miasma","mica","mice","michaelmas",
+            "mick","mickey","microbe","microbiologist","microbiology",
+            "microcosm","microelectronics","microfiche","microfilm","micromesh",
+            "micrometer","micron","microorganism","microphone","microscope",
+            "microscopic","microsecond","microwave","mid","midair",
+            "midcourse","midday","midden","middle","middlebrow",
+            "middleman","middleweight","middling","midge","midget",
+            "midi","midland","midlands","midmost","midnight",
+            "midpoint","midriff","midshipman","midships","midst",
+            "midsummer","midway","midweek","midwest","midwicket",
+            "midwife","midwifery","mien","miffed","might",
+            "mightily","mighty","mignonette","migraine","migrant",
+            "migrate","migration","migratory","mikado","mike",
+            "milady","mild","mildew","mildly","mile",
+            "mileage","mileometer","miler","milestone","milieu",
+            "militancy","militant","militarise","militarism","militarize",
+            "military","militate","militia","militiaman","milk",
+            "milker","milkmaid","milkman","milksop","milkweed",
+            "milky","mill","millboard","milldam","millenarian",
+            "millenium","millepede","miller","millet","millibar",
+            "milligram","milligramme","milliliter","millilitre","millimeter",
+            "millimetre","milliner","millinery","million","millionaire",
+            "millipede","millpond","millrace","millstone","millwheel",
+            "millwright","milometer","milord","milt","mime",
+            "mimeograph","mimetic","mimic","mimicry","mimosa",
+            "min","minaret","minatory","mince","mincemeat",
+            "mincer","mincingly","mind","minded","mindful",
+            "mindless","mine","minefield","minelayer","miner",
+            "mineral","mineralogist","mineralogy","minestrone","minesweeper",
+            "mingle","mingy","mini","miniature","miniaturist",
+            "minibus","minim","minimal","minimise","minimize",
+            "minimum","mining","minion","minister","ministerial",
+            "ministrant","ministration","ministry","miniver","mink",
+            "minnow","minor","minority","minotaur","minster",
+            "minstrel","minstrelsy","mint","minuet","minus",
+            "minuscule","minute","minutely","minuteman","minutes",
+            "minutia","minx","miracle","miraculous","mirage",
+            "mire","mirror","mirth","miry","misadventure",
+            "misadvise","misalliance","misanthrope","misanthropy","misapplication",
+            "misapply","misapprehend","misapprehension","misappropriate","misbegotten",
+            "misbehave","misbehaved","misbehavior","misbehaviour","miscalculate",
+            "miscall","miscarry","miscast","miscegenation","miscellaneous",
+            "miscellany","mischance","mischief","mischievous","misconceive",
+            "misconception","misconduct","misconstruction","misconstrue","miscount",
+            "miscreant","miscue","misdate","misdeal","misdeed",
+            "misdemeanor","misdemeanour","misdirect","misdoing","miser",
+            "miserable","miserably","miserly","misery","misfire",
+            "misfit","misfortune","misgiving","misgovern","misguide",
+            "misguided","mishandle","mishap","mishear","mishit",
+            "mishmash","misinform","misinterpret","misjudge","misjudgement",
+            "misjudgment","mislay","mislead","mismanage","mismatch",
+            "misname","misnomer","misogynist","misogyny","misplace",
+            "misprint","mispronounce","mispronunciation","misquote","misread",
+            "misreport","misrepresent","misrule","miss","missal",
+            "misshapen","missile","missing","mission","missionary",
+            "missis","missive","misspell","misspend","misstate",
+            "misstatement","missus","missy","mist","mistake",
+            "mistaken","mister","mistime","mistletoe","mistral",
+            "mistranslate","mistress","mistrial","mistrust","mistrustful",
+            "mists","misty","misunderstand","misunderstanding","misuse",
+            "mite","miter","mitigate","mitosis","mitre",
+            "mitt","mitten","mix","mixed","mixer",
+            "mixture","mizen","mizzen","mizzenmast","mizzle",
+            "mnemonic","mnemonics","moa","moan","moat",
+            "moated","mob","mobile","mobilisation","mobilise",
+            "mobility","mobilization","mobilize","mobster","moccasin",
+            "mocha","mock","mockers","mockery","mockingbird",
+            "modal","mode","model","moderate","moderately",
+            "moderation","moderations","moderato","moderator","modern",
+            "modernise","modernism","modernistic","modernity","modernize",
+            "modest","modesty","modicum","modification","modifier",
+            "modify","modish","mods","modular","modulate",
+            "modulation","module","moggy","mogul","moh",
+            "mohair","mohammedan","mohammedanism","moiety","moist",
+            "moisten","moisture","moisturise","moisturize","moke",
+            "molar","molasses","mold","molder","molding",
+            "moldy","mole","molecular","molecule","molehill",
+            "moleskin","molest","moll","mollify","mollusc",
+            "mollusk","mollycoddle","molt","molten","molto",
+            "molybdenum","mom","moment","momentarily","momentary",
+            "momentous","moments","momentum","momma","mommy",
+            "monarch","monarchic","monarchism","monarchist","monarchy",
+            "monastery","monastic","monasticism","monaural","monday",
+            "monetary","money","moneybags","moneybox","moneychanger",
+            "moneyed","moneylender","moneymaker","moneys","monger",
+            "mongol","mongolism","mongoose","mongrel","monies",
+            "monitor","monk","monkey","mono","monochrome",
+            "monocle","monogamous","monogamy","monogram","monograph",
+            "monolith","monolithic","monolog","monologue","monomania",
+            "monomaniac","mononucleosis","monophonic","monophthong","monoplane",
+            "monopolise","monopolist","monopolize","monopoly","monorail",
+            "monosyllabic","monosyllable","monotheism","monotone","monotonous",
+            "monotony","monotype","monoxide","monsieur","monsignor",
+            "monsoon","monster","monstrance","monstrosity","monstrous",
+            "montage","month","monthly","monument","monumental",
+            "monumentally","moo","mooch","moocow","mood",
+            "moody","moon","moonbeam","mooncalf","moonlight",
+            "moonlit","moonshine","moonstone","moonstruck","moony",
+            "moor","moorhen","moorings","moorish","moorland",
+            "moose","moot","mop","mope","moped",
+            "moppet","moquette","moraine","moral","morale",
+            "moralise","moralist","moralistic","morality","moralize",
+            "morally","morals","morass","moratorium","morbid",
+            "morbidity","mordant","more","morello","moreover",
+            "mores","moresque","morganatic","morgue","moribund",
+            "mormon","mormonism","morn","morning","mornings",
+            "morocco","moron","moronic","morose","morpheme",
+            "morphemics","morpheus","morphine","morphology","morrow",
+            "morsel","mortal","mortality","mortally","mortar",
+            "mortarboard","mortgage","mortgagee","mortgagor","mortice",
+            "mortician","mortification","mortify","mortise","mortuary",
+            "mosaic","moselle","mosey","moslem","mosque",
+            "mosquito","moss","mossy","most","mostly",
+            "mote","motel","motet","moth","mothball",
+            "mothballs","mother","motherhood","motherly","mothproof",
+            "motif","motion","motionless","motions","motivate",
+            "motivation","motive","motley","motocross","motor",
+            "motorbike","motorboat","motorcade","motorcar","motorcycle",
+            "motorcyclist","motoring","motorise","motorist","motorize",
+            "motorman","motorway","mottled","motto","mould",
+            "moulder","moulding","mouldy","moult","mound",
+            "mount","mountain","mountaineer","mountaineering","mountainous",
+            "mountainside","mountaintop","mountebank","mountie","mourn",
+            "mourner","mournful","mourning","mouse","mouser",
+            "mousetrap","moussaka","mousse","moustache","mousy",
+            "mouth","mouthful","mouthorgan","mouthpiece","mouthwash",
+            "movable","move","moveable","movement","movements",
+            "mover","movie","movies","moving","mow",
+            "mower","mpg","mph","mra","mrs",
+            "msc","much","muchness","mucilage","muck",
+            "muckheap","muckrake","mucky","mucous","mucus",
+            "mud","muddle","muddy","mudflat","mudguard",
+            "mudpack","mudslinger","muesli","muezzin","muff",
+            "muffin","muffle","muffler","mufti","mug",
+            "mugger","muggins","muggy","mugwump","muhammadan",
+            "muhammadanism","mulatto","mulberry","mulch","mulct",
+            "mule","muleteer","mulish","mull","mullah",
+            "mullet","mulligatawny","mullion","mullioned","multifarious",
+            "multiform","multilateral","multilingual","multimillionaire","multiple",
+            "multiplex","multiplication","multiplicity","multiply","multiracial",
+            "multistorey","multitude","multitudinous","mum","mumble",
+            "mummer","mummery","mummify","mumming","mummy",
+            "mumps","munch","mundane","municipal","municipality",
+            "munificence","munificent","muniments","munition","munitions",
+            "mural","murder","murderous","murk","murky",
+            "murmur","murphy","murrain","muscatel","muscle",
+            "muscled","muscleman","muscovite","muscular","muse",
+            "museum","mush","mushroom","mushy","music",
+            "musical","musically","musician","musicianship","musk",
+            "musket","musketeer","musketry","muskmelon","muskrat",
+            "musky","muslim","muslin","musquash","muss",
+            "mussel","must","mustache","mustachio","mustang",
+            "mustard","muster","musty","mutable","mutant",
+            "mutation","mute","muted","mutilate","mutilation",
+            "mutineer","mutinous","mutiny","mutt","mutter",
+            "mutton","muttonchops","mutual","mutuality","muzak",
+            "muzzle","muzzy","mycology","myelitis","myna",
+            "mynah","myopia","myriad","myrrh","myrtle",
+            "myself","mysterious","mystery","mystic","mystical",
+            "mysticism","mystification","mystify","mystique","myth",
+            "mythical","mythological","mythologist","mythology","myxomatosis",
+            "nab","nabob","nacelle","nacre","nadir",
+            "nag","naiad","nail","nailbrush","naive",
+            "naivete","naivety","naked","name","namedrop",
+            "nameless","namely","nameplate","namesake","nanny",
+            "nap","napalm","naphtha","naphthalene","napkin",
+            "nappy","narc","narcissism","narcissus","narcotic",
+            "nark","narky","narrate","narration","narrative",
+            "narrator","narrow","narrowly","narrows","narwhal",
+            "nasal","nasalise","nasalize","nascent","nasturtium",
+            "nasty","natal","nation","national","nationalise",
+            "nationalism","nationalist","nationalistic","nationality","nationalize",
+            "nationwide","native","nativity","nato","natter",
+            "natty","natural","naturalise","naturalism","naturalist",
+            "naturalistic","naturalize","naturally","naturalness","nature",
+            "naturism","naturopath","naught","naughty","nausea",
+            "nauseate","nauseous","nautch","nautical","nautilus",
+            "naval","nave","navel","navigable","navigate",
+            "navigation","navigator","navvy","navy","nay",
+            "nazi","nco","neanderthal","neapolitan","near",
+            "nearby","nearly","nearside","nearsighted","neat",
+            "nebula","nebular","nebulous","necessaries","necessarily",
+            "necessary","necessitate","necessitous","necessity","neck",
+            "neckband","neckerchief","necklace","necklet","neckline",
+            "necktie","neckwear","necromancer","necromancy","necrophilia",
+            "necrophiliac","necropolis","nectar","nectarine","need",
+            "needful","needle","needless","needlessly","needlewoman",
+            "needlework","needs","needy","nefarious","negate",
+            "negative","neglect","neglectful","negligee","negligence",
+            "negligent","negligible","negotiable","negotiate","negotiation",
+            "negress","negro","negus","neigh","neighbor",
+            "neighborhood","neighboring","neighborly","neighbour","neighbourhood",
+            "neighbouring","neighbourly","neither","nelson","nemesis",
+            "neoclassical","neocolonialism","neolithic","neologism","neon",
+            "neonate","neophyte","neoplasm","nephew","nephritis",
+            "nepotism","neptune","nereid","nerve","nerveless",
+            "nerves","nervous","nervy","ness","nest",
+            "nesting","nestle","nestling","nestor","net",
+            "netball","nether","nethermost","nets","nett",
+            "netting","nettle","network","neural","neuralgia",
+            "neurasthenia","neurasthenic","neuritis","neurologist","neurology",
+            "neurosis","neurotic","neuter","neutral","neutralise",
+            "neutrality","neutralize","neutralizer","neutron","never",
+            "nevermore","nevertheless","new","newborn","newcomer",
+            "newel","newfangled","newfoundland","newly","newlywed",
+            "newmarket","news","newsagent","newsboy","newscast",
+            "newscaster","newsletter","newsmonger","newspaper","newsprint",
+            "newsreel","newsroom","newssheet","newsstand","newsvendor",
+            "newsworthy","newsy","newt","newtonian","next",
+            "nexus","nhs","niacin","nib","nibble",
+            "niblick","nibs","nice","nicely","nicety",
+            "niche","nick","nickel","nicker","nicknack",
+            "nickname","nicotine","niece","niff","nifty",
+            "niggard","niggardly","nigger","niggle","niggling",
+            "nigh","night","nightcap","nightclothes","nightclub",
+            "nightdress","nightfall","nighthawk","nightingale","nightjar",
+            "nightlife","nightlight","nightline","nightlong","nightly",
+            "nightmare","nights","nightshade","nightshirt","nightstick",
+            "nighttime","nihilism","nilotic","nimble","nimbus",
+            "nimrod","nincompoop","nine","ninepin","ninepins",
+            "nines","nineteen","ninety","ninny","ninth",
+            "nip","nipper","nippers","nipping","nipple",
+            "nippy","nirvana","nisi","nit","niter",
+            "nitpick","nitpicking","nitrate","nitre","nitric",
+            "nitrochalk","nitrogen","nitroglycerin","nitroglycerine","nitrous",
+            "nitwit","nix","nob","nobble","nobility",
+            "noble","nobleman","nobly","nobody","nocturnal",
+            "nocturne","nod","nodal","noddle","nodular",
+            "nodule","noel","noes","nog","noggin",
+            "nohow","noise","noisome","noisy","nomad",
+            "nomadic","nomenclature","nominal","nominate","nomination",
+            "nominative","nominee","nonage","nonagenarian","nonaggression",
+            "nonaligned","nonalignment","nonassertive","nonce","nonchalance",
+            "nonchalant","noncombatant","noncommittal","nonconductor","nonconformist",
+            "nonconformity","noncontributory","nondescript","none","nonentity",
+            "nonesuch","nonetheless","nonfiction","nonflammable","nonintervention",
+            "nonobservance","nonpareil","nonpayment","nonplus","nonproliferation",
+            "nonresident","nonrestrictive","nonsense","nonsensical","nonskid",
+            "nonsmoker","nonstandard","nonstarter","nonstick","nonstop",
+            "nonunion","nonverbal","nonviolence","nonviolent","nonwhite",
+            "noodle","nook","noon","noonday","noose",
+            "nope","nor","nordic","norm","normal",
+            "normalise","normality","normalize","normally","norman",
+            "normative","north","northbound","northeast","northeaster",
+            "northeasterly","northeastern","northeastward","northeastwards","northerly",
+            "northern","northerner","northernmost","northward","northwards",
+            "northwest","northwester","northwesterly","northwestern","northwestward",
+            "northwestwards","nos","nose","nosebag","nosebleed",
+            "nosecone","nosedive","nosegay","nosey","nosh",
+            "nostalgia","nostril","nostrum","nosy","not",
+            "notability","notable","notably","notarise","notarize",
+            "notary","notation","notch","note","notebook",
+            "notecase","noted","notepaper","noteworthy","nothing",
+            "nothingness","notice","noticeable","notifiable","notification",
+            "notify","notion","notional","notions","notoriety",
+            "notorious","notwithstanding","nougat","nought","noun",
+            "nourish","nourishment","nous","nova","novel",
+            "novelette","novelettish","novelist","novella","novelty",
+            "november","novice","noviciate","novitiate","novocaine",
+            "now","nowadays","nowhere","nowise","noxious",
+            "nozzle","nth","nuance","nub","nubile",
+            "nuclear","nucleus","nude","nudge","nudism",
+            "nudity","nugatory","nugget","nuisance","null",
+            "nullah","nullify","nullity","numb","number",
+            "numberless","numberplate","numbers","numbly","numbskull",
+            "numeracy","numeral","numerate","numeration","numerator",
+            "numerical","numerology","numerous","numinous","numismatic",
+            "numismatics","numskull","nun","nuncio","nunnery",
+            "nuptial","nuptials","nurse","nurseling","nursemaid",
+            "nursery","nurseryman","nursing","nursling","nurture",
+            "nut","nutcase","nutcracker","nuthouse","nutmeg",
+            "nutria","nutrient","nutriment","nutrition","nutritious",
+            "nutritive","nuts","nutshell","nutty","nuzzle",
+            "nylon","nylons","nymph","nymphet","nymphomania",
+            "nymphomaniac","oaf","oak","oaken","oakum",
+            "oap","oar","oarlock","oarsman","oarsmanship",
+            "oasis","oat","oatcake","oath","oatmeal",
+            "oats","obbligato","obdurate","obeah","obedient",
+            "obeisance","obelisk","obese","obey","obfuscate",
+            "obituary","object","objection","objectionable","objective",
+            "objector","oblation","obligate","obligation","obligatory",
+            "oblige","obliging","oblique","obliterate","oblivion",
+            "oblivious","oblong","obloquy","obnoxious","oboe",
+            "oboist","obscene","obscenity","obscurantism","obscure",
+            "obscurity","obsequies","obsequious","observable","observance",
+            "observant","observation","observations","observatory","observe",
+            "observer","observing","obsess","obsession","obsessional",
+            "obsessive","obsidian","obsolescent","obsolete","obstacle",
+            "obstetrician","obstetrics","obstinate","obstreperous","obstruct",
+            "obstruction","obstructionism","obstructive","obtain","obtainable",
+            "obtrude","obtrusive","obtuse","obverse","obviate",
+            "obvious","obviously","ocarina","occasion","occasional",
+            "occident","occidental","occult","occupancy","occupant",
+            "occupation","occupational","occupier","occupy","occur",
+            "occurrence","ocean","oceangoing","oceanography","ocelot",
+            "ocher","ochre","octagon","octane","octave",
+            "octavo","octet","october","octogenarian","octopus",
+            "octosyllabic","ocular","oculist","odalisque","odd",
+            "oddball","oddity","oddly","oddment","odds",
+            "ode","odious","odium","odor","odoriferous",
+            "odorous","odour","odyssey","oecumenical","oecumenicalism",
+            "oesophagus","oestrogen","off","offal","offbeat",
+            "offence","offend","offender","offense","offensive",
+            "offer","offering","offertory","offhand","office",
+            "officeholder","officer","offices","official","officialdom",
+            "officialese","officially","officiate","officious","offing",
+            "offish","offprint","offset","offshoot","offshore",
+            "offside","offspring","offstage","oft","often",
+            "ogle","ogre","ohm","oho","oil",
+            "oilcake","oilcan","oilcloth","oiled","oilfield",
+            "oilman","oilrig","oils","oilskin","oilskins",
+            "oily","oink","ointment","okapi","okay",
+            "okra","old","olden","oldish","oldster",
+            "oleaginous","oleander","oleograph","olfactory","oligarch",
+            "oligarchy","olive","olympiad","olympian","olympic",
+            "ombudsman","omega","omelet","omelette","omen",
+            "ominous","omission","omit","omnibus","omnipotent",
+            "omnipresent","omniscient","omnivorous","once","oncoming",
+            "one","onerous","oneself","onetime","ongoing",
+            "onion","onlooker","only","onomatopoeia","onrush",
+            "onset","onshore","onside","onslaught","onto",
+            "ontology","onus","onward","onwards","onyx",
+            "oodles","oof","oomph","oops","ooze",
+            "opacity","opal","opalescent","opaque","ope",
+            "open","opencast","opener","openhearted","opening",
+            "openly","openwork","opera","operable","operate",
+            "operation","operational","operative","operator","operetta",
+            "ophthalmia","ophthalmic","ophthalmology","ophthalmoscope","opiate",
+            "opine","opinion","opinionated","opium","opossum",
+            "opponent","opportune","opportunism","opportunity","oppose",
+            "opposite","opposition","oppress","oppression","oppressive",
+            "oppressor","opprobrious","opprobrium","ops","opt",
+            "optative","optic","optical","optician","optics",
+            "optimism","optimum","option","optional","opulence",
+            "opulent","opus","oracle","oracular","oral",
+            "orange","orangeade","orangeman","orangutang","oration",
+            "orator","oratorical","oratorio","oratory","orb",
+            "orbit","orchard","orchestra","orchestral","orchestrate",
+            "orchid","ordain","ordeal","order","ordered",
+            "orderly","orders","ordinal","ordinance","ordinand",
+            "ordinarily","ordinary","ordinate","ordination","ordnance",
+            "ordure","ore","oregano","organ","organdie",
+            "organdy","organic","organisation","organise","organised",
+            "organism","organist","organization","organize","organized",
+            "orgasm","orgiastic","orgy","orient","oriental",
+            "orientalist","orientate","orientation","orifice","origin",
+            "original","originality","originally","originate","oriole",
+            "orison","orlon","ormolu","ornament","ornamental",
+            "ornamentation","ornate","ornery","ornithology","orotund",
+            "orphan","orphanage","orrery","orrisroot","orthodontic",
+            "orthodontics","orthodox","orthodoxy","orthography","orthopaedic",
+            "orthopaedics","orthopedic","orthopedics","ortolan","oryx",
+            "oscar","oscillate","oscillation","oscillator","oscillograph",
+            "oscilloscope","osculation","osier","osmosis","osprey",
+            "osseous","ossification","ossify","ostensible","ostentation",
+            "osteoarthritis","osteopath","osteopathy","ostler","ostracise",
+            "ostracize","ostrich","other","otherwise","otherworldly",
+            "otiose","otter","ottoman","oubliette","ouch",
+            "ought","ounce","our","ours","ourselves",
+            "ousel","oust","out","outback","outbalance",
+            "outbid","outbound","outbrave","outbreak","outbuilding",
+            "outburst","outcast","outcaste","outclass","outcome",
+            "outcrop","outcry","outdated","outdistance","outdo",
+            "outdoor","outdoors","outer","outermost","outface",
+            "outfall","outfield","outfight","outfit","outflank",
+            "outflow","outfox","outgeneral","outgoing","outgoings",
+            "outgrow","outgrowth","outhouse","outing","outlandish",
+            "outlast","outlaw","outlay","outlet","outline",
+            "outlive","outlook","outlying","outmaneuver","outmanoeuvre",
+            "outmarch","outmatch","outmoded","outmost","outnumber",
+            "outpatient","outplay","outpoint","outpost","outpourings",
+            "output","outrage","outrageous","outrange","outrank",
+            "outride","outrider","outrigger","outright","outrival",
+            "outrun","outsell","outset","outshine","outside",
+            "outsider","outsize","outskirts","outsmart","outspoken",
+            "outspread","outstanding","outstay","outstretched","outstrip",
+            "outtalk","outvote","outward","outwardly","outwards",
+            "outwear","outweigh","outwit","outwork","outworn",
+            "ouzel","ouzo","ova","oval","ovarian",
+            "ovary","ovation","oven","ovenware","over",
+            "overact","overage","overall","overalls","overarch",
+            "overarm","overawe","overbalance","overbear","overbearing",
+            "overbid","overblown","overboard","overburden","overcall",
+            "overcapitalise","overcapitalize","overcast","overcharge","overcloud",
+            "overcoat","overcome","overcompensate","overcrop","overcrowd",
+            "overdevelop","overdo","overdone","overdose","overdraft",
+            "overdraw","overdrawn","overdress","overdrive","overdue",
+            "overestimate","overexpose","overflow","overfly","overgrown",
+            "overgrowth","overhand","overhang","overhaul","overhead",
+            "overheads","overhear","overjoyed","overkill","overland",
+            "overlap","overlay","overleaf","overleap","overload",
+            "overlong","overlook","overlord","overly","overman",
+            "overmaster","overmuch","overnight","overpass","overpay",
+            "overplay","overpopulated","overpopulation","overpower","overpowering",
+            "overprint","overrate","overreach","override","overriding",
+            "overrule","overrun","overseas","oversee","overseer",
+            "oversell","oversexed","overshadow","overshoe","overshoot",
+            "overside","oversight","oversimplify","oversleep","overspill",
+            "overstate","overstatement","overstay","oversteer","overstep",
+            "overstock","overstrung","overstuffed","oversubscribed","overt",
+            "overtake","overtax","overthrow","overtime","overtone",
+            "overtones","overtop","overtrump","overture","overtures",
+            "overturn","overweening","overweight","overwhelm","overwhelming",
+            "overwork","overwrought","oviduct","oviparous","ovoid",
+            "ovulate","ovum","owe","owl","owlet",
+            "owlish","own","owner","ownership","oxbridge",
+            "oxcart","oxeye","oxide","oxidise","oxidize",
+            "oxon","oxonian","oxtail","oxyacetylene","oxygen",
+            "oxygenate","oyez","oyster","oystercatcher","ozone",
+            "pabulum","pace","pacemaker","pacesetter","pachyderm",
+            "pacific","pacifier","pacifism","pacifist","pacify",
+            "pack","package","packed","packer","packet",
+            "packing","packsaddle","pact","pad","padding",
+            "paddle","paddock","paddy","padlock","padre",
+            "paean","paederast","paederasty","paediatrician","paediatrics",
+            "paella","paeony","pagan","paganism","page",
+            "pageant","pageantry","pagination","pagoda","paid",
+            "pail","paillasse","pain","pained","painful",
+            "painkiller","painless","pains","painstaking","paint",
+            "paintbrush","painter","painting","paints","paintwork",
+            "pair","paisley","pajama","pajamas","pal",
+            "palace","paladin","palais","palakeen","palanquin",
+            "palatable","palatal","palatalize","palate","palatial",
+            "palatinate","palaver","pale","paleface","paleography",
+            "paleolithic","paleontology","palette","palfrey","palimpsest",
+            "palindrome","paling","palings","palisade","palish",
+            "pall","palladian","pallbearer","pallet","palliasse",
+            "palliate","palliation","palliative","pallid","pallor",
+            "pally","palm","palmer","palmetto","palmist",
+            "palmistry","palmy","palomino","palpable","palpate",
+            "palpitate","palpitation","palsied","palsy","palter",
+            "paltry","pampas","pamper","pamphlet","pamphleteer",
+            "pan","panacea","panache","panama","panatela",
+            "panatella","pancake","panchromatic","pancreas","panda",
+            "pandemic","pandemonium","pander","pandit","panegyric",
+            "panel","paneling","panelist","panelling","panellist",
+            "pang","panhandle","panic","panicky","panjabi",
+            "panjandrum","pannier","pannikin","panoplied","panoply",
+            "panorama","panpipes","pansy","pant","pantaloon",
+            "pantaloons","pantechnicon","pantheism","pantheon","panther",
+            "panties","pantile","panto","pantograph","pantomime",
+            "pantry","pants","panty","panzer","pap",
+            "papa","papacy","papadum","papal","papaya",
+            "paper","paperback","paperboy","paperhanger","papers",
+            "paperweight","paperwork","papery","papist","papoose",
+            "pappy","paprika","papyrus","par","parable",
+            "parabola","parachute","parachutist","paraclete","parade",
+            "paradigm","paradigmatic","paradise","paradisiacal","paradox",
+            "paraffin","paragon","paragraph","parakeet","parallel",
+            "parallelism","parallelogram","paralyse","paralysis","paralytic",
+            "paralyze","paramilitary","paramount","paramountcy","paramour",
+            "paranoia","paranoiac","paranoid","parapet","paraphernalia",
+            "paraphrase","paraplegia","paraplegic","paraquat","paras",
+            "parasite","parasitic","parasol","parathyroid","paratrooper",
+            "paratroops","paratyphoid","parboil","parcel","parch",
+            "parchment","pard","pardon","pardonable","pardonably",
+            "pardoner","pare","parent","parentage","parental",
+            "parenthesis","parenthetic","parenthood","parer","parhelion",
+            "pariah","paring","parish","parishioner","parisian",
+            "parity","park","parka","parkin","parking",
+            "parkland","parky","parlance","parley","parliament",
+            "parliamentarian","parliamentary","parlor","parlour","parlous",
+            "parmesan","parochial","parodist","parody","parole",
+            "paroxysm","parquet","parr","parricide","parrot",
+            "parry","parse","parsee","parsi","parsimonious",
+            "parsimony","parsley","parsnip","parson","parsonage",
+            "part","partake","parterre","parthenogenesis","partial",
+            "partiality","partially","participant","participate","participation",
+            "participial","participle","particle","particular","particularise",
+            "particularity","particularize","particularly","particulars","parting",
+            "partisan","partita","partition","partitive","partizan",
+            "partly","partner","partnership","partook","partridge",
+            "parts","parturition","party","parvenu","paschal",
+            "pasha","pass","passable","passage","passageway",
+            "passbook","passenger","passerby","passim","passing",
+            "passion","passionate","passionately","passionflower","passive",
+            "passivity","passivize","passkey","passover","passport",
+            "password","past","pasta","paste","pasteboard",
+            "pastel","pastern","pasteurise","pasteurize","pastiche",
+            "pastille","pastime","pasting","pastor","pastoral",
+            "pastorale","pastorate","pastrami","pastry","pasturage",
+            "pasture","pasty","pat","patch","patchouli",
+            "patchwork","patchy","patella","patent","patentee",
+            "patently","pater","paterfamilias","paternal","paternalism",
+            "paternity","paternoster","path","pathan","pathetic",
+            "pathfinder","pathological","pathologist","pathology","pathos",
+            "pathway","patience","patient","patina","patio",
+            "patisserie","patois","patrial","patriarch","patriarchal",
+            "patriarchate","patriarchy","patrician","patricide","patrimony",
+            "patriot","patriotic","patriotism","patrol","patrolman",
+            "patron","patronage","patroness","patronise","patronize",
+            "patronymic","patten","patter","pattern","patty",
+            "paucity","paunch","paunchy","pauper","pauperise",
+            "pauperism","pauperize","pause","pavan","pavane",
+            "pave","paved","pavement","pavilion","paving",
+            "paw","pawky","pawl","pawn","pawnbroker",
+            "pawnshop","pawpaw","pay","payable","payday",
+            "payee","payer","payload","paymaster","payment",
+            "paynim","payoff","payola","payroll","pea",
+            "peace","peaceable","peaceful","peacekeeping","peacemaker",
+            "peacetime","peach","peachick","peacock","peafowl",
+            "peahen","peak","peaked","peaky","peal",
+            "peanut","peanuts","pear","pearl","pearly",
+            "pearmain","peasant","peasantry","peashooter","peat",
+            "pebble","pebbledash","pebbly","pecan","peccadillo",
+            "peccary","peck","pecker","peckish","pectic",
+            "pectin","pectoral","peculate","peculiar","peculiarity",
+            "peculiarly","pecuniary","pedagogue","pedagogy","pedal",
+        };
+
     }
 }
\ No newline at end of file


[26/50] [abbrv] lucenenet git commit: Added reminder to change AcceptsDocsOutOfOrder to a property.

Posted by sy...@apache.org.
Added reminder to change AcceptsDocsOutOfOrder to a property.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/694676c9
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/694676c9
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/694676c9

Branch: refs/heads/analysis-work
Commit: 694676c9de8d0cabe0818d38a806cbd80566ac65
Parents: 8b210c0
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Aug 21 08:05:33 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 08:05:33 2016 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Core/Search/Collector.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/694676c9/src/Lucene.Net.Core/Search/Collector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Search/Collector.cs b/src/Lucene.Net.Core/Search/Collector.cs
index b98d05d..57deb0b 100644
--- a/src/Lucene.Net.Core/Search/Collector.cs
+++ b/src/Lucene.Net.Core/Search/Collector.cs
@@ -170,6 +170,6 @@ namespace Lucene.Net.Search
         /// order, so it's important to return <code>true</code>
         /// here.
         /// </summary>
-        public abstract bool AcceptsDocsOutOfOrder();
+        public abstract bool AcceptsDocsOutOfOrder(); // LUCENENET TODO: Change this to a property getter
     }
 }
\ No newline at end of file


[07/50] [abbrv] lucenenet git commit: Ported Analysis.Hunspell + tests

Posted by sy...@apache.org.
Ported Analysis.Hunspell + tests


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/e4d9f44c
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/e4d9f44c
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/e4d9f44c

Branch: refs/heads/analysis-work
Commit: e4d9f44c38c55f32811722c4e98343340223072d
Parents: d9fb43e
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Aug 20 10:19:33 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Aug 20 11:34:09 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Hunspell/Dictionary.cs             | 2377 +++++++++---------
 .../Analysis/Hunspell/HunspellStemFilter.cs     |  302 ++-
 .../Hunspell/HunspellStemFilterFactory.cs       |  174 +-
 .../Analysis/Hunspell/ISO8859_14Decoder.cs      |  131 +-
 .../Analysis/Hunspell/Stemmer.cs                |  898 ++++---
 .../Lucene.Net.Analysis.Common.csproj           |    5 +
 .../Support/IDictionaryExtensions.cs            |    2 +-
 .../Analysis/Hunspell/StemmerTestBase.cs        |  132 +-
 .../Analysis/Hunspell/TestAllDictionaries.cs    |  146 +-
 .../Analysis/Hunspell/TestAllDictionaries2.cs   |    3 +-
 .../Analysis/Hunspell/TestCaseInsensitive.cs    |  136 +-
 .../Analysis/Hunspell/TestCircumfix.cs          |   50 +-
 .../Analysis/Hunspell/TestComplexPrefix.cs      |   53 +-
 .../Analysis/Hunspell/TestCondition.cs          |   59 +-
 .../Analysis/Hunspell/TestConv.cs               |   47 +-
 .../Analysis/Hunspell/TestDependencies.cs       |   52 +-
 .../Analysis/Hunspell/TestDictionary.cs         |  602 +++--
 .../Analysis/Hunspell/TestEscaped.cs            |   48 +-
 .../Analysis/Hunspell/TestFlagLong.cs           |   42 +-
 .../Analysis/Hunspell/TestFlagNum.cs            |   42 +-
 .../Analysis/Hunspell/TestHomonyms.cs           |   38 +-
 .../Analysis/Hunspell/TestHunspellStemFilter.cs |  308 ++-
 .../Hunspell/TestHunspellStemFilterFactory.cs   |   73 +-
 .../Analysis/Hunspell/TestIgnore.cs             |   46 +-
 .../Analysis/Hunspell/TestMorph.cs              |   42 +-
 .../Analysis/Hunspell/TestOptionalCondition.cs  |   60 +-
 .../Analysis/Hunspell/TestStemmer.cs            |  132 +-
 .../Analysis/Hunspell/TestTwoFold.cs            |   48 +-
 .../Analysis/Hunspell/TestTwoSuffixes.cs        |   46 +-
 .../Analysis/Hunspell/broken-flags.aff          |   21 +
 .../Analysis/Hunspell/broken.aff                |   24 +
 .../Analysis/Hunspell/circumfix.aff             |   14 +
 .../Analysis/Hunspell/circumfix.dic             |    2 +
 .../Analysis/Hunspell/complexprefix.aff         |   12 +
 .../Analysis/Hunspell/complexprefix.dic         |    2 +
 .../Analysis/Hunspell/compressed-before-set.aff |   29 +
 .../Hunspell/compressed-empty-alias.aff         |   30 +
 .../Analysis/Hunspell/compressed.aff            |   29 +
 .../Analysis/Hunspell/compressed.dic            |    9 +
 .../Analysis/Hunspell/condition.aff             |   13 +
 .../Analysis/Hunspell/condition.dic             |    4 +
 .../Analysis/Hunspell/conv.aff                  |   16 +
 .../Analysis/Hunspell/conv.dic                  |    2 +
 .../Analysis/Hunspell/dependencies.aff          |   13 +
 .../Analysis/Hunspell/dependencies.dic          |    3 +
 .../Analysis/Hunspell/escaped.aff               |    4 +
 .../Analysis/Hunspell/escaped.dic               |    4 +
 .../Analysis/Hunspell/flaglong.aff              |    4 +
 .../Analysis/Hunspell/flaglong.dic              |    2 +
 .../Analysis/Hunspell/flagnum.aff               |    4 +
 .../Analysis/Hunspell/flagnum.dic               |    3 +
 .../Analysis/Hunspell/homonyms.aff              |    7 +
 .../Analysis/Hunspell/homonyms.dic              |    3 +
 .../Analysis/Hunspell/ignore.aff                |    6 +
 .../Analysis/Hunspell/ignore.dic                |    3 +
 .../Analysis/Hunspell/mixedcase.dic             |   10 +
 .../Analysis/Hunspell/morph.aff                 |    4 +
 .../Analysis/Hunspell/morph.dic                 |    2 +
 .../Analysis/Hunspell/optional-condition.aff    |   14 +
 .../Analysis/Hunspell/simple.aff                |   20 +
 .../Analysis/Hunspell/simple.dic                |   10 +
 .../Analysis/Hunspell/twofold.aff               |    7 +
 .../Analysis/Hunspell/twosuffixes.aff           |    7 +
 .../Analysis/Hunspell/twosuffixes.dic           |    2 +
 .../Util/BaseTokenStreamFactoryTestCase.cs      |    2 +-
 .../Lucene.Net.Tests.Analysis.Common.csproj     |   57 +
 66 files changed, 3446 insertions(+), 3046 deletions(-)
----------------------------------------------------------------------



[28/50] [abbrv] lucenenet git commit: Fixed missing Reset() call that caused Queries.Function.ValueSources.TFValueSource.TestTF() to fail.

Posted by sy...@apache.org.
Fixed missing Reset() call that caused Queries.Function.ValueSources.TFValueSource.TestTF() to fail.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/11cf1b87
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/11cf1b87
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/11cf1b87

Branch: refs/heads/analysis-work
Commit: 11cf1b8789c3cb30d8ad0316a4859a875c2fdac0
Parents: 1727c1b
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Aug 21 09:57:05 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 09:57:05 2016 +0700

----------------------------------------------------------------------
 .../Function/ValueSources/TFValueSource.cs                   | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/11cf1b87/src/Lucene.Net.Queries/Function/ValueSources/TFValueSource.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/Function/ValueSources/TFValueSource.cs b/src/Lucene.Net.Queries/Function/ValueSources/TFValueSource.cs
index 4e8efb0..aff074b 100644
--- a/src/Lucene.Net.Queries/Function/ValueSources/TFValueSource.cs
+++ b/src/Lucene.Net.Queries/Function/ValueSources/TFValueSource.cs
@@ -81,19 +81,13 @@ namespace Lucene.Net.Queries.Function.ValueSources
                 this.terms = terms;
                 this.similarity = similarity;
                 lastDocRequested = -1;
+                Reset();
             }
 
             private DocsEnum docs;
             private int atDoc;
             private int lastDocRequested;
 
-            //JAVA TO C# CONVERTER TODO TASK: Initialization blocks declared within anonymous inner classes are not converted:
-            //	  {
-            //		  reset();
-            //	  }
-
-            //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-            //ORIGINAL LINE: public void reset() throws java.io.IOException
             public virtual void Reset()
             {
                 // no one should call us for deleted docs?


[37/50] [abbrv] lucenenet git commit: Ported Analysis.Compound namespace + tests

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestCompoundWordTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestCompoundWordTokenFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestCompoundWordTokenFilter.cs
index 7aa8a77..1feb390 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestCompoundWordTokenFilter.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestCompoundWordTokenFilter.cs
@@ -1,7 +1,16 @@
-\ufeffnamespace org.apache.lucene.analysis.compound
+\ufeffusing Lucene.Net.Analysis.CharFilters;
+using Lucene.Net.Analysis.Compound.Hyphenation;
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Compound
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,395 +27,370 @@
 	 * limitations under the License.
 	 */
 
+    public class TestCompoundWordTokenFilter : BaseTokenStreamTestCase
+    {
+
+        private static CharArraySet makeDictionary(params string[] dictionary)
+        {
+            return new CharArraySet(TEST_VERSION_CURRENT, Arrays.AsList(dictionary), true);
+        }
 
-	using MappingCharFilter = org.apache.lucene.analysis.charfilter.MappingCharFilter;
-	using NormalizeCharMap = org.apache.lucene.analysis.charfilter.NormalizeCharMap;
-	using HyphenationTree = org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
-	using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
-	using WhitespaceTokenizer = org.apache.lucene.analysis.core.WhitespaceTokenizer;
-	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using Attribute = org.apache.lucene.util.Attribute;
-	using AttributeImpl = org.apache.lucene.util.AttributeImpl;
-	using InputSource = org.xml.sax.InputSource;
-
-	public class TestCompoundWordTokenFilter : BaseTokenStreamTestCase
-	{
-
-	  private static CharArraySet makeDictionary(params string[] dictionary)
-	  {
-		return new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(dictionary), true);
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testHyphenationCompoundWordsDA() throws Exception
-	  public virtual void testHyphenationCompoundWordsDA()
-	  {
-		CharArraySet dict = makeDictionary("l�se", "hest");
-
-		InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
-		HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
-
-		HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("min veninde som er lidt af en l�sehest"), MockTokenizer.WHITESPACE, false), hyphenator, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
-		assertTokenStreamContents(tf, new string[] {"min", "veninde", "som", "er", "lidt", "af", "en", "l�sehest", "l�se", "hest"}, new int[] {1, 1, 1, 1, 1, 1, 1, 1, 0, 0});
-	  }
+        [Test]
+        public virtual void TestHyphenationCompoundWordsDA()
+        {
+            CharArraySet dict = makeDictionary("l�se", "hest");
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testHyphenationCompoundWordsDELongestMatch() throws Exception
-	  public virtual void testHyphenationCompoundWordsDELongestMatch()
-	  {
-		CharArraySet dict = makeDictionary("basketball", "basket", "ball", "kurv");
-
-		InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
-		HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
-
-		// the word basket will not be added due to the longest match option
-		HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true);
-		assertTokenStreamContents(tf, new string[] {"basketballkurv", "basketball", "ball", "kurv"}, new int[] {1, 0, 0, 0});
-
-	  }
-
-	  /// <summary>
-	  /// With hyphenation-only, you can get a lot of nonsense tokens.
-	  /// This can be controlled with the min/max subword size.
-	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testHyphenationOnly() throws Exception
-	  public virtual void testHyphenationOnly()
-	  {
-		InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
-		HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
-
-		HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 2, 4);
-
-		// min=2, max=4
-		assertTokenStreamContents(tf, new string[] {"basketballkurv", "ba", "sket", "bal", "ball", "kurv"});
-
-		tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 4, 6);
-
-		// min=4, max=6
-		assertTokenStreamContents(tf, new string[] {"basketballkurv", "basket", "sket", "ball", "lkurv", "kurv"});
-
-		tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 4, 10);
-
-		// min=4, max=10
-		assertTokenStreamContents(tf, new string[] {"basketballkurv", "basket", "basketbal", "basketball", "sket", "sketbal", "sketball", "ball", "ballkurv", "lkurv", "kurv"});
-
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testDumbCompoundWordsSE() throws Exception
-	  public virtual void testDumbCompoundWordsSE()
-	  {
-		CharArraySet dict = makeDictionary("Bil", "D�rr", "Motor", "Tak", "Borr", "Slag", "Hammar", "Pelar", "Glas", "�gon", "Fodral", "Bas", "Fiol", "Makare", "Ges�ll", "Sko", "Vind", "Rute", "Torkare", "Blad");
-
-		DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("Bild�rr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glas�gonfodral Basfiolsfodral Basfiolsfodralmakareges�ll Skomakare Vindrutetorkare Vindrutetorkarblad abba"), MockTokenizer.WHITESPACE, false), dict);
-
-		assertTokenStreamContents(tf, new string[] {"Bild�rr", "Bil", "d�rr", "Bilmotor", "Bil", "motor", "Biltak", "Bil", "tak", "Slagborr", "Slag", "borr", "Hammarborr", "Hammar", "borr", "Pelarborr", "Pelar", "borr", "Glas�gonfodral", "Glas", "�gon", "fodral", "Basfiolsfodral", "Bas", "fiol", "fodral", "Basfiolsfodralmakareges�ll", "Bas", "fiol", "fodral", "makare", "ges�ll", "Skomakare", "Sko", "makare", "Vindrutetorkare", "Vind", "rute", "torkare", "Vindrutetorkarblad", "Vind", "rute", "blad", "abba"}, new int[] {0, 0, 0, 8, 8, 8, 17, 17, 17, 24, 24, 24, 33, 33, 33, 44, 44, 44, 54, 54, 54, 54, 69, 69, 69, 69, 84, 84, 84, 84, 84, 84, 111, 111, 111, 121, 121, 121, 121, 137, 137, 137, 137, 156}, new int[] {7, 7, 7, 16, 16, 16, 23, 23, 23, 32, 32, 32, 43, 43, 43, 53, 53, 53, 68, 68, 68, 68, 83, 83, 83, 83, 110, 110, 110, 110, 110, 110, 120, 120, 120, 136, 136, 136, 136, 155, 155, 155, 155, 160}, new int[] {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0
 , 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1});
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testDumbCompoundWordsSELongestMatch() throws Exception
-	  public virtual void testDumbCompoundWordsSELongestMatch()
-	  {
-		CharArraySet dict = makeDictionary("Bil", "D�rr", "Motor", "Tak", "Borr", "Slag", "Hammar", "Pelar", "Glas", "�gon", "Fodral", "Bas", "Fiols", "Makare", "Ges�ll", "Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral");
-
-		DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("Basfiolsfodralmakareges�ll"), MockTokenizer.WHITESPACE, false), dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, true);
-
-		assertTokenStreamContents(tf, new string[] {"Basfiolsfodralmakareges�ll", "Bas", "fiolsfodral", "fodral", "makare", "ges�ll"}, new int[] {0, 0, 0, 0, 0, 0}, new int[] {26, 26, 26, 26, 26, 26}, new int[] {1, 0, 0, 0, 0, 0});
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testTokenEndingWithWordComponentOfMinimumLength() throws Exception
-	  public virtual void testTokenEndingWithWordComponentOfMinimumLength()
-	  {
-		CharArraySet dict = makeDictionary("ab", "cd", "ef");
-
-		DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcdef")
-		   ), dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
-
-		assertTokenStreamContents(tf, new string[] {"abcdef", "ab", "cd", "ef"}, new int[] {0, 0, 0, 0}, new int[] {6, 6, 6, 6}, new int[] {1, 0, 0, 0});
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testWordComponentWithLessThanMinimumLength() throws Exception
-	  public virtual void testWordComponentWithLessThanMinimumLength()
-	  {
-		CharArraySet dict = makeDictionary("abc", "d", "efg");
-
-		DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcdefg")
-		   ), dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
-
-	  // since "d" is shorter than the minimum subword size, it should not be added to the token stream
-		assertTokenStreamContents(tf, new string[] {"abcdefg", "abc", "efg"}, new int[] {0, 0, 0}, new int[] {7, 7, 7}, new int[] {1, 0, 0});
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testReset() throws Exception
-	  public virtual void testReset()
-	  {
-		CharArraySet dict = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz", "Aufgabe", "�berwachung");
-
-		Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Rindfleisch�berwachungsgesetz"));
-		DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, wsTokenizer, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
-
-		CharTermAttribute termAtt = tf.getAttribute(typeof(CharTermAttribute));
-		tf.reset();
-		assertTrue(tf.incrementToken());
-		assertEquals("Rindfleisch�berwachungsgesetz", termAtt.ToString());
-		assertTrue(tf.incrementToken());
-		assertEquals("Rind", termAtt.ToString());
-		tf.end();
-		tf.close();
-		wsTokenizer.Reader = new StringReader("Rindfleisch�berwachungsgesetz");
-		tf.reset();
-		assertTrue(tf.incrementToken());
-		assertEquals("Rindfleisch�berwachungsgesetz", termAtt.ToString());
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testRetainMockAttribute() throws Exception
-	  public virtual void testRetainMockAttribute()
-	  {
-		CharArraySet dict = makeDictionary("abc", "d", "efg");
-		Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcdefg"));
-		TokenStream stream = new MockRetainAttributeFilter(tokenizer);
-		stream = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, stream, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
-		MockRetainAttribute retAtt = stream.addAttribute(typeof(MockRetainAttribute));
-		stream.reset();
-		while (stream.incrementToken())
-		{
-		  assertTrue("Custom attribute value was lost", retAtt.Retain);
-		}
-
-	  }
-
-	  public interface MockRetainAttribute : Attribute
-	  {
-		bool Retain {set;get;}
-	  }
-
-	  public sealed class MockRetainAttributeImpl : AttributeImpl, MockRetainAttribute
-	  {
-		internal bool retain = false;
-		public override void clear()
-		{
-		  retain = false;
-		}
-		public bool Retain
-		{
-			get
-			{
-			  return retain;
-			}
-			set
-			{
-			  this.retain = value;
-			}
-		}
-		public override void copyTo(AttributeImpl target)
-		{
-		  MockRetainAttribute t = (MockRetainAttribute) target;
-		  t.Retain = retain;
-		}
-	  }
-
-	  private class MockRetainAttributeFilter : TokenFilter
-	  {
-
-		internal MockRetainAttribute retainAtt = addAttribute(typeof(MockRetainAttribute));
-
-		internal MockRetainAttributeFilter(TokenStream input) : base(input)
-		{
-		}
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
-		public override bool incrementToken()
-		{
-		  if (input.incrementToken())
-		  {
-			retainAtt.Retain = true;
-			return true;
-		  }
-		  else
-		  {
-		  return false;
-		  }
-		}
-	  }
-
-	  // SOLR-2891
-	  // *CompoundWordTokenFilter blindly adds term length to offset, but this can take things out of bounds
-	  // wrt original text if a previous filter increases the length of the word (in this case � -> ue)
-	  // so in this case we behave like WDF, and preserve any modified offsets
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testInvalidOffsets() throws Exception
-	  public virtual void testInvalidOffsets()
-	  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.analysis.util.CharArraySet dict = makeDictionary("fall");
-		CharArraySet dict = makeDictionary("fall");
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.analysis.charfilter.NormalizeCharMap.Builder builder = new org.apache.lucene.analysis.charfilter.NormalizeCharMap.Builder();
-		NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
-		builder.add("�", "ue");
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.analysis.charfilter.NormalizeCharMap normMap = builder.build();
-		NormalizeCharMap normMap = builder.build();
-
-		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, dict, normMap);
-
-		assertAnalyzesTo(analyzer, "bank�berfall", new string[] {"bankueberfall", "fall"}, new int[] {0, 0}, new int[] {12, 12});
-	  }
-
-	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
-	  {
-		  private readonly TestCompoundWordTokenFilter outerInstance;
-
-		  private CharArraySet dict;
-		  private NormalizeCharMap normMap;
-
-		  public AnalyzerAnonymousInnerClassHelper(TestCompoundWordTokenFilter outerInstance, CharArraySet dict, NormalizeCharMap normMap)
-		  {
-			  this.outerInstance = outerInstance;
-			  this.dict = dict;
-			  this.normMap = normMap;
-		  }
-
-
-		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
-		  {
-			Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-			TokenFilter filter = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict);
-			return new TokenStreamComponents(tokenizer, filter);
-		  }
-
-		  protected internal override Reader initReader(string fieldName, Reader reader)
-		  {
-			return new MappingCharFilter(normMap, reader);
-		  }
-	  }
-
-	  /// <summary>
-	  /// blast some random strings through the analyzer </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testRandomStrings() throws Exception
-	  public virtual void testRandomStrings()
-	  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.analysis.util.CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
-		CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
-		Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, dict);
-		checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
-
-		InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.analysis.compound.hyphenation.HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
-		HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
-		Analyzer b = new AnalyzerAnonymousInnerClassHelper3(this, hyphenator);
-		checkRandomData(random(), b, 1000 * RANDOM_MULTIPLIER);
-	  }
-
-	  private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
-	  {
-		  private readonly TestCompoundWordTokenFilter outerInstance;
-
-		  private CharArraySet dict;
-
-		  public AnalyzerAnonymousInnerClassHelper2(TestCompoundWordTokenFilter outerInstance, CharArraySet dict)
-		  {
-			  this.outerInstance = outerInstance;
-			  this.dict = dict;
-		  }
-
-
-		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
-		  {
-			Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-			return new TokenStreamComponents(tokenizer, new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict));
-		  }
-	  }
-
-	  private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
-	  {
-		  private readonly TestCompoundWordTokenFilter outerInstance;
-
-		  private HyphenationTree hyphenator;
-
-		  public AnalyzerAnonymousInnerClassHelper3(TestCompoundWordTokenFilter outerInstance, HyphenationTree hyphenator)
-		  {
-			  this.outerInstance = outerInstance;
-			  this.hyphenator = hyphenator;
-		  }
-
-
-		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
-		  {
-			Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-			TokenFilter filter = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, hyphenator);
-			return new TokenStreamComponents(tokenizer, filter);
-		  }
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testEmptyTerm() throws Exception
-	  public virtual void testEmptyTerm()
-	  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.analysis.util.CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
-		CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
-		Analyzer a = new AnalyzerAnonymousInnerClassHelper4(this, dict);
-		checkOneTerm(a, "", "");
-
-		InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.analysis.compound.hyphenation.HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
-		HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
-		Analyzer b = new AnalyzerAnonymousInnerClassHelper5(this, hyphenator);
-		checkOneTerm(b, "", "");
-	  }
-
-	  private class AnalyzerAnonymousInnerClassHelper4 : Analyzer
-	  {
-		  private readonly TestCompoundWordTokenFilter outerInstance;
-
-		  private CharArraySet dict;
-
-		  public AnalyzerAnonymousInnerClassHelper4(TestCompoundWordTokenFilter outerInstance, CharArraySet dict)
-		  {
-			  this.outerInstance = outerInstance;
-			  this.dict = dict;
-		  }
-
-
-		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
-		  {
-			Tokenizer tokenizer = new KeywordTokenizer(reader);
-			return new TokenStreamComponents(tokenizer, new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict));
-		  }
-	  }
-
-	  private class AnalyzerAnonymousInnerClassHelper5 : Analyzer
-	  {
-		  private readonly TestCompoundWordTokenFilter outerInstance;
-
-		  private HyphenationTree hyphenator;
-
-		  public AnalyzerAnonymousInnerClassHelper5(TestCompoundWordTokenFilter outerInstance, HyphenationTree hyphenator)
-		  {
-			  this.outerInstance = outerInstance;
-			  this.hyphenator = hyphenator;
-		  }
-
-
-		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
-		  {
-			Tokenizer tokenizer = new KeywordTokenizer(reader);
-			TokenFilter filter = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, hyphenator);
-			return new TokenStreamComponents(tokenizer, filter);
-		  }
-	  }
-	}
+            //InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
+            using (var @is = this.GetType().getResourceAsStream("da_UTF8.xml"))
+            {
+                HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.GetHyphenationTree(@is);
 
+                HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("min veninde som er lidt af en l�sehest"), MockTokenizer.WHITESPACE, false), hyphenator, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
+                AssertTokenStreamContents(tf, new string[] { "min", "veninde", "som", "er", "lidt", "af", "en", "l�sehest", "l�se", "hest" }, new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 });
+            }
+        }
+
+        [Test]
+        public virtual void TestHyphenationCompoundWordsDELongestMatch()
+        {
+            CharArraySet dict = makeDictionary("basketball", "basket", "ball", "kurv");
+
+            //InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
+            using (var @is = this.GetType().getResourceAsStream("da_UTF8.xml"))
+            {
+                HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.GetHyphenationTree(@is);
+
+                // the word basket will not be added due to the longest match option
+                HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, 40, true);
+                AssertTokenStreamContents(tf, new string[] { "basketballkurv", "basketball", "ball", "kurv" }, new int[] { 1, 0, 0, 0 });
+            }
+        }
+
+        /// <summary>
+        /// With hyphenation-only, you can get a lot of nonsense tokens.
+        /// This can be controlled with the min/max subword size.
+        /// </summary>
+        [Test]
+        public virtual void TestHyphenationOnly()
+        {
+            //InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
+            using (var @is = this.GetType().getResourceAsStream("da_UTF8.xml"))
+            {
+                HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.GetHyphenationTree(@is);
+
+                HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 2, 4);
+
+                // min=2, max=4
+                AssertTokenStreamContents(tf, new string[] { "basketballkurv", "ba", "sket", "bal", "ball", "kurv" });
+
+                tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 4, 6);
+
+                // min=4, max=6
+                AssertTokenStreamContents(tf, new string[] { "basketballkurv", "basket", "sket", "ball", "lkurv", "kurv" });
+
+                tf = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false), hyphenator, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, 4, 10);
+
+                // min=4, max=10
+                AssertTokenStreamContents(tf, new string[] { "basketballkurv", "basket", "basketbal", "basketball", "sket", "sketbal", "sketball", "ball", "ballkurv", "lkurv", "kurv" });
+            }
+        }
+
+        [Test]
+        public virtual void TestDumbCompoundWordsSE()
+        {
+            CharArraySet dict = makeDictionary("Bil", "D�rr", "Motor", "Tak", "Borr", "Slag", "Hammar", "Pelar", "Glas", "�gon", "Fodral", "Bas", "Fiol", "Makare", "Ges�ll", "Sko", "Vind", "Rute", "Torkare", "Blad");
+
+            DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("Bild�rr Bilmotor Biltak Slagborr Hammarborr Pelarborr Glas�gonfodral Basfiolsfodral Basfiolsfodralmakareges�ll Skomakare Vindrutetorkare Vindrutetorkarblad abba"), MockTokenizer.WHITESPACE, false), dict);
+
+            AssertTokenStreamContents(tf, new string[] { "Bild�rr", "Bil", "d�rr", "Bilmotor", "Bil", "motor", "Biltak", "Bil", "tak", "Slagborr", "Slag", "borr", "Hammarborr", "Hammar", "borr", "Pelarborr", "Pelar", "borr", "Glas�gonfodral", "Glas", "�gon", "fodral", "Basfiolsfodral", "Bas", "fiol", "fodral", "Basfiolsfodralmakareges�ll", "Bas", "fiol", "fodral", "makare", "ges�ll", "Skomakare", "Sko", "makare", "Vindrutetorkare", "Vind", "rute", "torkare", "Vindrutetorkarblad", "Vind", "rute", "blad", "abba" }, new int[] { 0, 0, 0, 8, 8, 8, 17, 17, 17, 24, 24, 24, 33, 33, 33, 44, 44, 44, 54, 54, 54, 54, 69, 69, 69, 69, 84, 84, 84, 84, 84, 84, 111, 111, 111, 121, 121, 121, 121, 137, 137, 137, 137, 156 }, new int[] { 7, 7, 7, 16, 16, 16, 23, 23, 23, 32, 32, 32, 43, 43, 43, 53, 53, 53, 68, 68, 68, 68, 83, 83, 83, 83, 110, 110, 110, 110, 110, 110, 120, 120, 120, 136, 136, 136, 136, 155, 155, 155, 155, 160 }, new int[] { 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0,
  0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1 });
+        }
+
+        [Test]
+        public virtual void TestDumbCompoundWordsSELongestMatch()
+        {
+            CharArraySet dict = makeDictionary("Bil", "D�rr", "Motor", "Tak", "Borr", "Slag", "Hammar", "Pelar", "Glas", "�gon", "Fodral", "Bas", "Fiols", "Makare", "Ges�ll", "Sko", "Vind", "Rute", "Torkare", "Blad", "Fiolsfodral");
+
+            DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("Basfiolsfodralmakareges�ll"), MockTokenizer.WHITESPACE, false), dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, true);
+
+            AssertTokenStreamContents(tf, new string[] { "Basfiolsfodralmakareges�ll", "Bas", "fiolsfodral", "fodral", "makare", "ges�ll" }, new int[] { 0, 0, 0, 0, 0, 0 }, new int[] { 26, 26, 26, 26, 26, 26 }, new int[] { 1, 0, 0, 0, 0, 0 });
+        }
+
+        [Test]
+        public virtual void TestTokenEndingWithWordComponentOfMinimumLength()
+        {
+            CharArraySet dict = makeDictionary("ab", "cd", "ef");
+
+            DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcdef")
+               ), dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
+
+            AssertTokenStreamContents(tf, new string[] { "abcdef", "ab", "cd", "ef" }, new int[] { 0, 0, 0, 0 }, new int[] { 6, 6, 6, 6 }, new int[] { 1, 0, 0, 0 });
+        }
+
+        [Test]
+        public virtual void TestWordComponentWithLessThanMinimumLength()
+        {
+            CharArraySet dict = makeDictionary("abc", "d", "efg");
+
+            DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcdefg")
+               ), dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
+
+            // since "d" is shorter than the minimum subword size, it should not be added to the token stream
+            AssertTokenStreamContents(tf, new string[] { "abcdefg", "abc", "efg" }, new int[] { 0, 0, 0 }, new int[] { 7, 7, 7 }, new int[] { 1, 0, 0 });
+        }
+
+        [Test]
+        public virtual void TestReset()
+        {
+            CharArraySet dict = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz", "Aufgabe", "�berwachung");
+
+            Tokenizer wsTokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Rindfleisch�berwachungsgesetz"));
+            DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, wsTokenizer, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
+
+            ICharTermAttribute termAtt = tf.GetAttribute<ICharTermAttribute>();
+            tf.Reset();
+            assertTrue(tf.IncrementToken());
+            assertEquals("Rindfleisch�berwachungsgesetz", termAtt.ToString());
+            assertTrue(tf.IncrementToken());
+            assertEquals("Rind", termAtt.ToString());
+            tf.End();
+            tf.Dispose();
+            wsTokenizer.Reader = new StringReader("Rindfleisch�berwachungsgesetz");
+            tf.Reset();
+            assertTrue(tf.IncrementToken());
+            assertEquals("Rindfleisch�berwachungsgesetz", termAtt.ToString());
+        }
+
+        [Test]
+        public virtual void TestRetainMockAttribute()
+        {
+            CharArraySet dict = makeDictionary("abc", "d", "efg");
+            Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcdefg"));
+            TokenStream stream = new MockRetainAttributeFilter(tokenizer);
+            stream = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, stream, dict, CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
+            IMockRetainAttribute retAtt = stream.AddAttribute<IMockRetainAttribute>();
+            stream.Reset();
+            while (stream.IncrementToken())
+            {
+                assertTrue("Custom attribute value was lost", retAtt.Retain);
+            }
+
+        }
+
+        public interface IMockRetainAttribute : IAttribute
+        {
+            bool Retain { set; get; }
+        }
+
+        public sealed class MockRetainAttribute : Attribute, IMockRetainAttribute
+        {
+            internal bool retain = false;
+            public override void Clear()
+            {
+                retain = false;
+            }
+            public bool Retain
+            {
+                get
+                {
+                    return retain;
+                }
+                set
+                {
+                    this.retain = value;
+                }
+            }
+            public override void CopyTo(Attribute target)
+            {
+                IMockRetainAttribute t = (IMockRetainAttribute)target;
+                t.Retain = retain;
+            }
+        }
+
+        private class MockRetainAttributeFilter : TokenFilter
+        {
+
+            internal IMockRetainAttribute retainAtt;
+
+            internal MockRetainAttributeFilter(TokenStream input)
+                    : base(input)
+            {
+                retainAtt = AddAttribute<IMockRetainAttribute>();
+            }
+
+            public override sealed bool IncrementToken()
+            {
+                if (input.IncrementToken())
+                {
+                    retainAtt.Retain = true;
+                    return true;
+                }
+                else
+                {
+                    return false;
+                }
+            }
+        }
+
+        // SOLR-2891
+        // *CompoundWordTokenFilter blindly adds term length to offset, but this can take things out of bounds
+        // wrt original text if a previous filter increases the length of the word (in this case � -> ue)
+        // so in this case we behave like WDF, and preserve any modified offsets
+        [Test]
+        public virtual void TestInvalidOffsets()
+        {
+            CharArraySet dict = makeDictionary("fall");
+            NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
+            builder.Add("�", "ue");
+            NormalizeCharMap normMap = builder.Build();
+
+            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, dict, normMap);
+
+            AssertAnalyzesTo(analyzer, "bank�berfall", new string[] { "bankueberfall", "fall" }, new int[] { 0, 0 }, new int[] { 12, 12 });
+        }
+
+        private class AnalyzerAnonymousInnerClassHelper : Analyzer
+        {
+            private readonly TestCompoundWordTokenFilter outerInstance;
+
+            private CharArraySet dict;
+            private NormalizeCharMap normMap;
+
+            public AnalyzerAnonymousInnerClassHelper(TestCompoundWordTokenFilter outerInstance, CharArraySet dict, NormalizeCharMap normMap)
+            {
+                this.outerInstance = outerInstance;
+                this.dict = dict;
+                this.normMap = normMap;
+            }
+
+
+            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                TokenFilter filter = new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict);
+                return new TokenStreamComponents(tokenizer, filter);
+            }
+
+            public override TextReader InitReader(string fieldName, TextReader reader)
+            {
+                return new MappingCharFilter(normMap, reader);
+            }
+        }
+
+        /// <summary>
+        /// blast some random strings through the analyzer </summary>
+        [Test]
+        public virtual void TestRandomStrings()
+        {
+            CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
+            Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, dict);
+            CheckRandomData(Random(), a, 1000 * RANDOM_MULTIPLIER);
+
+            //InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
+            using (var @is = this.GetType().getResourceAsStream("da_UTF8.xml"))
+            {
+                HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.GetHyphenationTree(@is);
+                Analyzer b = new AnalyzerAnonymousInnerClassHelper3(this, hyphenator);
+                CheckRandomData(Random(), b, 1000 * RANDOM_MULTIPLIER);
+            }
+        }
+
+        private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+        {
+            private readonly TestCompoundWordTokenFilter outerInstance;
+
+            private CharArraySet dict;
+
+            public AnalyzerAnonymousInnerClassHelper2(TestCompoundWordTokenFilter outerInstance, CharArraySet dict)
+            {
+                this.outerInstance = outerInstance;
+                this.dict = dict;
+            }
+
+
+            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                return new TokenStreamComponents(tokenizer, new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict));
+            }
+        }
+
+        private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+        {
+            private readonly TestCompoundWordTokenFilter outerInstance;
+
+            private HyphenationTree hyphenator;
+
+            public AnalyzerAnonymousInnerClassHelper3(TestCompoundWordTokenFilter outerInstance, HyphenationTree hyphenator)
+            {
+                this.outerInstance = outerInstance;
+                this.hyphenator = hyphenator;
+            }
+
+
+            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                TokenFilter filter = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, hyphenator);
+                return new TokenStreamComponents(tokenizer, filter);
+            }
+        }
+
+        [Test]
+        public virtual void TestEmptyTerm()
+        {
+            CharArraySet dict = makeDictionary("a", "e", "i", "o", "u", "y", "bc", "def");
+            Analyzer a = new AnalyzerAnonymousInnerClassHelper4(this, dict);
+            CheckOneTerm(a, "", "");
+
+            //InputSource @is = new InputSource(this.GetType().getResource("da_UTF8.xml").toExternalForm());
+            using (var @is = this.GetType().getResourceAsStream("da_UTF8.xml"))
+            {
+
+                HyphenationTree hyphenator = HyphenationCompoundWordTokenFilter.GetHyphenationTree(@is);
+                Analyzer b = new AnalyzerAnonymousInnerClassHelper5(this, hyphenator);
+                CheckOneTerm(b, "", "");
+            }
+        }
+
+        private class AnalyzerAnonymousInnerClassHelper4 : Analyzer
+        {
+            private readonly TestCompoundWordTokenFilter outerInstance;
+
+            private CharArraySet dict;
+
+            public AnalyzerAnonymousInnerClassHelper4(TestCompoundWordTokenFilter outerInstance, CharArraySet dict)
+            {
+                this.outerInstance = outerInstance;
+                this.dict = dict;
+            }
+
+
+            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer tokenizer = new KeywordTokenizer(reader);
+                return new TokenStreamComponents(tokenizer, new DictionaryCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, dict));
+            }
+        }
+
+        private class AnalyzerAnonymousInnerClassHelper5 : Analyzer
+        {
+            private readonly TestCompoundWordTokenFilter outerInstance;
+
+            private HyphenationTree hyphenator;
+
+            public AnalyzerAnonymousInnerClassHelper5(TestCompoundWordTokenFilter outerInstance, HyphenationTree hyphenator)
+            {
+                this.outerInstance = outerInstance;
+                this.hyphenator = hyphenator;
+            }
+
+
+            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer tokenizer = new KeywordTokenizer(reader);
+                TokenFilter filter = new HyphenationCompoundWordTokenFilter(TEST_VERSION_CURRENT, tokenizer, hyphenator);
+                return new TokenStreamComponents(tokenizer, filter);
+            }
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestDictionaryCompoundWordTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestDictionaryCompoundWordTokenFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestDictionaryCompoundWordTokenFilterFactory.cs
index dd219c0..4d469b1 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestDictionaryCompoundWordTokenFilterFactory.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestDictionaryCompoundWordTokenFilterFactory.cs
@@ -1,7 +1,10 @@
-\ufeffnamespace org.apache.lucene.analysis.compound
-{
+\ufeffusing Lucene.Net.Analysis.Util;
+using System.IO;
+using NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Compound
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,43 +21,37 @@
 	 * limitations under the License.
 	 */
 
+    /// <summary>
+    /// Simple tests to ensure the Dictionary compound filter factory is working.
+    /// </summary>
+    public class TestDictionaryCompoundWordTokenFilterFactory : BaseTokenStreamFactoryTestCase
+    {
+        /// <summary>
+        /// Ensure the filter actually decompounds text.
+        /// </summary>
+        [Test]
+        public virtual void TestDecompounding()
+        {
+            TextReader reader = new StringReader("I like to play softball");
+            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            stream = TokenFilterFactory("DictionaryCompoundWord", "dictionary", "compoundDictionary.txt").Create(stream);
+            AssertTokenStreamContents(stream, new string[] { "I", "like", "to", "play", "softball", "soft", "ball" });
+        }
 
-	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
-
-	/// <summary>
-	/// Simple tests to ensure the Dictionary compound filter factory is working.
-	/// </summary>
-	public class TestDictionaryCompoundWordTokenFilterFactory : BaseTokenStreamFactoryTestCase
-	{
-	  /// <summary>
-	  /// Ensure the filter actually decompounds text.
-	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testDecompounding() throws Exception
-	  public virtual void testDecompounding()
-	  {
-		Reader reader = new StringReader("I like to play softball");
-		TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-		stream = tokenFilterFactory("DictionaryCompoundWord", "dictionary", "compoundDictionary.txt").create(stream);
-		assertTokenStreamContents(stream, new string[] {"I", "like", "to", "play", "softball", "soft", "ball"});
-	  }
-
-	  /// <summary>
-	  /// Test that bogus arguments result in exception </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testBogusArguments() throws Exception
-	  public virtual void testBogusArguments()
-	  {
-		try
-		{
-		  tokenFilterFactory("DictionaryCompoundWord", "dictionary", "compoundDictionary.txt", "bogusArg", "bogusValue");
-		  fail();
-		}
-		catch (System.ArgumentException expected)
-		{
-		  assertTrue(expected.Message.contains("Unknown parameters"));
-		}
-	  }
-	}
-
+        /// <summary>
+        /// Test that bogus arguments result in exception </summary>
+        [Test]
+        public virtual void TestBogusArguments()
+        {
+            try
+            {
+                TokenFilterFactory("DictionaryCompoundWord", "dictionary", "compoundDictionary.txt", "bogusArg", "bogusValue");
+                fail();
+            }
+            catch (System.ArgumentException expected)
+            {
+                assertTrue(expected.Message.Contains("Unknown parameters"));
+            }
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestHyphenationCompoundWordTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestHyphenationCompoundWordTokenFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestHyphenationCompoundWordTokenFilterFactory.cs
index f195618..79bf1a5 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestHyphenationCompoundWordTokenFilterFactory.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestHyphenationCompoundWordTokenFilterFactory.cs
@@ -1,7 +1,10 @@
-\ufeffnamespace org.apache.lucene.analysis.compound
-{
+\ufeffusing Lucene.Net.Analysis.Util;
+using NUnit.Framework;
+using System.IO;
 
-	/*
+namespace Lucene.Net.Analysis.Compound
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,60 +21,53 @@
 	 * limitations under the License.
 	 */
 
+    /// <summary>
+    /// Simple tests to ensure the Hyphenation compound filter factory is working.
+    /// </summary>
+    public class TestHyphenationCompoundWordTokenFilterFactory : BaseTokenStreamFactoryTestCase
+    {
+        /// <summary>
+        /// Ensure the factory works with hyphenation grammar+dictionary: using default options.
+        /// </summary>
+        [Test]
+        public virtual void TestHyphenationWithDictionary()
+        {
+            TextReader reader = new StringReader("min veninde som er lidt af en l�sehest");
+            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            stream = TokenFilterFactory("HyphenationCompoundWord", "hyphenator", "da_UTF8.xml", "dictionary", "da_compoundDictionary.txt").Create(stream);
 
-	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
-
-	/// <summary>
-	/// Simple tests to ensure the Hyphenation compound filter factory is working.
-	/// </summary>
-	public class TestHyphenationCompoundWordTokenFilterFactory : BaseTokenStreamFactoryTestCase
-	{
-	  /// <summary>
-	  /// Ensure the factory works with hyphenation grammar+dictionary: using default options.
-	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testHyphenationWithDictionary() throws Exception
-	  public virtual void testHyphenationWithDictionary()
-	  {
-		Reader reader = new StringReader("min veninde som er lidt af en l�sehest");
-		TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-		stream = tokenFilterFactory("HyphenationCompoundWord", "hyphenator", "da_UTF8.xml", "dictionary", "da_compoundDictionary.txt").create(stream);
-
-		assertTokenStreamContents(stream, new string[] {"min", "veninde", "som", "er", "lidt", "af", "en", "l�sehest", "l�se", "hest"}, new int[] {1, 1, 1, 1, 1, 1, 1, 1, 0, 0});
-	  }
-
-	  /// <summary>
-	  /// Ensure the factory works with no dictionary: using hyphenation grammar only.
-	  /// Also change the min/max subword sizes from the default. When using no dictionary,
-	  /// its generally necessary to tweak these, or you get lots of expansions.
-	  /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testHyphenationOnly() throws Exception
-	  public virtual void testHyphenationOnly()
-	  {
-		Reader reader = new StringReader("basketballkurv");
-		TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-		stream = tokenFilterFactory("HyphenationCompoundWord", "hyphenator", "da_UTF8.xml", "minSubwordSize", "2", "maxSubwordSize", "4").create(stream);
+            AssertTokenStreamContents(stream, new string[] { "min", "veninde", "som", "er", "lidt", "af", "en", "l�sehest", "l�se", "hest" }, new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 });
+        }
 
-		assertTokenStreamContents(stream, new string[] {"basketballkurv", "ba", "sket", "bal", "ball", "kurv"});
-	  }
+        /// <summary>
+        /// Ensure the factory works with no dictionary: using hyphenation grammar only.
+        /// Also change the min/max subword sizes from the default. When using no dictionary,
+        /// its generally necessary to tweak these, or you get lots of expansions.
+        /// </summary>
+        [Test]
+        public virtual void TestHyphenationOnly()
+        {
+            TextReader reader = new StringReader("basketballkurv");
+            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            stream = TokenFilterFactory("HyphenationCompoundWord", "hyphenator", "da_UTF8.xml", "minSubwordSize", "2", "maxSubwordSize", "4").Create(stream);
 
-	  /// <summary>
-	  /// Test that bogus arguments result in exception </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testBogusArguments() throws Exception
-	  public virtual void testBogusArguments()
-	  {
-		try
-		{
-		  tokenFilterFactory("HyphenationCompoundWord", "hyphenator", "da_UTF8.xml", "bogusArg", "bogusValue");
-		  fail();
-		}
-		catch (System.ArgumentException expected)
-		{
-		  assertTrue(expected.Message.contains("Unknown parameters"));
-		}
-	  }
-	}
+            AssertTokenStreamContents(stream, new string[] { "basketballkurv", "ba", "sket", "bal", "ball", "kurv" });
+        }
 
+        /// <summary>
+        /// Test that bogus arguments result in exception </summary>
+        [Test]
+        public virtual void TestBogusArguments()
+        {
+            try
+            {
+                TokenFilterFactory("HyphenationCompoundWord", "hyphenator", "da_UTF8.xml", "bogusArg", "bogusValue");
+                fail();
+            }
+            catch (System.ArgumentException expected)
+            {
+                assertTrue(expected.Message.Contains("Unknown parameters"));
+            }
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/compoundDictionary.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/compoundDictionary.txt b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/compoundDictionary.txt
new file mode 100644
index 0000000..f4977b5
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/compoundDictionary.txt
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# A set of words for testing the DictionaryCompound factory
+soft
+ball
+team

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/da_UTF8.xml
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/da_UTF8.xml b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/da_UTF8.xml
new file mode 100644
index 0000000..2c8d203
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/da_UTF8.xml
@@ -0,0 +1,1208 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE hyphenation-info SYSTEM "hyphenation.dtd">
+<!--
+  Copyright 1999-2004 The Apache Software Foundation
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!--
+This file contains the hyphenation patterns for danish.
+Adapted from dkhyph.tex, dkcommon.tex and dkspecial.tex
+originally created by Frank Jensen (fj@iesd.auc.dk).
+FOP adaptation by Carlos Villegas (cav@uniscope.co.jp)
+-->
+<hyphenation-info>
+
+<hyphen-char value="-"/>
+<hyphen-min before="2" after="2"/>
+
+<classes>
+aA
+bB
+cC
+dD
+eE
+fF
+gG
+hH
+iI
+jJ
+kK
+lL
+mM
+nN
+oO
+pP
+qQ
+rR
+sS
+tT
+uU
+vV
+wW
+xX
+yY
+zZ
+��
+��
+��
+</classes>
+<patterns>
+<!-- dkcommon -->
+.ae3
+.an3k
+.an1s
+.be5la
+.be1t
+.bi4tr
+.der3i
+.diagno5
+.her3
+.hoved3
+.ne4t5
+.om1
+.ove4
+.po1
+.til3
+.yd5r
+ab5le
+3abst
+a3c
+ade5la
+5adg
+a1e
+5afg
+5a4f1l
+af3r
+af4ri
+5afs
+a4gef
+a4gi
+ag5in
+ag5si
+3agti
+a4gy
+a3h
+ais5t
+a3j
+a5ka
+a3ke
+a5kr
+aku5
+a3la
+a1le
+a1li
+al3k
+4alkv
+a1lo
+al5si
+a3lu
+a1ly
+am4pa
+3analy
+an4k5r
+a3nu
+3anv
+a5o
+a5pe
+a3pi
+a5po
+a1ra
+ar5af
+1arb
+a1re
+5arg
+a1ri
+a3ro
+a3sa
+a3sc
+a1si
+a3sk
+a3so
+3a3sp
+a3ste
+a3sti
+a1ta1
+a1te
+a1ti
+a4t5in
+a1to
+ato5v
+a5tr
+a1tu
+a5va
+a1ve
+a5z
+1ba
+ba4ti
+4bd
+1be
+be1k
+be3ro
+be5ru
+be1s4
+be1tr
+1bi
+bi5sk
+b1j
+4b1n
+1bo
+bo4gr
+bo3ra
+bo5re
+1br4
+4bs
+bs5k
+b3so
+b1st
+b5t
+3bu
+bu4s5tr
+b5w
+1by
+by5s
+4c1c
+1ce
+ce5ro
+3ch
+4ch.
+ci4o
+ck3
+5cy
+3da
+4d3af
+d5anta
+da4s
+d1b
+d1d4
+1de
+de5d
+4de4lem
+der5eri
+de4rig
+de5sk
+d1f
+d1g
+d3h
+1di
+di1e
+di5l
+d3j
+d1k
+d1l
+d1m
+4d1n
+3do
+4dop
+d5ov
+d1p
+4drett
+5d4reve
+3drif
+3driv
+d5ros
+d5ru
+ds5an
+ds5in
+d1ski
+d4sm
+d4su
+dsu5l
+ds5vi
+d3ta
+d1te
+dt5o
+d5tr
+dt5u
+1du
+dub5
+d1v
+3dy
+e5ad
+e3af
+e5ag
+e3ak
+e1al
+ea4la
+e3an
+e5ap
+e3at
+e3bl
+ebs3
+e1ci
+ed5ar
+edde4
+eddel5
+e4do
+ed5ra
+ed3re
+ed3rin
+ed4str
+e3e
+3eff
+e3fr
+3eft
+e3gu
+e1h
+e3in
+ei5s
+e3je
+e4j5el
+e1ka
+e3ke
+e3kl
+4e1ko
+e5kr
+ek5sa
+3eksem
+3eksp
+e3ku
+e1kv
+e5ky
+e3lad
+el3ak
+el3ar
+e1las
+e3le
+e4lek
+3elem
+e1li
+5elim
+e3lo
+el5sa
+e5lu
+e3ly
+e4mad
+em4p5le
+em1s
+en5ak
+e4nan
+4enn
+e4no
+en3so
+e5nu
+e5ol
+e3op
+e1or
+e3ov
+epi3
+e1pr
+e3ra
+er3af
+e4rag
+e4rak
+e1re
+e4ref
+er5ege
+5erhv
+e1ri
+e4rib
+er1k
+ero5d
+er5ov
+er3s
+er5tr
+e3rum
+er5un
+e5ry
+e1ta
+e1te
+etek4s
+e1ti
+e3tj
+e1to
+e3tr
+e3tu
+e1ty
+e3um
+e3un
+3eur
+e1va
+e3ve
+e4v3erf
+e1vi
+e5x
+1fa
+fa4ce
+fags3
+f1b
+f1d
+1fe
+fej4
+fejl1
+f1f
+f1g
+f1h
+1fi
+f1k
+3fl
+1fo
+for1en
+fo4ri
+f1p
+f1s4
+4ft
+f3ta
+f1te
+f1ti
+f5to
+f5tvi
+1fu
+f1v
+3fy
+1ga
+g3art
+g1b
+g1d
+1ge
+4g5enden
+ger3in
+ge3s
+g3f
+g1g
+g1h
+1gi
+gi4b
+gi3st
+5gj
+g3k
+g1l
+g1m
+3go
+4g5om
+g5ov
+g3p
+1gr
+gs1a
+gsde4len
+g4se
+gsha4
+g5sla
+gs3or
+gs1p
+g5s4tide
+g4str
+gs1v
+g3ta
+g1te
+g1ti
+g5to
+g3tr
+gt4s
+g3ud
+gun5
+g3v
+1gy
+g5yd
+4ha.
+heds3
+he5s
+4het
+hi4e
+hi4n5
+hi3s
+ho5ko
+ho5ve
+4h3t
+hun4
+hund3
+hvo4
+i1a
+i3b
+i4ble
+i1c
+i3dr
+ids5k
+i1el
+i1en
+i3er
+i3et.
+if3r
+i3gu
+i3h
+i5i
+i5j
+i1ka
+i1ke
+ik1l
+i5ko
+ik3re
+ik5ri
+iks5t
+ik4tu
+i3ku
+ik3v
+i3lag
+il3eg
+il5ej
+il5el
+i3li
+i4l5id
+il3k
+i1lo
+il5u
+i3mu
+ind3t
+5inf
+ings1
+in3s
+in4sv
+inter1
+i3nu
+i3od
+i3og
+i5ok
+i3ol
+ion4
+ions1
+i5o5r
+i3ot
+i5pi
+i3pli
+i5pr
+i3re
+i3ri
+ir5t
+i3sc
+i3si
+i4sm
+is3p
+i1ster
+i3sti
+i5sua
+i1ta
+i1te
+i1ti
+i3to
+i3tr
+it5re.
+i1tu
+i3ty
+i1u
+i1va
+i1ve
+i1vi
+j3ag
+jde4rer
+jds1
+jek4to
+4j5en.
+j5k
+j3le
+j3li
+jlmeld5
+jlmel4di
+j3r
+jre5
+ju3s
+5kap
+k5au
+5kav
+k5b
+kel5s
+ke3sk
+ke5st
+ke4t5a
+k3h
+ki3e
+ki3st
+k1k
+k5lak
+k1le
+3klu
+k4ny
+5kod
+1kon
+ko3ra
+3kort
+ko3v
+1kra
+5kry
+ks3an
+k1si
+ks3k
+ks1p
+k3ste
+k5stu
+ks5v
+k1t
+k4tar
+k4terh
+kti4e
+kt5re
+kt5s
+3kur
+1kus
+3kut
+k4vo
+k4vu
+5lab
+lad3r
+5lagd
+la4g3r
+5lam
+1lat
+l1b
+ldiagnos5
+l3dr
+ld3st
+1le.
+5led
+4lele
+le4mo
+3len
+1ler
+1les
+4leu
+l1f
+lfin4
+lfind5
+l1go1
+l3h
+li4ga
+4l5ins
+4l3int
+li5o
+l3j
+l1ke
+l1ko
+l3ky
+l1l
+l5mu
+lo4du
+l3op
+4l5or
+3lov
+4l3p
+l4ps
+l3r
+4ls
+lses1
+ls5in
+l5sj
+l1ta
+l4taf
+l1te
+l4t5erf
+l3ti
+lt3o
+l3tr
+l3tu
+lu5l
+l3ve
+l3vi
+1ma
+m1b
+m3d
+1me
+4m5ej
+m3f
+m1g
+m3h
+1mi
+mi3k
+m5ing
+mi4o
+mi5sty
+m3k
+m1l
+m1m
+mmen5
+m1n
+3mo
+mo4da
+4mop
+4m5ov
+m1pe
+m3pi
+m3pl
+m1po
+m3pr
+m1r
+mse5s
+ms5in
+m5sk
+ms3p
+m3ste
+ms5v
+m3ta
+m3te
+m3ti
+m3tr
+m1ud
+1mul
+mu1li
+3my
+3na
+4nak
+1nal
+n1b
+n1c
+4nd
+n3dr
+nd5si
+nd5sk
+nd5sp
+1ne
+ne5a
+ne4da
+nemen4
+nement5e
+neo4
+n3erk
+n5erl
+ne5sl
+ne5st
+n1f
+n4go
+4n1h
+1ni
+4nim
+ni5o
+ni3st
+n1ke
+n1ko
+n3kr
+n3ku
+n5kv
+4n1l
+n1m
+n1n
+1no
+n3ord
+n5p
+n3r
+4ns
+n3si
+n1sku
+ns3po
+n1sta
+n5sti
+n1ta
+nta4le
+n1te
+n1ti
+ntiali4
+n3to
+n1tr
+nt4s5t
+nt4su
+n3tu
+n3ty
+4n1v
+3ny
+n3z
+o3a
+o4as
+ob3li
+o1c
+o4din
+od5ri
+od5s
+od5un
+o1e
+of5r
+o4gek
+o4gel
+o4g5o
+og5re
+og5sk
+o5h
+o5in
+oi6s5e
+o1j
+o3ka
+o1ke
+o3ku
+o3la
+o3le
+o1li
+o1lo
+o3lu
+o5ly
+1omr
+on3k
+ook5
+o3or
+o5ov
+o3pi
+op3l
+op3r
+op3s
+3opta
+4or.
+or1an
+3ordn
+ord5s
+o3re.
+o3reg
+o3rek
+o3rer
+o3re3s
+o3ret
+o3ri
+3orient
+or5im
+o4r5in
+or3k
+or5o
+or3sl
+or3st
+o3si
+o3so
+o3t
+o1te
+o5un
+ov4s
+3pa
+pa5gh
+p5anl
+p3d
+4pec
+3pen
+1per
+pe1ra
+pe5s
+pe3u
+p3f
+4p5h
+1pla
+p4lan
+4ple.
+4pler
+4ples
+p3m
+p3n
+5pok
+4po3re
+3pot
+4p5p4
+p4ro
+1proc
+p3sk
+p5so
+ps4p
+p3st
+p1t
+1pu
+pu5b
+p5ule
+p5v
+5py3
+qu4
+4raf
+ra5is
+4rarb
+r1b
+r4d5ar
+r3dr
+rd4s3
+4reks
+1rel
+re5la
+r5enss
+5rese
+re5spo
+4ress
+re3st
+re5s4u
+5rett
+r1f
+r1gu
+r1h
+ri1e
+ri5la
+4rimo
+r4ing
+ringse4
+ringso4r
+4rinp
+4rint
+r3ka
+r1ke
+r1ki
+rk3so
+r3ku
+r1l
+rmo4
+r5mu
+r1n
+ro1b
+ro3p
+r3or
+r3p
+r1r
+rre5s
+rro4n5
+r1sa
+r1si
+r5skr
+r4sk5v
+rs4n
+r3sp
+r5stu
+r5su
+r3sv
+r5tal
+r1te
+r4teli
+r1ti
+r3to
+r4t5or
+rt5rat
+rt3re
+r5tri
+r5tro
+rt3s
+r5ty
+r3ud
+run4da
+5rut
+r3va
+r1ve
+r3vi
+ry4s
+s3af
+1sam
+sa4ma
+s3ap
+s1ar
+1sat
+4s1b
+s1d
+sdy4
+1se
+s4ed
+5s4er
+se4se
+s1f
+4s1g4
+4s3h
+si4bl
+1sig
+s5int
+5sis
+5sit
+5siu
+s5ju
+4sk.
+1skab
+1ske
+s3kl
+sk5s4
+5sky
+s1le
+s1li
+slo3
+5slu
+s5ly
+s1m
+s4my
+4snin
+s4nit
+so5k
+5sol
+5som.
+3somm
+s5oms
+5somt
+3son
+4s1op
+sp4
+3spec
+4sper
+3s4pi
+s1pl
+3sprog.
+s5r4
+s1s4
+4st.
+5s4tam
+1stan
+st5as
+3stat
+1stav
+1ste.
+1sted
+3stel
+5stemo
+1sten
+5step
+3ster.
+3stes
+5stet
+5stj
+3sto
+st5om
+1str
+s1ud
+3sul
+s3un
+3sur
+s3ve
+3s4y
+1sy1s
+5ta.
+1tag
+tands3
+4tanv
+4tb
+tede4l
+teds5
+3teg
+5tekn
+teo1
+5term
+te5ro
+4t1f
+6t3g
+t1h
+tialis5t
+3tid
+ti4en
+ti3st
+4t3k
+4t1l
+tli4s5
+t1m
+t1n
+to5ra
+to1re
+to1ri
+tor4m
+4t3p
+t4ra
+4tres
+tro5v
+1try
+4ts
+t3si
+ts4pa
+ts5pr
+t3st
+ts5ul
+4t1t
+t5uds
+5tur
+t5ve
+1typ
+u1a
+5udl
+ud5r
+ud3s
+3udv
+u1e
+ue4t5
+uge4ri
+ugs3
+u5gu
+u3i
+u5kl
+uk4ta
+uk4tr
+u1la
+u1le
+u5ly
+u5pe
+up5l
+u5q
+u3ra
+u3re
+u4r3eg
+u1rer
+u3ro
+us5a
+u3si
+u5ska
+u5so
+us5v
+u1te
+u1ti
+u1to
+ut5r
+ut5s4
+5u5v
+va5d
+3varm
+1ved
+ve4l5e
+ve4reg
+ve3s
+5vet
+v5h
+vi4l3in
+1vis
+v5j
+v5k
+vl4
+v3le
+v5li
+vls1
+1vo
+4v5om
+v5p
+v5re
+v3st
+v5su
+v5t
+3vu
+y3a
+y5dr
+y3e
+y3ke
+y5ki
+yk3li
+y3ko
+yk4s5
+y3kv
+y5li
+y5lo
+y5mu
+yns5
+y5o
+y1pe
+y3pi
+y3re
+yr3ek
+y3ri
+y3si
+y3ti
+y5t3r
+y5ve
+zi5o
+<!-- dkspecial -->
+.s�3
+.�r5i
+.�v3r
+a3t�
+a5v�
+br�d3
+5b�
+5dr�v
+dst�4
+3d�
+3d�
+e3l�
+e3l�
+e3r�
+er5�n
+e5t�
+e5t�
+e1v�
+e3�
+e5�
+3f�
+3f�
+f�4r5en
+gi�4
+g4s�
+g5s�
+3g�
+3g�1
+3g�
+i5t�
+i3�
+3k�
+3k�
+lingeni�4
+l3v�
+5l�s
+m5t�
+1m�
+3m�
+3m�
+n3k�
+n5t�
+3n�
+4n5�b
+5n�
+o5l�
+or3�
+o5�
+5pr�
+5p�d
+p�3
+r5k�
+r5t�
+r5t�
+r3v�
+r5�l
+4r�n
+5r�r
+3r�d
+r5�r
+s4k�
+3sl�
+s4n�
+5st�
+1st�
+1s�
+4s5�n
+1s�
+s5�k
+s�4r5
+ti4�
+3tr�k.
+t4s�
+t5s�
+t3v�
+u3l�
+3v�rd
+1v�rk
+5v�
+y5v�
+�b3l
+�3c
+�3e
+�g5a
+�4gek
+�4g5r
+�gs5
+�5i
+�5kv
+�lle4
+�n1dr
+�5o
+�1re
+�r4g5r
+�3ri
+�r4ma
+�r4mo
+�r5s
+�5si
+�3so
+�3ste
+�3ve
+�de5
+�3e
+�1je
+�3ke
+�3le
+�ms5
+�n3st
+�n4t3
+�1re
+�3ri
+�rne3
+�r5o
+�1ve
+�1d
+�1e
+�5h
+�3l
+�3re
+�rs5t
+�5sk
+�3t
+</patterns>
+</hyphenation-info>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/da_compoundDictionary.txt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/da_compoundDictionary.txt b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/da_compoundDictionary.txt
new file mode 100644
index 0000000..9a14f40
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/da_compoundDictionary.txt
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# A set of words for testing the HyphenationCompound factory,
+# in conjunction with the danish hyphenation grammar.
+l�se
+hest

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
index 1b641b7..504ec5f 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
+++ b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
@@ -84,6 +84,9 @@
     <Compile Include="Analysis\CommonGrams\CommonGramsFilterTest.cs" />
     <Compile Include="Analysis\CommonGrams\TestCommonGramsFilterFactory.cs" />
     <Compile Include="Analysis\CommonGrams\TestCommonGramsQueryFilterFactory.cs" />
+    <Compile Include="Analysis\Compound\TestCompoundWordTokenFilter.cs" />
+    <Compile Include="Analysis\Compound\TestDictionaryCompoundWordTokenFilterFactory.cs" />
+    <Compile Include="Analysis\Compound\TestHyphenationCompoundWordTokenFilterFactory.cs" />
     <Compile Include="Analysis\Core\TestAnalyzers.cs" />
     <Compile Include="Analysis\Core\TestBugInSomething.cs" />
     <Compile Include="Analysis\Core\TestClassicAnalyzer.cs" />
@@ -476,7 +479,11 @@
     <EmbeddedResource Include="Analysis\No\nn_light.txt" />
     <EmbeddedResource Include="Analysis\No\nn_minimal.txt" />
   </ItemGroup>
-  <ItemGroup />
+  <ItemGroup>
+    <EmbeddedResource Include="Analysis\Compound\compoundDictionary.txt" />
+    <EmbeddedResource Include="Analysis\Compound\da_UTF8.xml" />
+    <EmbeddedResource Include="Analysis\Compound\da_compoundDictionary.txt" />
+  </ItemGroup>
   <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
   <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
        Other similar extension points exist, see Microsoft.Common.targets.


[02/50] [abbrv] lucenenet git commit: Added missing overloads on the Core.Analysis.Tokenattributes.ICharTermAttribute to append ICharSequence.

Posted by sy...@apache.org.
Added missing overloads on the Core.Analysis.Tokenattributes.ICharTermAttribute to append ICharSequence.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/d9fb43ef
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/d9fb43ef
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/d9fb43ef

Branch: refs/heads/analysis-work
Commit: d9fb43ef55334783886d5b7a15c618db02e37d96
Parents: 572ad69
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Aug 20 10:37:26 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Aug 20 11:34:08 2016 +0700

----------------------------------------------------------------------
 .../Tokenattributes/ICharTermAttribute.cs       | 38 +++++++++++---------
 1 file changed, 21 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d9fb43ef/src/Lucene.Net.Core/Analysis/Tokenattributes/ICharTermAttribute.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/Tokenattributes/ICharTermAttribute.cs b/src/Lucene.Net.Core/Analysis/Tokenattributes/ICharTermAttribute.cs
index 50fd215..bca2d65 100644
--- a/src/Lucene.Net.Core/Analysis/Tokenattributes/ICharTermAttribute.cs
+++ b/src/Lucene.Net.Core/Analysis/Tokenattributes/ICharTermAttribute.cs
@@ -1,26 +1,26 @@
+using Lucene.Net.Support;
+using Lucene.Net.Util;
 using System;
 using System.Text;
 
 namespace Lucene.Net.Analysis.Tokenattributes
 {
-    using Lucene.Net.Util;
-
     /*
-             * Licensed to the Apache Software Foundation (ASF) under one or more
-             * contributor license agreements.  See the NOTICE file distributed with
-             * this work for additional information regarding copyright ownership.
-             * The ASF licenses this file to You under the Apache License, Version 2.0
-             * (the "License"); you may not use this file except in compliance with
-             * the License.  You may obtain a copy of the License at
-             *
-             *     http://www.apache.org/licenses/LICENSE-2.0
-             *
-             * Unless required by applicable law or agreed to in writing, software
-             * distributed under the License is distributed on an "AS IS" BASIS,
-             * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-             * See the License for the specific language governing permissions and
-             * limitations under the License.
-             */
+    * Licensed to the Apache Software Foundation (ASF) under one or more
+    * contributor license agreements.  See the NOTICE file distributed with
+    * this work for additional information regarding copyright ownership.
+    * The ASF licenses this file to You under the Apache License, Version 2.0
+    * (the "License"); you may not use this file except in compliance with
+    * the License.  You may obtain a copy of the License at
+    *
+    *     http://www.apache.org/licenses/LICENSE-2.0
+    *
+    * Unless required by applicable law or agreed to in writing, software
+    * distributed under the License is distributed on an "AS IS" BASIS,
+    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    * See the License for the specific language governing permissions and
+    * limitations under the License.
+    */
 
     /// <summary>
     /// The term text of a Token.
@@ -75,6 +75,10 @@ namespace Lucene.Net.Analysis.Tokenattributes
         ICharTermAttribute SetEmpty();
 
         // the following methods are redefined to get rid of IOException declaration:
+        ICharTermAttribute Append(ICharSequence csq);
+
+        ICharTermAttribute Append(ICharSequence csq, int start, int end);
+
         ICharTermAttribute Append(string csq, int start, int end);
 
         ICharTermAttribute Append(char c);


[08/50] [abbrv] lucenenet git commit: Fixed bugs in KStemmer that were causing tests TestKStemmer.TestVocabulary and TestKStemmer.TestRandomStrings to fail.

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c185dc/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
index 19069c9..fefda71 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
@@ -1,4 +1,5 @@
 \ufeffusing Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
 using Lucene.Net.Util;
 using System;
 
@@ -78,208 +79,257 @@ namespace Lucene.Net.Analysis.En
     {
         private const int MaxWordLen = 50;
 
-        private static readonly string[] exceptionWords = new string[] { "aide", "bathe", "caste", "cute", "dame", "dime", "doge", "done", "dune", "envelope", "gage", "grille", "grippe", "lobe", "mane", "mare", "nape", "node", "pane", "pate", "plane", "pope", "programme", "quite", "ripe", "rote", "rune", "sage", "severe", "shoppe", "sine", "slime", "snipe", "steppe", "suite", "swinge", "tare", "tine", "tope", "tripe", "twine" };
+        private static readonly string[] exceptionWords = new string[] { "aide", "bathe", "caste",
+            "cute", "dame", "dime", "doge", "done", "dune", "envelope", "gage",
+            "grille", "grippe", "lobe", "mane", "mare", "nape", "node", "pane",
+            "pate", "plane", "pope", "programme", "quite", "ripe", "rote", "rune",
+            "sage", "severe", "shoppe", "sine", "slime", "snipe", "steppe", "suite",
+            "swinge", "tare", "tine", "tope", "tripe", "twine"
+        };
 
         private static readonly string[][] directConflations = new string[][]
         {
-          new string[] {"aging", "age"},
-          new string[] {"going", "go"},
-          new string[] {"goes", "go"},
-          new string[] {"lying", "lie"},
-          new string[] {"using", "use"},
-          new string[] {"owing", "owe"},
-          new string[] {"suing", "sue"},
-          new string[] {"dying", "die"},
-          new string[] {"tying", "tie"},
-          new string[] {"vying", "vie"},
-          new string[] {"aged", "age"},
-          new string[] {"used", "use"},
-          new string[] {"vied", "vie"},
-          new string[] {"cued", "cue"},
-          new string[] {"died", "die"},
-          new string[] {"eyed", "eye"},
-          new string[] {"hued", "hue"},
-          new string[] {"iced", "ice"},
-          new string[] {"lied", "lie"},
-          new string[] {"owed", "owe"},
-          new string[] {"sued", "sue"},
-          new string[] {"toed", "toe"},
-          new string[] {"tied", "tie"},
-          new string[] {"does", "do"},
-          new string[] {"doing", "do"},
-          new string[] {"aeronautical", "aeronautics"},
-          new string[] {"mathematical", "mathematics"},
-          new string[] {"political", "politics"},
-          new string[] {"metaphysical", "metaphysics"},
-          new string[] {"cylindrical", "cylinder"},
-          new string[] {"nazism", "nazi"},
-          new string[] {"ambiguity", "ambiguous"},
-          new string[] {"barbarity", "barbarous"},
-          new string[] {"credulity", "credulous"},
-          new string[] {"generosity", "generous"},
-          new string[] {"spontaneity", "spontaneous"},
-          new string[] {"unanimity", "unanimous"},
-          new string[] {"voracity", "voracious"},
-          new string[] {"fled", "flee"},
-          new string[] {"miscarriage", "miscarry"}
+            new string[] {"aging", "age"},
+            new string[] {"going", "go"},
+            new string[] {"goes", "go"},
+            new string[] {"lying", "lie"},
+            new string[] {"using", "use"},
+            new string[] {"owing", "owe"},
+            new string[] {"suing", "sue"},
+            new string[] {"dying", "die"},
+            new string[] {"tying", "tie"},
+            new string[] {"vying", "vie"},
+            new string[] {"aged", "age"},
+            new string[] {"used", "use"},
+            new string[] {"vied", "vie"},
+            new string[] {"cued", "cue"},
+            new string[] {"died", "die"},
+            new string[] {"eyed", "eye"},
+            new string[] {"hued", "hue"},
+            new string[] {"iced", "ice"},
+            new string[] {"lied", "lie"},
+            new string[] {"owed", "owe"},
+            new string[] {"sued", "sue"},
+            new string[] {"toed", "toe"},
+            new string[] {"tied", "tie"},
+            new string[] {"does", "do"},
+            new string[] {"doing", "do"},
+            new string[] {"aeronautical", "aeronautics"},
+            new string[] {"mathematical", "mathematics"},
+            new string[] {"political", "politics"},
+            new string[] {"metaphysical", "metaphysics"},
+            new string[] {"cylindrical", "cylinder"},
+            new string[] {"nazism", "nazi"},
+            new string[] {"ambiguity", "ambiguous"},
+            new string[] {"barbarity", "barbarous"},
+            new string[] {"credulity", "credulous"},
+            new string[] {"generosity", "generous"},
+            new string[] {"spontaneity", "spontaneous"},
+            new string[] {"unanimity", "unanimous"},
+            new string[] {"voracity", "voracious"},
+            new string[] {"fled", "flee"},
+            new string[] {"miscarriage", "miscarry"}
         };
 
         private static readonly string[][] countryNationality = new string[][]
         {
-          new string[] {"afghan", "afghanistan"},
-          new string[] {"african", "africa"},
-          new string[] {"albanian", "albania"},
-          new string[] {"algerian", "algeria"},
-          new string[] {"american", "america"},
-          new string[] {"andorran", "andorra"},
-          new string[] {"angolan", "angola"},
-          new string[] {"arabian", "arabia"},
-          new string[] {"argentine", "argentina"},
-          new string[] {"armenian", "armenia"},
-          new string[] {"asian", "asia"},
-          new string[] {"australian", "australia"},
-          new string[] {"austrian", "austria"},
-          new string[] {"azerbaijani", "azerbaijan"},
-          new string[] {"azeri", "azerbaijan"},
-          new string[] {"bangladeshi", "bangladesh"},
-          new string[] {"belgian", "belgium"},
-          new string[] {"bermudan", "bermuda"},
-          new string[] {"bolivian", "bolivia"},
-          new string[] {"bosnian", "bosnia"},
-          new string[] {"botswanan", "botswana"},
-          new string[] {"brazilian", "brazil"},
-          new string[] {"british", "britain"},
-          new string[] {"bulgarian", "bulgaria"},
-          new string[] {"burmese", "burma"},
-          new string[] {"californian", "california"},
-          new string[] {"cambodian", "cambodia"},
-          new string[] {"canadian", "canada"},
-          new string[] {"chadian", "chad"},
-          new string[] {"chilean", "chile"},
-          new string[] {"chinese", "china"},
-          new string[] {"colombian", "colombia"},
-          new string[] {"croat", "croatia"},
-          new string[] {"croatian", "croatia"},
-          new string[] {"cuban", "cuba"},
-          new string[] {"cypriot", "cyprus"},
-          new string[] {"czechoslovakian", "czechoslovakia"},
-          new string[] {"danish", "denmark"},
-          new string[] {"egyptian", "egypt"},
-          new string[] {"equadorian", "equador"},
-          new string[] {"eritrean", "eritrea"},
-          new string[] {"estonian", "estonia"},
-          new string[] {"ethiopian", "ethiopia"},
-          new string[] {"european", "europe"},
-          new string[] {"fijian", "fiji"},
-          new string[] {"filipino", "philippines"},
-          new string[] {"finnish", "finland"},
-          new string[] {"french", "france"},
-          new string[] {"gambian", "gambia"},
-          new string[] {"georgian", "georgia"},
-          new string[] {"german", "germany"},
-          new string[] {"ghanian", "ghana"},
-          new string[] {"greek", "greece"},
-          new string[] {"grenadan", "grenada"},
-          new string[] {"guamian", "guam"},
-          new string[] {"guatemalan", "guatemala"},
-          new string[] {"guinean", "guinea"},
-          new string[] {"guyanan", "guyana"},
-          new string[] {"haitian", "haiti"},
-          new string[] {"hawaiian", "hawaii"},
-          new string[] {"holland", "dutch"},
-          new string[] {"honduran", "honduras"},
-          new string[] {"hungarian", "hungary"},
-          new string[] {"icelandic", "iceland"},
-          new string[] {"indonesian", "indonesia"},
-          new string[] {"iranian", "iran"},
-          new string[] {"iraqi", "iraq"},
-          new string[] {"iraqui", "iraq"},
-          new string[] {"irish", "ireland"},
-          new string[] {"israeli", "israel"},
-          new string[] {"italian", "italy"},
-          new string[] {"jamaican", "jamaica"},
-          new string[] {"japanese", "japan"},
-          new string[] {"jordanian", "jordan"},
-          new string[] {"kampuchean", "cambodia"},
-          new string[] {"kenyan", "kenya"},
-          new string[] {"korean", "korea"},
-          new string[] {"kuwaiti", "kuwait"},
-          new string[] {"lankan", "lanka"},
-          new string[] {"laotian", "laos"},
-          new string[] {"latvian", "latvia"},
-          new string[] {"lebanese", "lebanon"},
-          new string[] {"liberian", "liberia"},
-          new string[] {"libyan", "libya"},
-          new string[] {"lithuanian", "lithuania"},
-          new string[] {"macedonian", "macedonia"},
-          new string[] {"madagascan", "madagascar"},
-          new string[] {"malaysian", "malaysia"},
-          new string[] {"maltese", "malta"},
-          new string[] {"mauritanian", "mauritania"},
-          new string[] {"mexican", "mexico"},
-          new string[] {"micronesian", "micronesia"},
-          new string[] {"moldovan", "moldova"},
-          new string[] {"monacan", "monaco"},
-          new string[] {"mongolian", "mongolia"},
-          new string[] {"montenegran", "montenegro"},
-          new string[] {"moroccan", "morocco"},
-          new string[] {"myanmar", "burma"},
-          new string[] {"namibian", "namibia"},
-          new string[] {"nepalese", "nepal"},
-          new string[] {"nicaraguan", "nicaragua"},
-          new string[] {"nigerian", "nigeria"},
-          new string[] {"norwegian", "norway"},
-          new string[] {"omani", "oman"},
-          new string[] {"pakistani", "pakistan"},
-          new string[] {"panamanian", "panama"},
-          new string[] {"papuan", "papua"},
-          new string[] {"paraguayan", "paraguay"},
-          new string[] {"peruvian", "peru"},
-          new string[] {"portuguese", "portugal"},
-          new string[] {"romanian", "romania"},
-          new string[] {"rumania", "romania"},
-          new string[] {"rumanian", "romania"},
-          new string[] {"russian", "russia"},
-          new string[] {"rwandan", "rwanda"},
-          new string[] {"samoan", "samoa"},
-          new string[] {"scottish", "scotland"},
-          new string[] {"serb", "serbia"},
-          new string[] {"serbian", "serbia"},
-          new string[] {"siam", "thailand"},
-          new string[] {"siamese", "thailand"},
-          new string[] {"slovakia", "slovak"},
-          new string[] {"slovakian", "slovak"},
-          new string[] {"slovenian", "slovenia"},
-          new string[] {"somali", "somalia"},
-          new string[] {"somalian", "somalia"},
-          new string[] {"spanish", "spain"},
-          new string[] {"swedish", "sweden"},
-          new string[] {"swiss", "switzerland"},
-          new string[] {"syrian", "syria"},
-          new string[] {"taiwanese", "taiwan"},
-          new string[] {"tanzanian", "tanzania"},
-          new string[] {"texan", "texas"},
-          new string[] {"thai", "thailand"},
-          new string[] {"tunisian", "tunisia"},
-          new string[] {"turkish", "turkey"},
-          new string[] {"ugandan", "uganda"},
-          new string[] {"ukrainian", "ukraine"},
-          new string[] {"uruguayan", "uruguay"},
-          new string[] {"uzbek", "uzbekistan"},
-          new string[] {"venezuelan", "venezuela"},
-          new string[] {"vietnamese", "viet"},
-          new string[] {"virginian", "virginia"},
-          new string[] {"yemeni", "yemen"},
-          new string[] {"yugoslav", "yugoslavia"},
-          new string[] {"yugoslavian", "yugoslavia"},
-          new string[] {"zambian", "zambia"},
-          new string[] {"zealander", "zealand"},
-          new string[] {"zimbabwean", "zimbabwe"}
+            new string[] {"afghan", "afghanistan"},
+            new string[] {"african", "africa"},
+            new string[] {"albanian", "albania"},
+            new string[] {"algerian", "algeria"},
+            new string[] {"american", "america"},
+            new string[] {"andorran", "andorra"},
+            new string[] {"angolan", "angola"},
+            new string[] {"arabian", "arabia"},
+            new string[] {"argentine", "argentina"},
+            new string[] {"armenian", "armenia"},
+            new string[] {"asian", "asia"},
+            new string[] {"australian", "australia"},
+            new string[] {"austrian", "austria"},
+            new string[] {"azerbaijani", "azerbaijan"},
+            new string[] {"azeri", "azerbaijan"},
+            new string[] {"bangladeshi", "bangladesh"},
+            new string[] {"belgian", "belgium"},
+            new string[] {"bermudan", "bermuda"},
+            new string[] {"bolivian", "bolivia"},
+            new string[] {"bosnian", "bosnia"},
+            new string[] {"botswanan", "botswana"},
+            new string[] {"brazilian", "brazil"},
+            new string[] {"british", "britain"},
+            new string[] {"bulgarian", "bulgaria"},
+            new string[] {"burmese", "burma"},
+            new string[] {"californian", "california"},
+            new string[] {"cambodian", "cambodia"},
+            new string[] {"canadian", "canada"},
+            new string[] {"chadian", "chad"},
+            new string[] {"chilean", "chile"},
+            new string[] {"chinese", "china"},
+            new string[] {"colombian", "colombia"},
+            new string[] {"croat", "croatia"},
+            new string[] {"croatian", "croatia"},
+            new string[] {"cuban", "cuba"},
+            new string[] {"cypriot", "cyprus"},
+            new string[] {"czechoslovakian", "czechoslovakia"},
+            new string[] {"danish", "denmark"},
+            new string[] {"egyptian", "egypt"},
+            new string[] {"equadorian", "equador"},
+            new string[] {"eritrean", "eritrea"},
+            new string[] {"estonian", "estonia"},
+            new string[] {"ethiopian", "ethiopia"},
+            new string[] {"european", "europe"},
+            new string[] {"fijian", "fiji"},
+            new string[] {"filipino", "philippines"},
+            new string[] {"finnish", "finland"},
+            new string[] {"french", "france"},
+            new string[] {"gambian", "gambia"},
+            new string[] {"georgian", "georgia"},
+            new string[] {"german", "germany"},
+            new string[] {"ghanian", "ghana"},
+            new string[] {"greek", "greece"},
+            new string[] {"grenadan", "grenada"},
+            new string[] {"guamian", "guam"},
+            new string[] {"guatemalan", "guatemala"},
+            new string[] {"guinean", "guinea"},
+            new string[] {"guyanan", "guyana"},
+            new string[] {"haitian", "haiti"},
+            new string[] {"hawaiian", "hawaii"},
+            new string[] {"holland", "dutch"},
+            new string[] {"honduran", "honduras"},
+            new string[] {"hungarian", "hungary"},
+            new string[] {"icelandic", "iceland"},
+            new string[] {"indonesian", "indonesia"},
+            new string[] {"iranian", "iran"},
+            new string[] {"iraqi", "iraq"},
+            new string[] {"iraqui", "iraq"},
+            new string[] {"irish", "ireland"},
+            new string[] {"israeli", "israel"},
+            new string[] {"italian", "italy"},
+            new string[] {"jamaican", "jamaica"},
+            new string[] {"japanese", "japan"},
+            new string[] {"jordanian", "jordan"},
+            new string[] {"kampuchean", "cambodia"},
+            new string[] {"kenyan", "kenya"},
+            new string[] {"korean", "korea"},
+            new string[] {"kuwaiti", "kuwait"},
+            new string[] {"lankan", "lanka"},
+            new string[] {"laotian", "laos"},
+            new string[] {"latvian", "latvia"},
+            new string[] {"lebanese", "lebanon"},
+            new string[] {"liberian", "liberia"},
+            new string[] {"libyan", "libya"},
+            new string[] {"lithuanian", "lithuania"},
+            new string[] {"macedonian", "macedonia"},
+            new string[] {"madagascan", "madagascar"},
+            new string[] {"malaysian", "malaysia"},
+            new string[] {"maltese", "malta"},
+            new string[] {"mauritanian", "mauritania"},
+            new string[] {"mexican", "mexico"},
+            new string[] {"micronesian", "micronesia"},
+            new string[] {"moldovan", "moldova"},
+            new string[] {"monacan", "monaco"},
+            new string[] {"mongolian", "mongolia"},
+            new string[] {"montenegran", "montenegro"},
+            new string[] {"moroccan", "morocco"},
+            new string[] {"myanmar", "burma"},
+            new string[] {"namibian", "namibia"},
+            new string[] {"nepalese", "nepal"},
+            new string[] {"nicaraguan", "nicaragua"},
+            new string[] {"nigerian", "nigeria"},
+            new string[] {"norwegian", "norway"},
+            new string[] {"omani", "oman"},
+            new string[] {"pakistani", "pakistan"},
+            new string[] {"panamanian", "panama"},
+            new string[] {"papuan", "papua"},
+            new string[] {"paraguayan", "paraguay"},
+            new string[] {"peruvian", "peru"},
+            new string[] {"portuguese", "portugal"},
+            new string[] {"romanian", "romania"},
+            new string[] {"rumania", "romania"},
+            new string[] {"rumanian", "romania"},
+            new string[] {"russian", "russia"},
+            new string[] {"rwandan", "rwanda"},
+            new string[] {"samoan", "samoa"},
+            new string[] {"scottish", "scotland"},
+            new string[] {"serb", "serbia"},
+            new string[] {"serbian", "serbia"},
+            new string[] {"siam", "thailand"},
+            new string[] {"siamese", "thailand"},
+            new string[] {"slovakia", "slovak"},
+            new string[] {"slovakian", "slovak"},
+            new string[] {"slovenian", "slovenia"},
+            new string[] {"somali", "somalia"},
+            new string[] {"somalian", "somalia"},
+            new string[] {"spanish", "spain"},
+            new string[] {"swedish", "sweden"},
+            new string[] {"swiss", "switzerland"},
+            new string[] {"syrian", "syria"},
+            new string[] {"taiwanese", "taiwan"},
+            new string[] {"tanzanian", "tanzania"},
+            new string[] {"texan", "texas"},
+            new string[] {"thai", "thailand"},
+            new string[] {"tunisian", "tunisia"},
+            new string[] {"turkish", "turkey"},
+            new string[] {"ugandan", "uganda"},
+            new string[] {"ukrainian", "ukraine"},
+            new string[] {"uruguayan", "uruguay"},
+            new string[] {"uzbek", "uzbekistan"},
+            new string[] {"venezuelan", "venezuela"},
+            new string[] {"vietnamese", "viet"},
+            new string[] {"virginian", "virginia"},
+            new string[] {"yemeni", "yemen"},
+            new string[] {"yugoslav", "yugoslavia"},
+            new string[] {"yugoslavian", "yugoslavia"},
+            new string[] {"zambian", "zambia"},
+            new string[] {"zealander", "zealand"},
+            new string[] {"zimbabwean", "zimbabwe"}
         };
 
-        private static readonly string[] supplementDict = new string[] { "aids", "applicator", "capacitor", "digitize", "electromagnet", "ellipsoid", "exosphere", "extensible", "ferromagnet", "graphics", "hydromagnet", "polygraph", "toroid", "superconduct", "backscatter", "connectionism" };
-
-        private static readonly string[] properNouns = new string[] { "abrams", "achilles", "acropolis", "adams", "agnes", "aires", "alexander", "alexis", "alfred", "algiers", "alps", "amadeus", "ames", "amos", "andes", "angeles", "annapolis", "antilles", "aquarius", "archimedes", "arkansas", "asher", "ashly", "athens", "atkins", "atlantis", "avis", "bahamas", "bangor", "barbados", "barger", "bering", "brahms", "brandeis", "brussels", "bruxelles", "cairns", "camoros", "camus", "carlos", "celts", "chalker", "charles", "cheops", "ching", "christmas", "cocos", "collins", "columbus", "confucius", "conners", "connolly", "copernicus", "cramer", "cyclops", "cygnus", "cyprus", "dallas", "damascus", "daniels", "davies", "davis", "decker", "denning", "dennis", "descartes", "dickens", "doris", "douglas", "downs", "dreyfus", "dukakis", "dulles", "dumfries", "ecclesiastes", "edwards", "emily", "erasmus", "euphrates", "evans", "everglades", "fairbanks", "federales", "fisher", "fitzsimmons", "flem
 ing", "forbes", "fowler", "france", "francis", "goering", "goodling", "goths", "grenadines", "guiness", "hades", "harding", "harris", "hastings", "hawkes", "hawking", "hayes", "heights", "hercules", "himalayas", "hippocrates", "hobbs", "holmes", "honduras", "hopkins", "hughes", "humphreys", "illinois", "indianapolis", "inverness", "iris", "iroquois", "irving", "isaacs", "italy", "james", "jarvis", "jeffreys", "jesus", "jones", "josephus", "judas", "julius", "kansas", "keynes", "kipling", "kiwanis", "lansing", "laos", "leeds", "levis", "leviticus", "lewis", "louis", "maccabees", "madras", "maimonides", "maldive", "massachusetts", "matthews", "mauritius", "memphis", "mercedes", "midas", "mingus", "minneapolis", "mohammed", "moines", "morris", "moses", "myers", "myknos", "nablus", "nanjing", "nantes", "naples", "neal", "netherlands", "nevis", "nostradamus", "oedipus", "olympus", "orleans", "orly", "papas", "paris", "parker", "pauling", "peking", "pershing", "peter", "peters", "philippi
 nes", "phineas", "pisces", "pryor", "pythagoras", "queens", "rabelais", "ramses", "reynolds", "rhesus", "rhodes", "richards", "robins", "rodgers", "rogers", "rubens", "sagittarius", "seychelles", "socrates", "texas", "thames", "thomas", "tiberias", "tunis", "venus", "vilnius", "wales", "warner", "wilkins", "williams", "wyoming", "xmas", "yonkers", "zeus", "frances", "aarhus", "adonis", "andrews", "angus", "antares", "aquinas", "arcturus", "ares", "artemis", "augustus", "ayers", "barnabas", "barnes", "becker", "bejing", "biggs", "billings", "boeing", "boris", "borroughs", "briggs", "buenos", "calais", "caracas", "cassius", "cerberus", "ceres", "cervantes", "chantilly", "chartres", "chester", "connally", "conner", "coors", "cummings", "curtis", "daedalus", "dionysus", "dobbs", "dolores", "edmonds" };
+        private static readonly string[] supplementDict = new string[] { "aids", "applicator",
+            "capacitor", "digitize", "electromagnet", "ellipsoid", "exosphere",
+            "extensible", "ferromagnet", "graphics", "hydromagnet", "polygraph",
+            "toroid", "superconduct", "backscatter", "connectionism"};
+
+        private static readonly string[] properNouns = new string[] { "abrams", "achilles",
+            "acropolis", "adams", "agnes", "aires", "alexander", "alexis", "alfred",
+            "algiers", "alps", "amadeus", "ames", "amos", "andes", "angeles",
+            "annapolis", "antilles", "aquarius", "archimedes", "arkansas", "asher",
+            "ashly", "athens", "atkins", "atlantis", "avis", "bahamas", "bangor",
+            "barbados", "barger", "bering", "brahms", "brandeis", "brussels",
+            "bruxelles", "cairns", "camoros", "camus", "carlos", "celts", "chalker",
+            "charles", "cheops", "ching", "christmas", "cocos", "collins",
+            "columbus", "confucius", "conners", "connolly", "copernicus", "cramer",
+            "cyclops", "cygnus", "cyprus", "dallas", "damascus", "daniels", "davies",
+            "davis", "decker", "denning", "dennis", "descartes", "dickens", "doris",
+            "douglas", "downs", "dreyfus", "dukakis", "dulles", "dumfries",
+            "ecclesiastes", "edwards", "emily", "erasmus", "euphrates", "evans",
+            "everglades", "fairbanks", "federales", "fisher", "fitzsimmons",
+            "fleming", "forbes", "fowler", "france", "francis", "goering",
+            "goodling", "goths", "grenadines", "guiness", "hades", "harding",
+            "harris", "hastings", "hawkes", "hawking", "hayes", "heights",
+            "hercules", "himalayas", "hippocrates", "hobbs", "holmes", "honduras",
+            "hopkins", "hughes", "humphreys", "illinois", "indianapolis",
+            "inverness", "iris", "iroquois", "irving", "isaacs", "italy", "james",
+            "jarvis", "jeffreys", "jesus", "jones", "josephus", "judas", "julius",
+            "kansas", "keynes", "kipling", "kiwanis", "lansing", "laos", "leeds",
+            "levis", "leviticus", "lewis", "louis", "maccabees", "madras",
+            "maimonides", "maldive", "massachusetts", "matthews", "mauritius",
+            "memphis", "mercedes", "midas", "mingus", "minneapolis", "mohammed",
+            "moines", "morris", "moses", "myers", "myknos", "nablus", "nanjing",
+            "nantes", "naples", "neal", "netherlands", "nevis", "nostradamus",
+            "oedipus", "olympus", "orleans", "orly", "papas", "paris", "parker",
+            "pauling", "peking", "pershing", "peter", "peters", "philippines",
+            "phineas", "pisces", "pryor", "pythagoras", "queens", "rabelais",
+            "ramses", "reynolds", "rhesus", "rhodes", "richards", "robins",
+            "rodgers", "rogers", "rubens", "sagittarius", "seychelles", "socrates",
+            "texas", "thames", "thomas", "tiberias", "tunis", "venus", "vilnius",
+            "wales", "warner", "wilkins", "williams", "wyoming", "xmas", "yonkers",
+            "zeus", "frances", "aarhus", "adonis", "andrews", "angus", "antares",
+            "aquinas", "arcturus", "ares", "artemis", "augustus", "ayers",
+            "barnabas", "barnes", "becker", "bejing", "biggs", "billings", "boeing",
+            "boris", "borroughs", "briggs", "buenos", "calais", "caracas", "cassius",
+            "cerberus", "ceres", "cervantes", "chantilly", "chartres", "chester",
+            "connally", "conner", "coors", "cummings", "curtis", "daedalus",
+            "dionysus", "dobbs", "dolores", "edmonds"};
 
         internal class DictEntry
         {
@@ -787,71 +837,70 @@ namespace Lucene.Net.Analysis.En
                 k = j + 1;
 
                 DictEntry entry = WordInDict();
-                if (entry != null) /*
-		  {
-			  if (!entry.exception)
-	                                                * if it's in the dictionary and
-	                                                * not an exception
-	                                                */
+                if (entry != null) 
+		        {
+			        if (!entry.exception) 
+                    {
+                        // if it's in the dictionary and
+                        // not an exception
+                        return;
+                    }
+                }
+
+                /* try removing the "ed" */
+                word.Length = j + 1;
+                k = j;
+                if (Lookup())
                 {
                     return;
                 }
-            }
 
-            /* try removing the "ed" */
-            word.Length = j + 1;
-            k = j;
-            if (Lookup())
-            {
-                return;
-            }
+                /*
+                 * try removing a doubled consonant. if the root isn't found in the
+                 * dictionary, the default is to leave it doubled. This will correctly
+                 * capture `backfilled' -> `backfill' instead of `backfill' ->
+                 * `backfille', and seems correct most of the time
+                 */
 
-            /*
-             * try removing a doubled consonant. if the root isn't found in the
-             * dictionary, the default is to leave it doubled. This will correctly
-             * capture `backfilled' -> `backfill' instead of `backfill' ->
-             * `backfille', and seems correct most of the time
-             */
+                if (DoubleC(k))
+                {
+                    word.Length = k;
+                    k--;
+                    if (Lookup())
+                    {
+                        return;
+                    }
+                    word.UnsafeWrite(word.CharAt(k));
+                    k++;
+                    Lookup();
+                    return;
+                }
 
-            if (DoubleC(k))
-            {
-                word.Length = k;
-                k--;
-                if (Lookup())
+                /* if we have a `un-' prefix, then leave the word alone */
+                /* (this will sometimes screw up with `under-', but we */
+                /* will take care of that later) */
+
+                if ((word.CharAt(0) == 'u') && (word.CharAt(1) == 'n'))
                 {
+                    word.UnsafeWrite('e');
+                    word.UnsafeWrite('d');
+                    k = k + 2;
+                    // nolookup()
                     return;
                 }
-                word.UnsafeWrite(word.CharAt(k));
-                k++;
-                Lookup();
-                return;
-            }
 
-            /* if we have a `un-' prefix, then leave the word alone */
-            /* (this will sometimes screw up with `under-', but we */
-            /* will take care of that later) */
+                /*
+                 * it wasn't found by just removing the `d' or the `ed', so prefer to end
+                 * with an `e' (e.g., `microcoded' -> `microcode').
+                 */
 
-            if ((word.CharAt(0) == 'u') && (word.CharAt(1) == 'n'))
-            {
+                word.Length = j + 1;
                 word.UnsafeWrite('e');
-                word.UnsafeWrite('d');
-                k = k + 2;
-                // nolookup()
+                k = j + 1;
+                // nolookup() - we already tried the "e" ending
                 return;
             }
-
-            /*
-             * it wasn't found by just removing the `d' or the `ed', so prefer to end
-             * with an `e' (e.g., `microcoded' -> `microcode').
-             */
-
-            word.Length = j + 1;
-            word.UnsafeWrite('e');
-            k = j + 1;
-            // nolookup() - we already tried the "e" ending
-            return;
         }
-        //}
 
         /* return TRUE if word ends with a double consonant */
         private bool DoubleC(int i)
@@ -1825,11 +1874,10 @@ namespace Lucene.Net.Analysis.En
         //    return word.ToString();
         //}
 
-        //// LUCENENET: Do we need this?
-        //internal virtual ICharSequence asCharSequence()
-        //{
-        //    return result != null ? result : word;
-        //}
+        internal virtual ICharSequence AsCharSequence()
+        {
+            return result != null ? (ICharSequence)new CharsRef(result) : word;
+        }
 
         internal virtual string String
         {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c185dc/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj b/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
index 963a45f..105feec 100644
--- a/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
+++ b/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
@@ -21,7 +21,7 @@
     <DebugType>full</DebugType>
     <Optimize>false</Optimize>
     <OutputPath>bin\Debug\</OutputPath>
-    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <DefineConstants>TRACE</DefineConstants>
     <ErrorReport>prompt</ErrorReport>
     <WarningLevel>4</WarningLevel>
     <Prefer32Bit>false</Prefer32Bit>


[35/50] [abbrv] lucenenet git commit: Bug (memory leak): Dispose() should cascade to Dispose() to other objects and release its own resources.

Posted by sy...@apache.org.
Bug (memory leak): Dispose() should cascade to Dispose() to other objects and release its own resources.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/9124e03e
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/9124e03e
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/9124e03e

Branch: refs/heads/analysis-work
Commit: 9124e03e9240b2bb21b985526466c4e1f9674d6b
Parents: 132da59
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Aug 20 19:15:42 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 11:26:48 2016 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Core/Analysis/Tokenizer.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9124e03e/src/Lucene.Net.Core/Analysis/Tokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/Tokenizer.cs b/src/Lucene.Net.Core/Analysis/Tokenizer.cs
index b0215be..a354310 100644
--- a/src/Lucene.Net.Core/Analysis/Tokenizer.cs
+++ b/src/Lucene.Net.Core/Analysis/Tokenizer.cs
@@ -73,7 +73,7 @@ namespace Lucene.Net.Analysis
         /// </summary>
         public override void Dispose()
         {
-            input.Close();
+            input.Dispose();
             // LUCENE-2387: don't hold onto Reader after close, so
             // GC can reclaim
             InputPending = ILLEGAL_STATE_READER;


[09/50] [abbrv] lucenenet git commit: Fixed bugs in KStemmer that were causing tests TestKStemmer.TestVocabulary and TestKStemmer.TestRandomStrings to fail.

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c185dc/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData8.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData8.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData8.cs
index 7fdae2b..db28e93 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData8.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData8.cs
@@ -47,6 +47,606 @@ namespace Lucene.Net.Analysis.En
         private KStemData8()
         {
         }
-        internal static string[] data = new string[] { "tenor", "tenpin", "tense", "tensile", "tension", "tent", "tentacle", "tentative", "tenterhooks", "tenuity", "tenuous", "tenure", "tepee", "tepid", "tequila", "tercentenary", "tercentennial", "term", "termagant", "terminable", "terminal", "terminate", "termination", "terminology", "terminus", "termite", "terms", "tern", "terpsichorean", "terrace", "terracotta", "terrain", "terrapin", "terrestrial", "terrible", "terribly", "terrier", "terrific", "terrifically", "terrify", "territorial", "territory", "terror", "terrorise", "terrorism", "terrorize", "terrycloth", "terse", "tertian", "tertiary", "terylene", "tessellated", "test", "testament", "testamentary", "testate", "testator", "tester", "testicle", "testify", "testimonial", "testimony", "testis", "testy", "tetanus", "tetchy", "tether", "teutonic", "text", "textbook", "textile", "textual", "texture", "thalidomide", "than", "thane", "thank", "thankful", "thankless", "thanks", "tha
 nksgiving", "thankyou", "that", "thatch", "thaw", "the", "theater", "theatergoer", "theatre", "theatregoer", "theatrical", "theatricals", "thee", "theft", "thegn", "their", "theirs", "theism", "them", "theme", "themselves", "then", "thence", "thenceforth", "theocracy", "theocratic", "theodolite", "theologian", "theology", "theorem", "theoretical", "theoretically", "theorise", "theorist", "theorize", "theory", "theosophy", "therapeutic", "therapeutics", "therapist", "therapy", "there", "thereabouts", "thereafter", "thereby", "therefore", "therein", "thereinafter", "thereof", "thereon", "thereto", "thereunder", "thereupon", "therm", "thermal", "thermionic", "thermionics", "thermodynamics", "thermometer", "thermonuclear", "thermoplastic", "thermos", "thermosetting", "thermostat", "thesaurus", "these", "thesis", "thespian", "thews", "they", "thick", "thicken", "thickener", "thicket", "thickheaded", "thickness", "thickset", "thief", "thieve", "thieving", "thievish", "thigh", "thimble", "
 thimbleful", "thin", "thine", "thing", "thingamajig", "thingamujig", "things", "think", "thinkable", "thinking", "thinner", "third", "thirst", "thirsty", "thirteen", "thirty", "this", "thistle", "thistledown", "thither", "thole", "thong", "thorax", "thorn", "thorny", "thorough", "thoroughbred", "thoroughfare", "thoroughgoing", "those", "thou", "though", "thought", "thoughtful", "thoughtless", "thousand", "thraldom", "thrall", "thralldom", "thrash", "thrashing", "thread", "threadbare", "threadlike", "threat", "threaten", "three", "threepence", "threnody", "thresh", "thresher", "threshold", "threw", "thrice", "thrift", "thrifty", "thrill", "thriller", "thrive", "throat", "throaty", "throb", "throes", "thrombosis", "throne", "throng", "throstle", "throttle", "through", "throughout", "throughput", "throughway", "throw", "throwaway", "throwback", "thru", "thrum", "thrush", "thrust", "thruster", "thruway", "thud", "thug", "thuggery", "thumb", "thumbnail", "thumbscrew", "thumbtack", "thump
 ", "thumping", "thunder", "thunderbolt", "thunderclap", "thundercloud", "thundering", "thunderous", "thunderstorm", "thunderstruck", "thundery", "thurible", "thursday", "thus", "thwack", "thwart", "thy", "thyme", "thyroid", "thyself", "tiara", "tibia", "tic", "tick", "ticker", "tickertape", "ticket", "ticking", "tickle", "tickler", "ticklish", "tidal", "tidbit", "tiddler", "tiddley", "tiddleywinks", "tiddly", "tiddlywinks", "tide", "tidemark", "tidewater", "tideway", "tidings", "tidy", "tie", "tiebreaker", "tiepin", "tier", "tiff", "tiffin", "tig", "tiger", "tigerish", "tight", "tighten", "tightfisted", "tightrope", "tights", "tightwad", "tigress", "tike", "tilde", "tile", "till", "tillage", "tiller", "tilt", "timber", "timbered", "timberline", "timbre", "timbrel", "time", "timekeeper", "timeless", "timely", "timepiece", "timer", "times", "timesaving", "timeserver", "timeserving", "timetable", "timework", "timeworn", "timid", "timing", "timorous", "timothy", "timpani", "timpanist", 
 "tin", "tincture", "tinder", "tinderbox", "tinfoil", "ting", "tingaling", "tinge", "tingle", "tinker", "tinkle", "tinny", "tinplate", "tinsel", "tint", "tintack", "tintinnabulation", "tiny", "tip", "tippet", "tipple", "tipstaff", "tipster", "tipsy", "tiptoe", "tirade", "tire", "tired", "tireless", "tiresome", "tiro", "tissue", "tit", "titan", "titanic", "titanium", "titbit", "titfer", "tithe", "titillate", "titivate", "title", "titled", "titleholder", "titmouse", "titter", "tittivate", "tittle", "titty", "titular", "tizzy", "tnt", "toad", "toadstool", "toady", "toast", "toaster", "toastmaster", "tobacco", "tobacconist", "toboggan", "toccata", "tocsin", "tod", "today", "toddle", "toddler", "toddy", "toe", "toehold", "toenail", "toff", "toffee", "toffy", "tog", "toga", "together", "togetherness", "toggle", "togs", "toil", "toilet", "toiletries", "toiletry", "toils", "tokay", "token", "told", "tolerable", "tolerably", "tolerance", "tolerant", "tolerate", "toleration", "toll", "tollgate
 ", "tollhouse", "tomahawk", "tomato", "tomb", "tombola", "tomboy", "tombstone", "tomcat", "tome", "tomfoolery", "tommyrot", "tomorrow", "tomtit", "ton", "tonal", "tonality", "tone", "toneless", "tong", "tongs", "tongue", "tonic", "tonight", "tonnage", "tonne", "tonsil", "tonsilitis", "tonsillitis", "tonsorial", "tonsure", "tontine", "too", "took", "tool", "toot", "tooth", "toothache", "toothbrush", "toothcomb", "toothpaste", "toothpick", "toothsome", "toothy", "tootle", "toots", "tootsie", "top", "topaz", "topcoat", "topdressing", "topee", "topgallant", "topi", "topiary", "topic", "topical", "topicality", "topknot", "topless", "topmast", "topmost", "topographer", "topographical", "topography", "topper", "topping", "topple", "tops", "topsail", "topside", "topsoil", "topspin", "toque", "tor", "torch", "torchlight", "tore", "toreador", "torment", "tormentor", "torn", "tornado", "torpedo", "torpid", "torpor", "torque", "torrent", "torrential", "torrid", "torsion", "torso", "tort", "tort
 illa", "tortoise", "tortoiseshell", "tortuous", "torture", "tory", "toss", "tot", "total", "totalisator", "totalitarian", "totalitarianism", "totality", "totalizator", "tote", "totem", "totter", "tottery", "toucan", "touch", "touchdown", "touched", "touching", "touchline", "touchstone", "touchy", "tough", "toughen", "toupee", "tour", "tourism", "tourist", "tournament", "tourney", "tourniquet", "tousle", "tout", "tow", "towards", "towel", "toweling", "towelling", "tower", "towering", "towline", "town", "townscape", "township", "townsman", "townspeople", "towpath", "toxaemia", "toxemia", "toxic", "toxicologist", "toxicology", "toxin", "toy", "toyshop", "trace", "tracer", "tracery", "trachea", "trachoma", "tracing", "track", "trackless", "tracksuit", "tract", "tractable", "traction", "tractor", "trad", "trade", "trademark", "trader", "trades", "tradesman", "tradespeople", "tradition", "traditional", "traditionalism", "traduce", "traffic", "trafficator", "trafficker", "tragedian", "trag
 edienne", "tragedy", "tragic", "tragicomedy", "trail", "trailer", "train", "trainbearer", "trainee", "training", "trainman", "traipse", "trait", "traitor", "traitorous", "trajectory", "tram", "tramline", "trammel", "trammels", "tramp", "trample", "trampoline", "trance", "tranny", "tranquil", "tranquiliser", "tranquillise", "tranquillize", "tranquillizer", "transact", "transaction", "transactions", "transalpine", "transatlantic", "transcend", "transcendence", "transcendent", "transcendental", "transcendentalism", "transcontinental", "transcribe", "transcript", "transcription", "transept", "transfer", "transference", "transfiguration", "transfigure", "transfix", "transform", "transformation", "transformer", "transfuse", "transgress", "tranship", "transience", "transient", "transistor", "transistorise", "transistorize", "transit", "transition", "transitive", "translate", "translator", "transliterate", "translucence", "translucent", "transmigration", "transmission", "transmit", "transmi
 tter", "transmogrify", "transmute", "transoceanic", "transom", "transparency", "transparent", "transpiration", "transpire", "transplant", "transpolar", "transport", "transportation", "transporter", "transpose", "transship", "transubstantiation", "transverse", "transvestism", "transvestite", "trap", "trapdoor", "trapeze", "trapezium", "trapezoid", "trapper", "trappings", "trappist", "trapse", "trapshooting", "trash", "trashcan", "trashy", "trauma", "traumatic", "travail", "travel", "traveled", "traveler", "travelled", "traveller", "travelog", "travelogue", "travels", "travelsick", "traverse", "travesty", "trawl", "trawler", "tray", "treacherous", "treachery", "treacle", "treacly", "tread", "treadle", "treadmill", "treason", "treasonable", "treasure", "treasurer", "treasury", "treat", "treatise", "treatment", "treaty", "treble", "tree", "trefoil", "trek", "trellis", "tremble", "tremendous", "tremolo", "tremor", "tremulous", "trench", "trenchant", "trencher", "trencherman", "trend", "t
 rendsetter", "trendy", "trepan", "trephine", "trepidation", "trespass", "tresses", "trestle", "trews", "triad", "trial", "triangle", "triangular", "tribal", "tribalism", "tribe", "tribesman", "tribulation", "tribunal", "tribune", "tributary", "tribute", "trice", "triceps", "trichinosis", "trick", "trickery", "trickle", "trickster", "tricky", "tricolor", "tricolour", "tricycle", "trident", "triennial", "trier", "trifle", "trifler", "trifling", "trigger", "trigonometry", "trike", "trilateral", "trilby", "trilingual", "trill", "trillion", "trilobite", "trilogy", "trim", "trimaran", "trimester", "trimmer", "trimming", "trinitrotoluene", "trinity", "trinket", "trio", "trip", "tripartite", "triple", "triplet", "triplex", "triplicate", "tripod", "tripos", "tripper", "tripping", "triptych", "tripwire", "trireme", "trisect", "trite", "triumph", "triumphal", "triumphant", "triumvir", "triumvirate", "trivet", "trivia", "trivial", "trivialise", "triviality", "trivialize", "trochaic", "trochee",
  "trod", "trodden", "troglodyte", "troika", "trojan", "troll", "trolley", "trolleybus", "trollop", "trombone", "trombonist", "troop", "trooper", "troops", "troopship", "trope", "trophy", "tropic", "tropical", "tropics", "trot", "troth", "trotskyist", "trotter", "troubadour", "trouble", "troublemaker", "troubleshooter", "troublesome", "trough", "trounce", "troupe", "trouper", "trouser", "trousers", "trousseau", "trout", "trove", "trowel", "truancy", "truant", "truce", "truck", "trucking", "truckle", "truculence", "truculent", "trudge", "true", "trueborn", "truehearted", "truelove", "truffle", "trug", "truism", "truly", "trump", "trumpery", "trumpet", "trumps", "truncate", "truncheon", "trundle", "trunk", "trunks", "truss", "trust", "trustee", "trusteeship", "trustful", "trustworthy", "trusty", "truth", "truthful", "try", "tryst", "tsar", "tsarina", "tsp", "tub", "tuba", "tubby", "tube", "tubeless", "tuber", "tubercular", "tuberculosis", "tubful", "tubing", "tubular", "tuck", "tucker"
 , "tuckerbag", "tuesday", "tuft", "tug", "tugboat", "tuition", "tulip", "tulle", "tumble", "tumbledown", "tumbler", "tumbleweed", "tumbrel", "tumbril", "tumescent", "tumid", "tummy", "tumor", "tumour", "tumult", "tumultuous", "tumulus", "tun", "tuna", "tundra", "tune", "tuneful", "tuneless", "tuner", "tungsten", "tunic", "tunnel", "tunny", "tup", "tuppence", "tuppenny", "turban", "turbid", "turbine", "turbojet", "turboprop", "turbot", "turbulence", "turbulent", "turd", "tureen", "turf", "turgid", "turkey", "turmeric", "turmoil", "turn", "turnabout", "turncoat", "turncock", "turner", "turning", "turnip", "turnkey", "turnout", "turnover", "turnpike", "turnstile", "turntable", "turpentine", "turpitude", "turquoise", "turret", "turtle", "turtledove", "turtleneck", "tush", "tusk", "tusker", "tussle", "tussock", "tut", "tutelage", "tutelary", "tutor", "tutorial", "tutu", "tuxedo", "twaddle", "twain", "twang", "twat", "tweak", "twee", "tweed", "tweeds", "tweedy", "tweet", "tweeter", "tweez
 ers", "twelfth", "twelve", "twelvemonth", "twenty", "twerp", "twice", "twiddle", "twig", "twilight", "twill", "twin", "twinge", "twinkle", "twinkling", "twirl", "twirp", "twist", "twister", "twit", "twitch", "twitter", "twixt", "two", "twofaced", "twopence", "twopenny", "twosome", "tycoon", "tyke", "tympanum", "type", "typecast", "typeface", "typescript", "typesetter", "typewriter", "typewritten", "typhoid", "typhoon", "typhus", "typical", "typically", "typify", "typist", "typographer", "typographic", "typography", "tyrannical", "tyrannise", "tyrannize", "tyrannosaurus", "tyranny", "tyrant", "tyre", "tyro", "tzar", "tzarina", "ubiquitous", "ucca", "udder", "ufo", "ugh", "ugly", "uhf", "ukulele", "ulcer", "ulcerate", "ulcerous", "ullage", "ulna", "ult", "ulterior", "ultimate", "ultimately", "ultimatum", "ultimo", "ultramarine", "ultrasonic", "ultraviolet", "umber", "umbrage", "umbrella", "umlaut", "umpire", "umpteen", "unabashed", "unabated", "unable", "unabridged", "unaccompanied", 
 "unaccountable", "unaccustomed", "unadopted", "unadulterated", "unadvised", "unaffected", "unalloyed", "unanimous", "unannounced", "unanswerable", "unapproachable", "unarmed", "unasked", "unassuming", "unattached", "unattended", "unavailing", "unawares", "unbalance", "unbar", "unbearable", "unbearably", "unbeknown", "unbelief", "unbelievable", "unbeliever", "unbelieving", "unbend", "unbending", "unbidden", "unbind", "unblushing", "unborn", "unbosom", "unbounded", "unbowed", "unbridled", "unbuckle", "unburden", "unbuttoned", "uncanny", "unceremonious", "uncertain", "uncertainty", "uncharitable", "uncharted", "unchecked", "unchristian", "unclad", "uncle", "unclean", "unclouded", "uncolored", "uncoloured", "uncomfortable", "uncommitted", "uncommonly", "uncompromising", "unconcerned", "unconditional", "unconscionable", "unconscious", "unconsidered", "uncork", "uncouple", "uncouth", "uncover", "uncritical", "uncrowned", "uncrushable", "unction", "unctuous", "uncut", "undaunted", "undecei
 ve", "undecided", "undeclared", "undeniable", "under", "underact", "underarm", "underbelly", "underbrush", "undercarriage", "undercharge", "underclothes", "undercoat", "undercover", "undercurrent", "undercut", "underdog", "underdone", "underestimate", "underfelt", "underfloor", "underfoot", "undergarment", "undergo", "undergraduate", "underground", "undergrowth", "underhand", "underhanded", "underhung", "underlay", "underlie", "underline", "underling", "underlying", "undermanned", "undermentioned", "undermine", "underneath", "undernourish", "underpants", "underpass", "underpin", "underplay", "underprivileged", "underproof", "underquote", "underrate", "underscore", "undersecretary", "undersell", "undersexed", "undershirt", "underside", "undersigned", "undersized", "underslung", "understaffed", "understand", "understanding", "understate", "understatement", "understudy", "undertake", "undertaker", "undertaking", "undertone", "undertow", "underwater", "underwear", "underweight", "underw
 ent", "underworld", "underwrite", "underwriter", "undesirable", "undeveloped", "undies", "undischarged", "undistinguished", "undivided", "undo", "undoing", "undomesticated", "undone", "undoubted", "undress", "undressed", "undue", "undulate", "undulation", "unduly", "undying", "unearth", "unearthly", "unease", "uneasy", "uneconomic", "uneducated", "unemployed", "unemployment", "unenlightened", "unenviable", "unequal", "unequaled", "unequalled", "unequivocal", "unerring", "unesco", "uneven", "uneventful", "unexampled", "unexceptionable", "unfailing", "unfaithful", "unfaltering", "unfathomable", "unfathomed", "unfavorable", "unfavourable", "unfeeling", "unfettered", "unfit", "unflagging", "unflappable", "unflinching", "unfold", "unforeseen", "unforgettable", "unfortunate", "unfortunately", "unfounded", "unfrequented", "unfrock", "unfurl", "ungainly", "ungenerous", "ungodly", "ungovernable", "ungracious", "ungrateful", "ungrudging", "unguarded", "unguent", "unhallowed", "unhand", "unhap
 pily", "unhappy", "unhealthy", "unheard", "unhinge", "unholy", "unhook", "unhorse", "unicef", "unicorn", "unidentified", "unification", "uniform", "uniformed", "unify", "unilateral", "unimpeachable", "uninformed", "uninhabitable", "uninhibited", "uninterested", "uninterrupted", "union", "unionise", "unionism", "unionist", "unionize", "unique", "unisex", "unison", "unit", "unitarian", "unite", "united", "unity", "universal", "universally", "universe", "university", "unkempt", "unkind", "unkindly", "unknowing", "unknown", "unlawful", "unlearn", "unleash", "unleavened", "unless", "unlettered", "unlike", "unlikely", "unload", "unlock", "unloose", "unloosen", "unmade", "unmannerly", "unmarried", "unmask", "unmatched", "unmeasured", "unmentionable", "unmentionables", "unmindful", "unmistakable", "unmitigated", "unmoved", "unnatural", "unnecessary", "unnerve", "unnumbered", "uno", "unobtrusive", "unofficial", "unorthodox", "unpack", "unparalleled", "unparliamentary", "unperson", "unpick", 
 "unplaced", "unplayable", "unpleasant", "unplumbed", "unpracticed", "unpractised", "unprecedented", "unprejudiced", "unpretentious", "unprincipled", "unprintable", "unprofessional", "unprompted", "unprovoked", "unqualified", "unquestionable", "unquestioning", "unquiet", "unquote", "unravel", "unreadable", "unreal", "unreasonable", "unreasoning", "unrelenting", "unrelieved", "unremitting", "unrequited", "unreserved", "unrest", "unrestrained", "unrip", "unrivaled", "unrivalled", "unroll", "unruffled", "unruly", "unsaddle", "unsaid", "unsavory", "unsavoury", "unsay", "unscathed", "unschooled", "unscramble", "unscrew", "unscripted", "unscrupulous", "unseat", "unseeing", "unseemly", "unseen", "unserviceable", "unsettle", "unsettled", "unsex", "unsexed", "unshakable", "unshakeable", "unshod", "unsightly", "unskilled", "unsociable", "unsocial", "unsophisticated", "unsound", "unsparing", "unspeakable", "unspotted", "unstop", "unstrung", "unstuck", "unstudied", "unsullied", "unsung", "unswer
 ving", "untangle", "untapped", "untenable", "unthinkable", "unthinking", "untie", "until", "untimely", "untinged", "untiring", "unto", "untold", "untouchable", "untoward", "untruth", "untruthful", "untutored", "unused", "unusual", "unusually", "unutterable", "unvarnished", "unveil", "unversed", "unvoiced", "unwarranted", "unwed", "unwell", "unwieldy", "unwind", "unwitting", "unwonted", "unzip", "upbeat", "upbraid", "upbringing", "upcoming", "update", "upend", "upgrade", "upheaval", "uphill", "uphold", "upholster", "upholsterer", "upholstery", "upkeep", "upland", "uplift", "upon", "upper", "uppercut", "uppermost", "uppish", "uppity", "upright", "uprising", "uproar", "uproarious", "uproot", "upset", "upshot", "upstage", "upstairs", "upstanding", "upstart", "upstream", "upsurge", "upswing", "uptake", "uptight", "uptown", "upturn", "upturned", "upward", "upwards", "uranium", "uranus", "urban", "urbane", "urbanise", "urbanize", "urchin", "urge", "urgent", "uric", "urinal", "urinary", "ur
 inate", "urine", "urn", "usage", "use", "useful", "usefulness", "useless", "user", "usher", "usherette", "ussr", "usual", "usually", "usurer", "usurious", "usurp", "usury", "utensil", "uterine", "uterus", "utilise", "utilitarian", "utilitarianism", "utility", "utilize", "utmost", "utopia", "utopian", "utter", "utterance", "utterly", "uvula", "uvular", "uxorious", "vac", "vacancy", "vacant", "vacate", "vacation", "vaccinate", "vaccination", "vaccine", "vacillate", "vacuity", "vacuous", "vacuum", "vagabond", "vagary", "vagina", "vaginal", "vagrancy", "vagrant", "vague", "vain", "vainglorious", "vainglory", "valance", "vale", "valediction", "valedictory", "valency", "valentine", "valerian", "valet", "valetudinarian", "valiant", "valiantly", "valid", "validate", "valise", "valley", "valor", "valour", "valse", "valuable", "valuation", "value", "valuer", "valve", "valvular", "vamoose", "vamp", "vampire", "van", "vanadium", "vandal", "vandalise", "vandalism", "vandalize", "vane", "vanguard
 ", "vanilla", "vanish", "vanity", "vanquish", "vantagepoint", "vapid", "vapidity", "vapor", "vaporise", "vaporize", "vaporous", "vapors", "vapour", "vapours", "variability", "variable", "variance", "variant", "variation", "varicolored", "varicoloured", "varicose", "varied", "variegated", "variegation", "variety", "variform", "variorum", "various", "variously", "varlet", "varmint", "varnish", "varsity", "vary", "vascular", "vase", "vasectomy", "vaseline", "vassal", "vassalage", "vast", "vastly", "vastness", "vat", "vatican", "vaudeville", "vault", "vaulted", "vaulting", "vaunt", "veal", "vector", "veer", "veg", "vegan", "vegetable", "vegetarian", "vegetarianism", "vegetate", "vegetation", "vehement", "vehicle", "vehicular", "veil", "veiled", "vein", "veined", "veining", "velar", "velarize", "veld", "veldt", "vellum", "velocipede", "velocity", "velour", "velours", "velvet", "velveteen", "velvety", "venal", "vend", "vendee", "vender", "vendetta", "vendor", "veneer", "venerable", "vener
 ate", "venereal", "vengeance", "vengeful", "venial", "venison", "venom", "venomous", "venous", "vent", "ventilate", "ventilation", "ventilator", "ventricle", "ventriloquism", "ventriloquist", "venture", "venturer", "venturesome", "venue", "veracious", "veracity", "veranda", "verandah", "verb", "verbal", "verbalise", "verbalize", "verbally", "verbatim", "verbena", "verbiage", "verbose", "verbosity", "verdant", "verdict", "verdigris", "verdure", "verge", "verger", "verify", "verily", "verisimilitude", "veritable", "verity", "vermicelli", "vermiculite", "vermiform", "vermifuge", "vermilion", "vermin", "verminous", "vermouth", "vernacular", "vernal", "veronal", "veronica", "verruca", "versatile", "verse", "versed", "versification", "versify", "version", "verso", "versus", "vertebra", "vertebrate", "vertex", "vertical", "vertiginous", "vertigo", "verve", "very", "vesicle", "vesicular", "vesper", "vespers", "vessel", "vest", "vestibule", "vestige", "vestigial", "vestment", "vestry", "vest
 ryman", "vesture", "vet", "vetch", "veteran", "veterinary", "veto", "vex", "vexation", "vexatious", "vhf", "via", "viable", "viaduct", "vial", "viands", "vibes", "vibrancy", "vibrant", "vibraphone", "vibrate", "vibration", "vibrato", "vibrator", "vicar", "vicarage", "vicarious", "vice", "vicelike", "viceregal", "vicereine", "viceroy", "vicinity", "vicious", "vicissitudes", "victim", "victimise", "victimize", "victor", "victorian", "victorious", "victory", "victual", "victualer", "victualler", "victuals", "vicuaa", "vicuana", "vide", "videlicet", "video", "videotape", "vie", "view", "viewer", "viewfinder", "viewless", "viewpoint", "vigil", "vigilance", "vigilant", "vigilante", "vignette", "vigor", "vigorous", "vigour", "viking", "vile", "vilification", "vilify", "villa", "village", "villager", "villain", "villainies", "villainous", "villainy", "villein", "villeinage", "villenage", "vim", "vinaigrette", "vindicate", "vindication", "vindictive", "vine", "vinegar", "vinegary", "vinery",
  "vineyard", "vino", "vinous", "vintage", "vintner", "vinyl", "viol", "viola", "violate", "violence", "violent", "violet", "violin", "violoncello", "vip", "viper", "virago", "virgin", "virginal", "virginals", "virginia", "virginity", "virgo", "virgule", "virile", "virility", "virologist", "virology", "virtu", "virtual", "virtually", "virtue", "virtuosity", "virtuoso", "virtuous", "virulence", "virulent", "virus", "visa", "visage", "viscera", "visceral", "viscosity", "viscount", "viscountcy", "viscountess", "viscous", "vise", "visibility", "visible", "visibly", "vision", "visionary", "visit", "visitant", "visitation", "visiting", "visitor", "visor", "vista", "visual", "visualise", "visualize", "visually", "vital", "vitalise", "vitality", "vitalize", "vitally", "vitals", "vitamin", "vitiate", "viticulture", "vitreous", "vitrify", "vitriol", "vitriolic", "vituperate", "vituperation", "vituperative", "vivace", "vivacious", "vivarium", "vivid", "viviparous", "vivisect", "vivisection", "v
 ivisectionist", "vixen", "vixenish", "vizier", "vocab", "vocabulary", "vocal", "vocalise", "vocalist", "vocalize", "vocation", "vocational", "vocative", "vociferate", "vociferation", "vociferous", "vodka", "vogue", "voice", "voiceless", "void", "voile", "vol", "volatile", "volcanic", "volcano", "vole", "volition", "volitional", "volley", "volleyball", "volt", "voltage", "voluble", "volume", "volumes", "voluminous", "voluntary", "volunteer", "voluptuary", "voluptuous", "volute", "vomit", "voodoo", "voracious", "vortex", "votary", "vote", "voter", "votive", "vouch", "voucher", "vouchsafe", "vow", "vowel", "voyage", "voyager", "voyages", "voyeur", "vtol", "vulcanise", "vulcanite", "vulcanize", "vulgar", "vulgarian", "vulgarise", "vulgarism", "vulgarity", "vulgarize", "vulgate", "vulnerable", "vulpine", "vulture", "vulva", "wac", "wack", "wacky", "wad", "wadding", "waddle", "wade", "wader", "wadge", "wadi", "wady", "wafer", "waffle", "waft", "wag", "wage", "wager", "wages", "waggery", "
 waggish", "waggle", "waggon", "waggoner", "waggonette", "wagon", "wagoner", "wagonette", "wagtail", "waif", "wail", "wain", "wainscot", "waist", "waistband", "waistcoat", "waistline", "wait", "waiter", "waits", "waive", "waiver", "wake", "wakeful", "waken", "waking", "walk", "walkabout", "walkaway", "walker", "walking", "walkout", "walkover", "wall", "walla", "wallaby", "wallah", "wallet", "wallflower", "wallop", "walloping", "wallow", "wallpaper", "walnut", "walrus", "waltz", "wampum", "wan", "wand", "wander", "wanderer", "wandering", "wanderings", "wanderlust", "wane", "wangle", "wank", "wanker", "want", "wanting", "wanton", "wants", "wapiti", "war", "warble", "warbler", "ward", "warden", "warder", "wardrobe", "wardroom", "warehouse", "wares", "warfare", "warhead", "warhorse", "warily", "warlike", "warlock", "warlord", "warm", "warmonger", "warmth", "warn", "warning", "warp", "warpath", "warrant", "warrantee", "warrantor", "warranty", "warren", "warrior", "warship", "wart", "warth
 og", "wartime", "wary", "was", "wash", "washable", "washbasin", "washboard", "washbowl", "washcloth", "washday", "washer", "washerwoman", "washhouse", "washing", "washout", "washroom", "washstand", "washwoman", "washy", "wasp", "waspish", "wassail", "wast", "wastage", "waste", "wasteful", "waster", "wastrel", "watch", "watchband", "watchdog", "watches", "watchful", "watchmaker", "watchman", "watchtower", "watchword", "water", "waterborne", "watercolor", "watercolour", "watercourse", "watercress", "waterfall", "waterfowl", "waterfront", "waterhole", "waterline", "waterlogged", "waterloo", "waterman", "watermark", "watermelon", "watermill", "waterpower", "waterproof", "waters", "watershed", "waterside", "waterspout", "watertight", "waterway", "waterwheel", "waterwings", "waterworks", "watery", "watt", "wattage", "wattle", "wave", "wavelength", "waver", "wavy", "wax", "waxen", "waxworks", "waxy", "way", "waybill", "wayfarer", "wayfaring", "waylay", "ways", "wayside", "wayward", "weak",
  "weaken", "weakling", "weakness", "weal", "weald", "wealth", "wealthy", "wean", "weapon", "weaponry", "wear", "wearing", "wearisome", "weary", "weasel", "weather", "weatherboard", "weathercock", "weatherglass", "weatherman", "weatherproof", "weathers", "weave", "weaver", "web", "webbed", "webbing", "wed", "wedded", "wedding", "wedge", "wedged", "wedgwood", "wedlock", "wednesday", "wee", "weed", "weeds", "weedy", "week", "weekday", "weekend", "weekender", "weekly", "weeknight", "weeny", "weep", "weeping", "weepy", "weevil", "weft", "weigh", "weighbridge", "weight", "weighted", "weighting", "weightless", "weighty", "weir", "weird", "weirdie", "weirdo", "welch", "welcome", "weld", "welder", "welfare", "welkin", "well", "wellbeing", "wellborn", "wellington", "wellspring", "welsh", "welt", "weltanschauung", "welter", "welterweight", "wen", "wench", "wend", "wensleydale", "went", "wept", "were", "werewolf", "wert", "wesleyan", "west", "westbound", "westerly", "western", "westerner", "wes
 ternise", "westernize", "westernmost", "westward", "westwards", "wet", "wether", "wetting", "whack", "whacked", "whacker", "whacking", "whale", "whalebone", "whaler", "whaling", "wham", "wharf", "what", "whatever", "whatnot", "wheat", "wheaten", "wheedle", "wheel", "wheelbarrow", "wheelbase", "wheelchair", "wheelhouse", "wheeling", "wheels", "wheelwright", "wheeze", "wheezy", "whelk", "whelp", "when", "whence", "whenever", "where", "whereabouts", "whereas", "whereat", "whereby", "wherefore", "wherefores", "wherein", "whereof", "whereon", "wheresoever", "whereto", "whereupon", "wherever", "wherewithal", "wherry", "whet", "whether", "whetstone", "whew", "whey", "which", "whichever", "whiff", "whiffy", "whig", "while", "whim", "whimper", "whimsey", "whimsical", "whimsicality", "whimsy", "whin", "whine", "whiner", "whinny", "whip", "whipcord", "whiplash", "whippersnapper", "whippet", "whipping", "whippoorwill", "whippy", "whir", "whirl", "whirligig", "whirlpool", "whirlwind", "whirlybir
 d", "whirr", "whisk", "whisker", "whiskered", "whiskers", "whiskey", "whisky", "whisper", "whist", "whistle", "whit", "white", "whitebait", "whitehall", "whiten", "whitening", "whites", "whitethorn", "whitethroat", "whitewash", "whither", "whiting", "whitlow", "whitsun", "whitsuntide", "whittle", "whiz", "whizz", "who", "whoa", "whodunit", "whoever", "whole", "wholemeal", "wholesale", "wholesaler", "wholesome", "wholly", "whom", "whoop", "whoopee", "whoosh", "whop", "whopper", "whopping", "whore", "whorehouse", "whoremonger", "whorl", "whortleberry", "whose", "whosoever", "why", "whys", "wick", "wicked", "wicker", "wickerwork", "wicket", "wide", "widely", "widen", "widespread", "widgeon", "widow", "widowed", "widower", "widowhood", "width", "wield", "wife", "wifely", "wig", "wigged", "wigging", "wiggle", "wight", "wigwam", "wilco", "wild", "wildcat", "wildebeest", "wilderness", "wildfire", "wildfowl", "wildlife", "wildly", "wile", "wiles", "wilful", "wiliness", "will", "willful", "w
 illies", "willing", "willow", "willowy", "willpower", "wilt", "wily", "wimple", "wimpy", "win", "wince", "winceyette", "winch", "wind", "windbag", "windbreak", "windcheater", "windfall", "windily", "winding", "windjammer", "windlass", "windless", "windmill", "window", "windowpane", "windowsill", "windpipe", "windscreen", "windshield", "windsock", "windstorm", "windswept", "windward", "windy", "wine", "winebibbing", "wineglass", "winepress", "wineskin", "wing", "winger", "wings", "wingspan", "wink", "winkers", "winkle", "winner", "winning", "winnings", "winnow", "winsome", "winter", "wintergreen", "wintertime", "wintry", "wipe", "wiper", "wire", "wirecutters", "wireless", "wiretap", "wireworm", "wiring", "wiry", "wisdom", "wise", "wisecrack", "wish", "wishbone", "wisp", "wispy", "wisteria", "wistful", "wit", "witch", "witchcraft", "witchdoctor", "witchery", "witching", "with", "withal", "withdraw", "withdrawal", "withdrawn", "withe", "wither", "withering", "withers", "withhold", "wit
 hin", "without", "withstand", "withy", "witless", "witness", "witticism", "witting", "witty", "wives", "wizard", "wizardry", "wizened", "woad", "wobble", "wobbly", "woe", "woebegone", "woeful", "wog", "woke", "woken", "wold", "wolf", "wolfhound", "wolfram", "wolfsbane", "woman", "womanhood", "womanise", "womanish", "womanize", "womankind", "womanly", "womb", "wombat", "womenfolk", "won", "wonder", "wonderful", "wonderland", "wonderment", "wonders", "wondrous", "wonky", "wont", "wonted", "woo", "wood", "woodbine", "woodblock", "woodcock", "woodcraft", "woodcut", "woodcutter", "wooded", "wooden", "woodenheaded", "woodland", "woodlouse", "woodpecker", "woodpile", "woodshed", "woodsman", "woodwind", "woodwork", "woodworm", "woody", "wooer", "woof", "woofer", "wool", "woolen", "woolens", "woolgather", "woolgathering", "woollen", "woollens", "woolly", "woolsack", "woozy", "wop", "word", "wording", "wordless", "wordplay", "words", "wordy", "wore", "work", "workable", "workaday", "workbag",
  "workbasket", "workbench", "workbook", "workday", "worker", "workhorse", "workhouse", "working", "workings", "workman", "workmanlike", "workmanship", "workout", "workpeople", "workroom", "works", "workshop", "worktop", "world", "worldly", "worldshaking", "worldwide", "worm", "wormhole", "wormwood", "wormy", "worn", "worried", "worrisome", "worry", "worse", "worsen", "worship", "worshipful", "worst", "worsted", "wort", "worth", "worthless", "worthwhile", "worthy", "wot", "wotcher", "would", "wouldst", "wound", "wove", "woven", "wow", "wrac", "wrack", "wraith", "wrangle", "wrangler", "wrap", "wrapper", "wrapping", "wrath", "wreak", "wreath", "wreathe", "wreck", "wreckage", "wrecker", "wren", "wrench", "wrest", "wrestle", "wretch", "wretched", "wriggle", "wright", "wring", "wringer", "wrinkle", "wrist", "wristband", "wristlet", "wristwatch", "wristy", "writ", "write", "writer", "writhe", "writing", "writings", "written", "wrong", "wrongdoing", "wrongful", "wrongheaded", "wrote", "wrot
 h", "wrought", "wrung", "wry", "wurst", "wyvern", "xenon", "xenophobia", "xerox", "xylophone", "yacht", "yachting", "yachtsman", "yahoo", "yak", "yam", "yammer", "yang", "yank", "yankee", "yap", "yard", "yardage", "yardarm", "yardstick", "yarn", "yarrow", "yashmak", "yaw", "yawl", "yawn", "yaws", "yea", "yeah", "year", "yearbook", "yearling", "yearlong", "yearly", "yearn", "yearning", "years", "yeast", "yeasty", "yell", "yellow", "yelp", "yen", "yeoman", "yeomanry", "yes", "yesterday", "yet", "yeti", "yew", "yid", "yiddish", "yield", "yielding", "yin", "yippee", "yobbo", "yodel", "yoga", "yoghurt", "yogi", "yogurt", "yoke", "yokel", "yolk", "yonder", "yonks", "yore", "yorker", "you", "young", "younger", "youngster", "your", "yours", "yourself", "youth", "youthful", "yowl", "yoyo", "yucca", "yule", "yuletide", "zany", "zeal", "zealot", "zealotry", "zealous", "zebra", "zebu", "zed", "zeitgeist", "zen", "zenana", "zenith", "zephyr", "zeppelin", "zero", "zest", "ziggurat", "zigzag", "zi
 nc", "zinnia", "zionism", "zip", "zipper", "zippy", "zither", "zizz", "zodiac", "zombi", "zombie", "zonal", "zone", "zoning", "zonked", "zoo", "zoologist", "zoology", "zoom", "zoophyte", "zouave", "zucchini", "zulu" };
+        internal static string[] data = new string[] {
+            "tenor","tenpin","tense","tensile","tension",
+            "tent","tentacle","tentative","tenterhooks","tenuity",
+            "tenuous","tenure","tepee","tepid","tequila",
+            "tercentenary","tercentennial","term","termagant","terminable",
+            "terminal","terminate","termination","terminology","terminus",
+            "termite","terms","tern","terpsichorean","terrace",
+            "terracotta","terrain","terrapin","terrestrial","terrible",
+            "terribly","terrier","terrific","terrifically","terrify",
+            "territorial","territory","terror","terrorise","terrorism",
+            "terrorize","terrycloth","terse","tertian","tertiary",
+            "terylene","tessellated","test","testament","testamentary",
+            "testate","testator","tester","testicle","testify",
+            "testimonial","testimony","testis","testy","tetanus",
+            "tetchy","tether","teutonic","text","textbook",
+            "textile","textual","texture","thalidomide","than",
+            "thane","thank","thankful","thankless","thanks",
+            "thanksgiving","thankyou","that","thatch","thaw",
+            "the","theater","theatergoer","theatre","theatregoer",
+            "theatrical","theatricals","thee","theft","thegn",
+            "their","theirs","theism","them","theme",
+            "themselves","then","thence","thenceforth","theocracy",
+            "theocratic","theodolite","theologian","theology","theorem",
+            "theoretical","theoretically","theorise","theorist","theorize",
+            "theory","theosophy","therapeutic","therapeutics","therapist",
+            "therapy","there","thereabouts","thereafter","thereby",
+            "therefore","therein","thereinafter","thereof","thereon",
+            "thereto","thereunder","thereupon","therm","thermal",
+            "thermionic","thermionics","thermodynamics","thermometer","thermonuclear",
+            "thermoplastic","thermos","thermosetting","thermostat","thesaurus",
+            "these","thesis","thespian","thews","they",
+            "thick","thicken","thickener","thicket","thickheaded",
+            "thickness","thickset","thief","thieve","thieving",
+            "thievish","thigh","thimble","thimbleful","thin",
+            "thine","thing","thingamajig","thingamujig","things",
+            "think","thinkable","thinking","thinner","third",
+            "thirst","thirsty","thirteen","thirty","this",
+            "thistle","thistledown","thither","thole","thong",
+            "thorax","thorn","thorny","thorough","thoroughbred",
+            "thoroughfare","thoroughgoing","those","thou","though",
+            "thought","thoughtful","thoughtless","thousand","thraldom",
+            "thrall","thralldom","thrash","thrashing","thread",
+            "threadbare","threadlike","threat","threaten","three",
+            "threepence","threnody","thresh","thresher","threshold",
+            "threw","thrice","thrift","thrifty","thrill",
+            "thriller","thrive","throat","throaty","throb",
+            "throes","thrombosis","throne","throng","throstle",
+            "throttle","through","throughout","throughput","throughway",
+            "throw","throwaway","throwback","thru","thrum",
+            "thrush","thrust","thruster","thruway","thud",
+            "thug","thuggery","thumb","thumbnail","thumbscrew",
+            "thumbtack","thump","thumping","thunder","thunderbolt",
+            "thunderclap","thundercloud","thundering","thunderous","thunderstorm",
+            "thunderstruck","thundery","thurible","thursday","thus",
+            "thwack","thwart","thy","thyme","thyroid",
+            "thyself","tiara","tibia","tic","tick",
+            "ticker","tickertape","ticket","ticking","tickle",
+            "tickler","ticklish","tidal","tidbit","tiddler",
+            "tiddley","tiddleywinks","tiddly","tiddlywinks","tide",
+            "tidemark","tidewater","tideway","tidings","tidy",
+            "tie","tiebreaker","tiepin","tier","tiff",
+            "tiffin","tig","tiger","tigerish","tight",
+            "tighten","tightfisted","tightrope","tights","tightwad",
+            "tigress","tike","tilde","tile","till",
+            "tillage","tiller","tilt","timber","timbered",
+            "timberline","timbre","timbrel","time","timekeeper",
+            "timeless","timely","timepiece","timer","times",
+            "timesaving","timeserver","timeserving","timetable","timework",
+            "timeworn","timid","timing","timorous","timothy",
+            "timpani","timpanist","tin","tincture","tinder",
+            "tinderbox","tinfoil","ting","tingaling","tinge",
+            "tingle","tinker","tinkle","tinny","tinplate",
+            "tinsel","tint","tintack","tintinnabulation","tiny",
+            "tip","tippet","tipple","tipstaff","tipster",
+            "tipsy","tiptoe","tirade","tire","tired",
+            "tireless","tiresome","tiro","tissue","tit",
+            "titan","titanic","titanium","titbit","titfer",
+            "tithe","titillate","titivate","title","titled",
+            "titleholder","titmouse","titter","tittivate","tittle",
+            "titty","titular","tizzy","tnt","toad",
+            "toadstool","toady","toast","toaster","toastmaster",
+            "tobacco","tobacconist","toboggan","toccata","tocsin",
+            "tod","today","toddle","toddler","toddy",
+            "toe","toehold","toenail","toff","toffee",
+            "toffy","tog","toga","together","togetherness",
+            "toggle","togs","toil","toilet","toiletries",
+            "toiletry","toils","tokay","token","told",
+            "tolerable","tolerably","tolerance","tolerant","tolerate",
+            "toleration","toll","tollgate","tollhouse","tomahawk",
+            "tomato","tomb","tombola","tomboy","tombstone",
+            "tomcat","tome","tomfoolery","tommyrot","tomorrow",
+            "tomtit","ton","tonal","tonality","tone",
+            "toneless","tong","tongs","tongue","tonic",
+            "tonight","tonnage","tonne","tonsil","tonsilitis",
+            "tonsillitis","tonsorial","tonsure","tontine","too",
+            "took","tool","toot","tooth","toothache",
+            "toothbrush","toothcomb","toothpaste","toothpick","toothsome",
+            "toothy","tootle","toots","tootsie","top",
+            "topaz","topcoat","topdressing","topee","topgallant",
+            "topi","topiary","topic","topical","topicality",
+            "topknot","topless","topmast","topmost","topographer",
+            "topographical","topography","topper","topping","topple",
+            "tops","topsail","topside","topsoil","topspin",
+            "toque","tor","torch","torchlight","tore",
+            "toreador","torment","tormentor","torn","tornado",
+            "torpedo","torpid","torpor","torque","torrent",
+            "torrential","torrid","torsion","torso","tort",
+            "tortilla","tortoise","tortoiseshell","tortuous","torture",
+            "tory","toss","tot","total","totalisator",
+            "totalitarian","totalitarianism","totality","totalizator","tote",
+            "totem","totter","tottery","toucan","touch",
+            "touchdown","touched","touching","touchline","touchstone",
+            "touchy","tough","toughen","toupee","tour",
+            "tourism","tourist","tournament","tourney","tourniquet",
+            "tousle","tout","tow","towards","towel",
+            "toweling","towelling","tower","towering","towline",
+            "town","townscape","township","townsman","townspeople",
+            "towpath","toxaemia","toxemia","toxic","toxicologist",
+            "toxicology","toxin","toy","toyshop","trace",
+            "tracer","tracery","trachea","trachoma","tracing",
+            "track","trackless","tracksuit","tract","tractable",
+            "traction","tractor","trad","trade","trademark",
+            "trader","trades","tradesman","tradespeople","tradition",
+            "traditional","traditionalism","traduce","traffic","trafficator",
+            "trafficker","tragedian","tragedienne","tragedy","tragic",
+            "tragicomedy","trail","trailer","train","trainbearer",
+            "trainee","training","trainman","traipse","trait",
+            "traitor","traitorous","trajectory","tram","tramline",
+            "trammel","trammels","tramp","trample","trampoline",
+            "trance","tranny","tranquil","tranquiliser","tranquillise",
+            "tranquillize","tranquillizer","transact","transaction","transactions",
+            "transalpine","transatlantic","transcend","transcendence","transcendent",
+            "transcendental","transcendentalism","transcontinental","transcribe","transcript",
+            "transcription","transept","transfer","transference","transfiguration",
+            "transfigure","transfix","transform","transformation","transformer",
+            "transfuse","transgress","tranship","transience","transient",
+            "transistor","transistorise","transistorize","transit","transition",
+            "transitive","translate","translator","transliterate","translucence",
+            "translucent","transmigration","transmission","transmit","transmitter",
+            "transmogrify","transmute","transoceanic","transom","transparency",
+            "transparent","transpiration","transpire","transplant","transpolar",
+            "transport","transportation","transporter","transpose","transship",
+            "transubstantiation","transverse","transvestism","transvestite","trap",
+            "trapdoor","trapeze","trapezium","trapezoid","trapper",
+            "trappings","trappist","trapse","trapshooting","trash",
+            "trashcan","trashy","trauma","traumatic","travail",
+            "travel","traveled","traveler","travelled","traveller",
+            "travelog","travelogue","travels","travelsick","traverse",
+            "travesty","trawl","trawler","tray","treacherous",
+            "treachery","treacle","treacly","tread","treadle",
+            "treadmill","treason","treasonable","treasure","treasurer",
+            "treasury","treat","treatise","treatment","treaty",
+            "treble","tree","trefoil","trek","trellis",
+            "tremble","tremendous","tremolo","tremor","tremulous",
+            "trench","trenchant","trencher","trencherman","trend",
+            "trendsetter","trendy","trepan","trephine","trepidation",
+            "trespass","tresses","trestle","trews","triad",
+            "trial","triangle","triangular","tribal","tribalism",
+            "tribe","tribesman","tribulation","tribunal","tribune",
+            "tributary","tribute","trice","triceps","trichinosis",
+            "trick","trickery","trickle","trickster","tricky",
+            "tricolor","tricolour","tricycle","trident","triennial",
+            "trier","trifle","trifler","trifling","trigger",
+            "trigonometry","trike","trilateral","trilby","trilingual",
+            "trill","trillion","trilobite","trilogy","trim",
+            "trimaran","trimester","trimmer","trimming","trinitrotoluene",
+            "trinity","trinket","trio","trip","tripartite",
+            "triple","triplet","triplex","triplicate","tripod",
+            "tripos","tripper","tripping","triptych","tripwire",
+            "trireme","trisect","trite","triumph","triumphal",
+            "triumphant","triumvir","triumvirate","trivet","trivia",
+            "trivial","trivialise","triviality","trivialize","trochaic",
+            "trochee","trod","trodden","troglodyte","troika",
+            "trojan","troll","trolley","trolleybus","trollop",
+            "trombone","trombonist","troop","trooper","troops",
+            "troopship","trope","trophy","tropic","tropical",
+            "tropics","trot","troth","trotskyist","trotter",
+            "troubadour","trouble","troublemaker","troubleshooter","troublesome",
+            "trough","trounce","troupe","trouper","trouser",
+            "trousers","trousseau","trout","trove","trowel",
+            "truancy","truant","truce","truck","trucking",
+            "truckle","truculence","truculent","trudge","true",
+            "trueborn","truehearted","truelove","truffle","trug",
+            "truism","truly","trump","trumpery","trumpet",
+            "trumps","truncate","truncheon","trundle","trunk",
+            "trunks","truss","trust","trustee","trusteeship",
+            "trustful","trustworthy","trusty","truth","truthful",
+            "try","tryst","tsar","tsarina","tsp",
+            "tub","tuba","tubby","tube","tubeless",
+            "tuber","tubercular","tuberculosis","tubful","tubing",
+            "tubular","tuck","tucker","tuckerbag","tuesday",
+            "tuft","tug","tugboat","tuition","tulip",
+            "tulle","tumble","tumbledown","tumbler","tumbleweed",
+            "tumbrel","tumbril","tumescent","tumid","tummy",
+            "tumor","tumour","tumult","tumultuous","tumulus",
+            "tun","tuna","tundra","tune","tuneful",
+            "tuneless","tuner","tungsten","tunic","tunnel",
+            "tunny","tup","tuppence","tuppenny","turban",
+            "turbid","turbine","turbojet","turboprop","turbot",
+            "turbulence","turbulent","turd","tureen","turf",
+            "turgid","turkey","turmeric","turmoil","turn",
+            "turnabout","turncoat","turncock","turner","turning",
+            "turnip","turnkey","turnout","turnover","turnpike",
+            "turnstile","turntable","turpentine","turpitude","turquoise",
+            "turret","turtle","turtledove","turtleneck","tush",
+            "tusk","tusker","tussle","tussock","tut",
+            "tutelage","tutelary","tutor","tutorial","tutu",
+            "tuxedo","twaddle","twain","twang","twat",
+            "tweak","twee","tweed","tweeds","tweedy",
+            "tweet","tweeter","tweezers","twelfth","twelve",
+            "twelvemonth","twenty","twerp","twice","twiddle",
+            "twig","twilight","twill","twin","twinge",
+            "twinkle","twinkling","twirl","twirp","twist",
+            "twister","twit","twitch","twitter","twixt",
+            "two","twofaced","twopence","twopenny","twosome",
+            "tycoon","tyke","tympanum","type","typecast",
+            "typeface","typescript","typesetter","typewriter","typewritten",
+            "typhoid","typhoon","typhus","typical","typically",
+            "typify","typist","typographer","typographic","typography",
+            "tyrannical","tyrannise","tyrannize","tyrannosaurus","tyranny",
+            "tyrant","tyre","tyro","tzar","tzarina",
+            "ubiquitous","ucca","udder","ufo","ugh",
+            "ugly","uhf","ukulele","ulcer","ulcerate",
+            "ulcerous","ullage","ulna","ult","ulterior",
+            "ultimate","ultimately","ultimatum","ultimo","ultramarine",
+            "ultrasonic","ultraviolet","umber","umbrage","umbrella",
+            "umlaut","umpire","umpteen","unabashed","unabated",
+            "unable","unabridged","unaccompanied","unaccountable","unaccustomed",
+            "unadopted","unadulterated","unadvised","unaffected","unalloyed",
+            "unanimous","unannounced","unanswerable","unapproachable","unarmed",
+            "unasked","unassuming","unattached","unattended","unavailing",
+            "unawares","unbalance","unbar","unbearable","unbearably",
+            "unbeknown","unbelief","unbelievable","unbeliever","unbelieving",
+            "unbend","unbending","unbidden","unbind","unblushing",
+            "unborn","unbosom","unbounded","unbowed","unbridled",
+            "unbuckle","unburden","unbuttoned","uncanny","unceremonious",
+            "uncertain","uncertainty","uncharitable","uncharted","unchecked",
+            "unchristian","unclad","uncle","unclean","unclouded",
+            "uncolored","uncoloured","uncomfortable","uncommitted","uncommonly",
+            "uncompromising","unconcerned","unconditional","unconscionable","unconscious",
+            "unconsidered","uncork","uncouple","uncouth","uncover",
+            "uncritical","uncrowned","uncrushable","unction","unctuous",
+            "uncut","undaunted","undeceive","undecided","undeclared",
+            "undeniable","under","underact","underarm","underbelly",
+            "underbrush","undercarriage","undercharge","underclothes","undercoat",
+            "undercover","undercurrent","undercut","underdog","underdone",
+            "underestimate","underfelt","underfloor","underfoot","undergarment",
+            "undergo","undergraduate","underground","undergrowth","underhand",
+            "underhanded","underhung","underlay","underlie","underline",
+            "underling","underlying","undermanned","undermentioned","undermine",
+            "underneath","undernourish","underpants","underpass","underpin",
+            "underplay","underprivileged","underproof","underquote","underrate",
+            "underscore","undersecretary","undersell","undersexed","undershirt",
+            "underside","undersigned","undersized","underslung","understaffed",
+            "understand","understanding","understate","understatement","understudy",
+            "undertake","undertaker","undertaking","undertone","undertow",
+            "underwater","underwear","underweight","underwent","underworld",
+            "underwrite","underwriter","undesirable","undeveloped","undies",
+            "undischarged","undistinguished","undivided","undo","undoing",
+            "undomesticated","undone","undoubted","undress","undressed",
+            "undue","undulate","undulation","unduly","undying",
+            "unearth","unearthly","unease","uneasy","uneconomic",
+            "uneducated","unemployed","unemployment","unenlightened","unenviable",
+            "unequal","unequaled","unequalled","unequivocal","unerring",
+            "unesco","uneven","uneventful","unexampled","unexceptionable",
+            "unfailing","unfaithful","unfaltering","unfathomable","unfathomed",
+            "unfavorable","unfavourable","unfeeling","unfettered","unfit",
+            "unflagging","unflappable","unflinching","unfold","unforeseen",
+            "unforgettable","unfortunate","unfortunately","unfounded","unfrequented",
+            "unfrock","unfurl","ungainly","ungenerous","ungodly",
+            "ungovernable","ungracious","ungrateful","ungrudging","unguarded",
+            "unguent","unhallowed","unhand","unhappily","unhappy",
+            "unhealthy","unheard","unhinge","unholy","unhook",
+            "unhorse","unicef","unicorn","unidentified","unification",
+            "uniform","uniformed","unify","unilateral","unimpeachable",
+            "uninformed","uninhabitable","uninhibited","uninterested","uninterrupted",
+            "union","unionise","unionism","unionist","unionize",
+            "unique","unisex","unison","unit","unitarian",
+            "unite","united","unity","universal","universally",
+            "universe","university","unkempt","unkind","unkindly",
+            "unknowing","unknown","unlawful","unlearn","unleash",
+            "unleavened","unless","unlettered","unlike","unlikely",
+            "unload","unlock","unloose","unloosen","unmade",
+            "unmannerly","unmarried","unmask","unmatched","unmeasured",
+            "unmentionable","unmentionables","unmindful","unmistakable","unmitigated",
+            "unmoved","unnatural","unnecessary","unnerve","unnumbered",
+            "uno","unobtrusive","unofficial","unorthodox","unpack",
+            "unparalleled","unparliamentary","unperson","unpick","unplaced",
+            "unplayable","unpleasant","unplumbed","unpracticed","unpractised",
+            "unprecedented","unprejudiced","unpretentious","unprincipled","unprintable",
+            "unprofessional","unprompted","unprovoked","unqualified","unquestionable",
+            "unquestioning","unquiet","unquote","unravel","unreadable",
+            "unreal","unreasonable","unreasoning","unrelenting","unrelieved",
+            "unremitting","unrequited","unreserved","unrest","unrestrained",
+            "unrip","unrivaled","unrivalled","unroll","unruffled",
+            "unruly","unsaddle","unsaid","unsavory","unsavoury",
+            "unsay","unscathed","unschooled","unscramble","unscrew",
+            "unscripted","unscrupulous","unseat","unseeing","unseemly",
+            "unseen","unserviceable","unsettle","unsettled","unsex",
+            "unsexed","unshakable","unshakeable","unshod","unsightly",
+            "unskilled","unsociable","unsocial","unsophisticated","unsound",
+            "unsparing","unspeakable","unspotted","unstop","unstrung",
+            "unstuck","unstudied","unsullied","unsung","unswerving",
+            "untangle","untapped","untenable","unthinkable","unthinking",
+            "untie","until","untimely","untinged","untiring",
+            "unto","untold","untouchable","untoward","untruth",
+            "untruthful","untutored","unused","unusual","unusually",
+            "unutterable","unvarnished","unveil","unversed","unvoiced",
+            "unwarranted","unwed","unwell","unwieldy","unwind",
+            "unwitting","unwonted","unzip","upbeat","upbraid",
+            "upbringing","upcoming","update","upend","upgrade",
+            "upheaval","uphill","uphold","upholster","upholsterer",
+            "upholstery","upkeep","upland","uplift","upon",
+            "upper","uppercut","uppermost","uppish","uppity",
+            "upright","uprising","uproar","uproarious","uproot",
+            "upset","upshot","upstage","upstairs","upstanding",
+            "upstart","upstream","upsurge","upswing","uptake",
+            "uptight","uptown","upturn","upturned","upward",
+            "upwards","uranium","uranus","urban","urbane",
+            "urbanise","urbanize","urchin","urge","urgent",
+            "uric","urinal","urinary","urinate","urine",
+            "urn","usage","use","useful","usefulness",
+            "useless","user","usher","usherette","ussr",
+            "usual","usually","usurer","usurious","usurp",
+            "usury","utensil","uterine","uterus","utilise",
+            "utilitarian","utilitarianism","utility","utilize","utmost",
+            "utopia","utopian","utter","utterance","utterly",
+            "uvula","uvular","uxorious","vac","vacancy",
+            "vacant","vacate","vacation","vaccinate","vaccination",
+            "vaccine","vacillate","vacuity","vacuous","vacuum",
+            "vagabond","vagary","vagina","vaginal","vagrancy",
+            "vagrant","vague","vain","vainglorious","vainglory",
+            "valance","vale","valediction","valedictory","valency",
+            "valentine","valerian","valet","valetudinarian","valiant",
+            "valiantly","valid","validate","valise","valley",
+            "valor","valour","valse","valuable","valuation",
+            "value","valuer","valve","valvular","vamoose",
+            "vamp","vampire","van","vanadium","vandal",
+            "vandalise","vandalism","vandalize","vane","vanguard",
+            "vanilla","vanish","vanity","vanquish","vantagepoint",
+            "vapid","vapidity","vapor","vaporise","vaporize",
+            "vaporous","vapors","vapour","vapours","variability",
+            "variable","variance","variant","variation","varicolored",
+            "varicoloured","varicose","varied","variegated","variegation",
+            "variety","variform","variorum","various","variously",
+            "varlet","varmint","varnish","varsity","vary",
+            "vascular","vase","vasectomy","vaseline","vassal",
+            "vassalage","vast","vastly","vastness","vat",
+            "vatican","vaudeville","vault","vaulted","vaulting",
+            "vaunt","veal","vector","veer","veg",
+            "vegan","vegetable","vegetarian","vegetarianism","vegetate",
+            "vegetation","vehement","vehicle","vehicular","veil",
+            "veiled","vein","veined","veining","velar",
+            "velarize","veld","veldt","vellum","velocipede",
+            "velocity","velour","velours","velvet","velveteen",
+            "velvety","venal","vend","vendee","vender",
+            "vendetta","vendor","veneer","venerable","venerate",
+            "venereal","vengeance","vengeful","venial","venison",
+            "venom","venomous","venous","vent","ventilate",
+            "ventilation","ventilator","ventricle","ventriloquism","ventriloquist",
+            "venture","venturer","venturesome","venue","veracious",
+            "veracity","veranda","verandah","verb","verbal",
+            "verbalise","verbalize","verbally","verbatim","verbena",
+            "verbiage","verbose","verbosity","verdant","verdict",
+            "verdigris","verdure","verge","verger","verify",
+            "verily","verisimilitude","veritable","verity","vermicelli",
+            "vermiculite","vermiform","vermifuge","vermilion","vermin",
+            "verminous","vermouth","vernacular","vernal","veronal",
+            "veronica","verruca","versatile","verse","versed",
+            "versification","versify","version","verso","versus",
+            "vertebra","vertebrate","vertex","vertical","vertiginous",
+            "vertigo","verve","very","vesicle","vesicular",
+            "vesper","vespers","vessel","vest","vestibule",
+            "vestige","vestigial","vestment","vestry","vestryman",
+            "vesture","vet","vetch","veteran","veterinary",
+            "veto","vex","vexation","vexatious","vhf",
+            "via","viable","viaduct","vial","viands",
+            "vibes","vibrancy","vibrant","vibraphone","vibrate",
+            "vibration","vibrato","vibrator","vicar","vicarage",
+            "vicarious","vice","vicelike","viceregal","vicereine",
+            "viceroy","vicinity","vicious","vicissitudes","victim",
+            "victimise","victimize","victor","victorian","victorious",
+            "victory","victual","victualer","victualler","victuals",
+            "vicuaa","vicuana","vide","videlicet","video",
+            "videotape","vie","view","viewer","viewfinder",
+            "viewless","viewpoint","vigil","vigilance","vigilant",
+            "vigilante","vignette","vigor","vigorous","vigour",
+            "viking","vile","vilification","vilify","villa",
+            "village","villager","villain","villainies","villainous",
+            "villainy","villein","villeinage","villenage","vim",
+            "vinaigrette","vindicate","vindication","vindictive","vine",
+            "vinegar","vinegary","vinery","vineyard","vino",
+            "vinous","vintage","vintner","vinyl","viol",
+            "viola","violate","violence","violent","violet",
+            "violin","violoncello","vip","viper","virago",
+            "virgin","virginal","virginals","virginia","virginity",
+            "virgo","virgule","virile","virility","virologist",
+            "virology","virtu","virtual","virtually","virtue",
+            "virtuosity","virtuoso","virtuous","virulence","virulent",
+            "virus","visa","visage","viscera","visceral",
+            "viscosity","viscount","viscountcy","viscountess","viscous",
+            "vise","visibility","visible","visibly","vision",
+            "visionary","visit","visitant","visitation","visiting",
+            "visitor","visor","vista","visual","visualise",
+            "visualize","visually","vital","vitalise","vitality",
+            "vitalize","vitally","vitals","vitamin","vitiate",
+            "viticulture","vitreous","vitrify","vitriol","vitriolic",
+            "vituperate","vituperation","vituperative","vivace","vivacious",
+            "vivarium","vivid","viviparous","vivisect","vivisection",
+            "vivisectionist","vixen","vixenish","vizier","vocab",
+            "vocabulary","vocal","vocalise","vocalist","vocalize",
+            "vocation","vocational","vocative","vociferate","vociferation",
+            "vociferous","vodka","vogue","voice","voiceless",
+            "void","voile","vol","volatile","volcanic",
+            "volcano","vole","volition","volitional","volley",
+            "volleyball","volt","voltage","voluble","volume",
+            "volumes","voluminous","voluntary","volunteer","voluptuary",
+            "voluptuous","volute","vomit","voodoo","voracious",
+            "vortex","votary","vote","voter","votive",
+            "vouch","voucher","vouchsafe","vow","vowel",
+            "voyage","voyager","voyages","voyeur","vtol",
+            "vulcanise","vulcanite","vulcanize","vulgar","vulgarian",
+            "vulgarise","vulgarism","vulgarity","vulgarize","vulgate",
+            "vulnerable","vulpine","vulture","vulva","wac",
+            "wack","wacky","wad","wadding","waddle",
+            "wade","wader","wadge","wadi","wady",
+            "wafer","waffle","waft","wag","wage",
+            "wager","wages","waggery","waggish","waggle",
+            "waggon","waggoner","waggonette","wagon","wagoner",
+            "wagonette","wagtail","waif","wail","wain",
+            "wainscot","waist","waistband","waistcoat","waistline",
+            "wait","waiter","waits","waive","waiver",
+            "wake","wakeful","waken","waking","walk",
+            "walkabout","walkaway","walker","walking","walkout",
+            "walkover","wall","walla","wallaby","wallah",
+            "wallet","wallflower","wallop","walloping","wallow",
+            "wallpaper","walnut","walrus","waltz","wampum",
+            "wan","wand","wander","wanderer","wandering",
+            "wanderings","wanderlust","wane","wangle","wank",
+            "wanker","want","wanting","wanton","wants",
+            "wapiti","war","warble","warbler","ward",
+            "warden","warder","wardrobe","wardroom","warehouse",
+            "wares","warfare","warhead","warhorse","warily",
+            "warlike","warlock","warlord","warm","warmonger",
+            "warmth","warn","warning","warp","warpath",
+            "warrant","warrantee","warrantor","warranty","warren",
+            "warrior","warship","wart","warthog","wartime",
+            "wary","was","wash","washable","washbasin",
+            "washboard","washbowl","washcloth","washday","washer",
+            "washerwoman","washhouse","washing","washout","washroom",
+            "washstand","washwoman","washy","wasp","waspish",
+            "wassail","wast","wastage","waste","wasteful",
+            "waster","wastrel","watch","watchband","watchdog",
+            "watches","watchful","watchmaker","watchman","watchtower",
+            "watchword","water","waterborne","watercolor","watercolour",
+            "watercourse","watercress","waterfall","waterfowl","waterfront",
+            "waterhole","waterline","waterlogged","waterloo","waterman",
+            "watermark","watermelon","watermill","waterpower","waterproof",
+            "waters","watershed","waterside","waterspout","watertight",
+            "waterway","waterwheel","waterwings","waterworks","watery",
+            "watt","wattage","wattle","wave","wavelength",
+            "waver","wavy","wax","waxen","waxworks",
+            "waxy","way","waybill","wayfarer","wayfaring",
+            "waylay","ways","wayside","wayward","weak",
+            "weaken","weakling","weakness","weal","weald",
+            "wealth","wealthy","wean","weapon","weaponry",
+            "wear","wearing","wearisome","weary","weasel",
+            "weather","weatherboard","weathercock","weatherglass","weatherman",
+            "weatherproof","weathers","weave","weaver","web",
+            "webbed","webbing","wed","wedded","wedding",
+            "wedge","wedged","wedgwood","wedlock","wednesday",
+            "wee","weed","weeds","weedy","week",
+            "weekday","weekend","weekender","weekly","weeknight",
+            "weeny","weep","weeping","weepy","weevil",
+            "weft","weigh","weighbridge","weight","weighted",
+            "weighting","weightless","weighty","weir","weird",
+            "weirdie","weirdo","welch","welcome","weld",
+            "welder","welfare","welkin","well","wellbeing",
+            "wellborn","wellington","wellspring","welsh","welt",
+            "weltanschauung","welter","welterweight","wen","wench",
+            "wend","wensleydale","went","wept","were",
+            "werewolf","wert","wesleyan","west","westbound",
+            "westerly","western","westerner","westernise","westernize",
+            "westernmost","westward","westwards","wet","wether",
+            "wetting","whack","whacked","whacker","whacking",
+            "whale","whalebone","whaler","whaling","wham",
+            "wharf","what","whatever","whatnot","wheat",
+            "wheaten","wheedle","wheel","wheelbarrow","wheelbase",
+            "wheelchair","wheelhouse","wheeling","wheels","wheelwright",
+            "wheeze","wheezy","whelk","whelp","when",
+            "whence","whenever","where","whereabouts","whereas",
+            "whereat","whereby","wherefore","wherefores","wherein",
+            "whereof","whereon","wheresoever","whereto","whereupon",
+            "wherever","wherewithal","wherry","whet","whether",
+            "whetstone","whew","whey","which","whichever",
+            "whiff","whiffy","whig","while","whim",
+            "whimper","whimsey","whimsical","whimsicality","whimsy",
+            "whin","whine","whiner","whinny","whip",
+            "whipcord","whiplash","whippersnapper","whippet","whipping",
+            "whippoorwill","whippy","whir","whirl","whirligig",
+            "whirlpool","whirlwind","whirlybird","whirr","whisk",
+            "whisker","whiskered","whiskers","whiskey","whisky",
+            "whisper","whist","whistle","whit","white",
+            "whitebait","whitehall","whiten","whitening","whites",
+            "whitethorn","whitethroat","whitewash","whither","whiting",
+            "whitlow","whitsun","whitsuntide","whittle","whiz",
+            "whizz","who","whoa","whodunit","whoever",
+            "whole","wholemeal","wholesale","wholesaler","wholesome",
+            "wholly","whom","whoop","whoopee","whoosh",
+            "whop","whopper","whopping","whore","whorehouse",
+            "whoremonger","whorl","whortleberry","whose","whosoever",
+            "why","whys","wick","wicked","wicker",
+            "wickerwork","wicket","wide","widely","widen",
+            "widespread","widgeon","widow","widowed","widower",
+            "widowhood","width","wield","wife","wifely",
+            "wig","wigged","wigging","wiggle","wight",
+            "wigwam","wilco","wild","wildcat","wildebeest",
+            "wilderness","wildfire","wildfowl","wildlife","wildly",
+            "wile","wiles","wilful","wiliness","will",
+            "willful","willies","willing","willow","willowy",
+            "willpower","wilt","wily","wimple","wimpy",
+            "win","wince","winceyette","winch","wind",
+            "windbag","windbreak","windcheater","windfall","windily",
+            "winding","windjammer","windlass","windless","windmill",
+            "window","windowpane","windowsill","windpipe","windscreen",
+            "windshield","windsock","windstorm","windswept","windward",
+            "windy","wine","winebibbing","wineglass","winepress",
+            "wineskin","wing","winger","wings","wingspan",
+            "wink","winkers","winkle","winner","winning",
+            "winnings","winnow","winsome","winter","wintergreen",
+            "wintertime","wintry","wipe","wiper","wire",
+            "wirecutters","wireless","wiretap","wireworm","wiring",
+            "wiry","wisdom","wise","wisecrack","wish",
+            "wishbone","wisp","wispy","wisteria","wistful",
+            "wit","witch","witchcraft","witchdoctor","witchery",
+            "witching","with","withal","withdraw","withdrawal",
+            "withdrawn","withe","wither","withering","withers",
+            "withhold","within","without","withstand","withy",
+            "witless","witness","witticism","witting","witty",
+            "wives","wizard","wizardry","wizened","woad",
+            "wobble","wobbly","woe","woebegone","woeful",
+            "wog","woke","woken","wold","wolf",
+            "wolfhound","wolfram","wolfsbane","woman","womanhood",
+            "womanise","womanish","womanize","womankind","womanly",
+            "womb","wombat","womenfolk","won","wonder",
+            "wonderful","wonderland","wonderment","wonders","wondrous",
+            "wonky","wont","wonted","woo","wood",
+            "woodbine","woodblock","woodcock","woodcraft","woodcut",
+            "woodcutter","wooded","wooden","woodenheaded","woodland",
+            "woodlouse","woodpecker","woodpile","woodshed","woodsman",
+            "woodwind","woodwork","woodworm","woody","wooer",
+            "woof","woofer","wool","woolen","woolens",
+            "woolgather","woolgathering","woollen","woollens","woolly",
+            "woolsack","woozy","wop","word","wording",
+            "wordless","wordplay","words","wordy","wore",
+            "work","workable","workaday","workbag","workbasket",
+            "workbench","workbook","workday","worker","workhorse",
+            "workhouse","working","workings","workman","workmanlike",
+            "workmanship","workout","workpeople","workroom","works",
+            "workshop","worktop","world","worldly","worldshaking",
+            "worldwide","worm","wormhole","wormwood","wormy",
+            "worn","worried","worrisome","worry","worse",
+            "worsen","worship","worshipful","worst","worsted",
+            "wort","worth","worthless","worthwhile","worthy",
+            "wot","wotcher","would","wouldst","wound",
+            "wove","woven","wow","wrac","wrack",
+            "wraith","wrangle","wrangler","wrap","wrapper",
+            "wrapping","wrath","wreak","wreath","wreathe",
+            "wreck","wreckage","wrecker","wren","wrench",
+            "wrest","wrestle","wretch","wretched","wriggle",
+            "wright","wring","wringer","wrinkle","wrist",
+            "wristband","wristlet","wristwatch","wristy","writ",
+            "write","writer","writhe","writing","writings",
+            "written","wrong","wrongdoing","wrongful","wrongheaded",
+            "wrote","wroth","wrought","wrung","wry",
+            "wurst","wyvern","xenon","xenophobia","xerox",
+            "xylophone","yacht","yachting","yachtsman","yahoo",
+            "yak","yam","yammer","yang","yank",
+            "yankee","yap","yard","yardage","yardarm",
+            "yardstick","yarn","yarrow","yashmak","yaw",
+            "yawl","yawn","yaws","yea","yeah",
+            "year","yearbook","yearling","yearlong","yearly",
+            "yearn","yearning","years","yeast","yeasty",
+            "yell","yellow","yelp","yen","yeoman",
+            "yeomanry","yes","yesterday","yet","yeti",
+            "yew","yid","yiddish","yield","yielding",
+            "yin","yippee","yobbo","yodel","yoga",
+            "yoghurt","yogi","yogurt","yoke","yokel",
+            "yolk","yonder","yonks","yore","yorker",
+            "you","young","younger","youngster","your",
+            "yours","yourself","youth","youthful","yowl",
+            "yoyo","yucca","yule","yuletide","zany",
+            "zeal","zealot","zealotry","zealous","zebra",
+            "zebu","zed","zeitgeist","zen","zenana",
+            "zenith","zephyr","zeppelin","zero","zest",
+            "ziggurat","zigzag","zinc","zinnia","zionism",
+            "zip","zipper","zippy","zither","zizz",
+            "zodiac","zombi","zombie","zonal","zone",
+            "zoning","zonked","zoo","zoologist","zoology",
+            "zoom","zoophyte","zouave","zucchini","zulu",
+        };
     }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c185dc/src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilter.cs
index 2e21b1e..f244773 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilter.cs
@@ -68,7 +68,7 @@ namespace Lucene.Net.Analysis.En
             int len = termAttribute.Length;
             if ((!keywordAtt.Keyword) && stemmer.Stem(term, len))
             {
-                termAttribute.SetEmpty().Append(stemmer.ToString());
+                termAttribute.SetEmpty().Append(stemmer.AsCharSequence());
             }
 
             return true;


[50/50] [abbrv] lucenenet git commit: Fixed bug in Hunspell with the word order validation due to use of CompareTo, which is using different sorting rules than Java.

Posted by sy...@apache.org.
Fixed bug in Hunspell with the word order validation due to use of CompareTo, which is using different sorting rules than Java.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/4deebe8f
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/4deebe8f
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/4deebe8f

Branch: refs/heads/analysis-work
Commit: 4deebe8fe40bec9a0164839aa6e4bd0e04701ae9
Parents: 1995da2
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Tue Aug 23 18:32:54 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Tue Aug 23 18:32:54 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Hunspell/Dictionary.cs             | 24 ++++++++------------
 1 file changed, 10 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4deebe8f/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
index f1b2467..8bab079 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
@@ -852,19 +852,15 @@ namespace Lucene.Net.Analysis.Hunspell
                     Array.Sort(wordForm);
                     entry = line2.Substring(0, flagSep - 0);
                 }
-
-                int cmp = currentEntry == null ? 1 : entry.CompareTo(currentEntry);
-                // LUCENENET TODO: For some reason the CompareTo method is working differently in .NET
-                // than it does in Java when it comes to strings. This check seems to fail on every dictionary.
-                // However, we must assume that most (if not all) dictionaries are sorted correctly, so 
-                // in order to make it function at all, this validation check is being removed. But 
-                // if the reason why it is failing can be determined, it probably should be put back in.
-                //if (cmp < 0)
-                //{
-                //    throw new System.ArgumentException("out of order: " + entry + " < " + currentEntry);
-                //}
-                //else
-                //{
+                // LUCENENET NOTE: CompareToOrdinal is an extension method that works similarly to
+                // Java's String.compareTo method.
+                int cmp = currentEntry == null ? 1 : entry.CompareToOrdinal(currentEntry);
+                if (cmp < 0)
+                {
+                    throw new System.ArgumentException("out of order: " + entry + " < " + currentEntry);
+                }
+                else
+                {
                     EncodeFlags(flagsScratch, wordForm);
                     int ord = flagLookup.Add(flagsScratch);
                     if (ord < 0)
@@ -886,7 +882,7 @@ namespace Lucene.Net.Analysis.Hunspell
                     }
                     currentOrds.Grow(currentOrds.Length + 1);
                     currentOrds.Ints[currentOrds.Length++] = ord;
-                //}
+                }
             }
 
             // finalize last entry


[36/50] [abbrv] lucenenet git commit: Increased timeout values of long running tests so they will have ample time to complete.

Posted by sy...@apache.org.
Increased timeout values of long running tests so they will have ample time to complete.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/bb59767c
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/bb59767c
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/bb59767c

Branch: refs/heads/analysis-work
Commit: bb59767c6622fca06e11b9a6c29d19901b2df236
Parents: 9124e03
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Aug 21 12:39:57 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 12:39:57 2016 +0700

----------------------------------------------------------------------
 .../core/Codecs/Lucene41/TestBlockPostingsFormat3.cs             | 2 +-
 src/Lucene.Net.Tests/core/Index/TestBinaryDocValuesUpdates.cs    | 2 +-
 src/Lucene.Net.Tests/core/Index/TestTermdocPerf.cs               | 2 +-
 src/Lucene.Net.Tests/core/Search/TestAutomatonQuery.cs           | 3 ++-
 src/Lucene.Net.Tests/core/Util/Packed/TestEliasFanoSequence.cs   | 4 ++--
 src/Lucene.Net.Tests/core/Util/TestPagedBytes.cs                 | 2 +-
 6 files changed, 8 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bb59767c/src/Lucene.Net.Tests/core/Codecs/Lucene41/TestBlockPostingsFormat3.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Codecs/Lucene41/TestBlockPostingsFormat3.cs b/src/Lucene.Net.Tests/core/Codecs/Lucene41/TestBlockPostingsFormat3.cs
index 6552584..44e268c 100644
--- a/src/Lucene.Net.Tests/core/Codecs/Lucene41/TestBlockPostingsFormat3.cs
+++ b/src/Lucene.Net.Tests/core/Codecs/Lucene41/TestBlockPostingsFormat3.cs
@@ -72,7 +72,7 @@ namespace Lucene.Net.Codecs.Lucene41
         internal static readonly int MAXDOC = Lucene41PostingsFormat.BLOCK_SIZE * 20;
 
         // creates 8 fields with different options and does "duels" of fields against each other
-        [Test, LongRunningTest]
+        [Test, LongRunningTest, Timeout(50000)]
         public virtual void Test()
         {
             Directory dir = NewDirectory();

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bb59767c/src/Lucene.Net.Tests/core/Index/TestBinaryDocValuesUpdates.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Index/TestBinaryDocValuesUpdates.cs b/src/Lucene.Net.Tests/core/Index/TestBinaryDocValuesUpdates.cs
index 9c89305..ba08f6c 100644
--- a/src/Lucene.Net.Tests/core/Index/TestBinaryDocValuesUpdates.cs
+++ b/src/Lucene.Net.Tests/core/Index/TestBinaryDocValuesUpdates.cs
@@ -1612,7 +1612,7 @@ namespace Lucene.Net.Index
             dir.Dispose();
         }
 
-        [Test, LongRunningTest]
+        [Test, LongRunningTest, Timeout(40000)]
         public virtual void TestTonsOfUpdates()
         {
             // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bb59767c/src/Lucene.Net.Tests/core/Index/TestTermdocPerf.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Index/TestTermdocPerf.cs b/src/Lucene.Net.Tests/core/Index/TestTermdocPerf.cs
index 1f7dd36..c1538c1 100644
--- a/src/Lucene.Net.Tests/core/Index/TestTermdocPerf.cs
+++ b/src/Lucene.Net.Tests/core/Index/TestTermdocPerf.cs
@@ -163,7 +163,7 @@ namespace Lucene.Net.Index
             return ret;
         }
 
-        [Test, LongRunningTest]
+        [Test, LongRunningTest, Timeout(120000)]
         public virtual void TestTermDocPerf()
         {
             // performance test for 10% of documents containing a term

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bb59767c/src/Lucene.Net.Tests/core/Search/TestAutomatonQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Search/TestAutomatonQuery.cs b/src/Lucene.Net.Tests/core/Search/TestAutomatonQuery.cs
index 4cd8d3c..a124089 100644
--- a/src/Lucene.Net.Tests/core/Search/TestAutomatonQuery.cs
+++ b/src/Lucene.Net.Tests/core/Search/TestAutomatonQuery.cs
@@ -5,6 +5,7 @@ using System.Threading;
 
 namespace Lucene.Net.Search
 {
+    using Attributes;
     using NUnit.Framework;
     using Automaton = Lucene.Net.Util.Automaton.Automaton;
     using AutomatonTestUtil = Lucene.Net.Util.Automaton.AutomatonTestUtil;
@@ -222,7 +223,7 @@ namespace Lucene.Net.Search
             Assert.AreEqual(0, AutomatonQueryNrHits(aq));
         }
 
-        [Test]
+        [Test, LongRunningTest, Timeout(40000)]
         public virtual void TestHashCodeWithThreads()
         {
             AutomatonQuery[] queries = new AutomatonQuery[1000];

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bb59767c/src/Lucene.Net.Tests/core/Util/Packed/TestEliasFanoSequence.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Util/Packed/TestEliasFanoSequence.cs b/src/Lucene.Net.Tests/core/Util/Packed/TestEliasFanoSequence.cs
index 36201af..9020f3b 100644
--- a/src/Lucene.Net.Tests/core/Util/Packed/TestEliasFanoSequence.cs
+++ b/src/Lucene.Net.Tests/core/Util/Packed/TestEliasFanoSequence.cs
@@ -304,7 +304,7 @@ namespace Lucene.Net.Util.Packed
             }
         }
 
-        [Test, LongRunningTest]
+        [Test, LongRunningTest, Timeout(50000)]
         public virtual void TestMonotoneSequencesLonger()
         {
             for (int s = 2; s < 4422; s++)
@@ -333,7 +333,7 @@ namespace Lucene.Net.Util.Packed
             }
         }
 
-        [Test, LongRunningTest]
+        [Test, LongRunningTest, Timeout(50000)]
         public virtual void TestStrictMonotoneSequencesLonger()
         {
             for (int s = 2; s < 4422; s++)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/bb59767c/src/Lucene.Net.Tests/core/Util/TestPagedBytes.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Util/TestPagedBytes.cs b/src/Lucene.Net.Tests/core/Util/TestPagedBytes.cs
index 7e63c67..42e305a 100644
--- a/src/Lucene.Net.Tests/core/Util/TestPagedBytes.cs
+++ b/src/Lucene.Net.Tests/core/Util/TestPagedBytes.cs
@@ -179,7 +179,7 @@ namespace Lucene.Net.Util
         }
 
         [Test]
-        [LongRunningTest]
+        [LongRunningTest, Timeout(120000)]
         public virtual void TestOverflow() // memory hole
         {
             BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("testOverflow"));


[33/50] [abbrv] lucenenet git commit: Changed Join.TermsIncluingScoreQuery.SVInOrderScorer.outerInstance variable to protected so its subclass can access it (fixes Join.TestJoinUtil.TestMultiValueRandomJoin())

Posted by sy...@apache.org.
Changed Join.TermsIncluingScoreQuery.SVInOrderScorer.outerInstance variable to protected so its subclass can access it (fixes Join.TestJoinUtil.TestMultiValueRandomJoin())


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/9313ff9c
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/9313ff9c
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/9313ff9c

Branch: refs/heads/analysis-work
Commit: 9313ff9c89d330b718773808ba7d01faf7d02f02
Parents: 0fd8d34
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Aug 21 10:58:19 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 10:58:19 2016 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Join/TermsIncludingScoreQuery.cs | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9313ff9c/src/Lucene.Net.Join/TermsIncludingScoreQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/TermsIncludingScoreQuery.cs b/src/Lucene.Net.Join/TermsIncludingScoreQuery.cs
index 9f3befc..93f2d2e 100644
--- a/src/Lucene.Net.Join/TermsIncludingScoreQuery.cs
+++ b/src/Lucene.Net.Join/TermsIncludingScoreQuery.cs
@@ -349,7 +349,7 @@ namespace Lucene.Net.Join
 
         internal class SVInOrderScorer : Scorer
         {
-            private readonly TermsIncludingScoreQuery outerInstance;
+            protected readonly TermsIncludingScoreQuery outerInstance;
 
 
             internal readonly DocIdSetIterator matchingDocsIterator;
@@ -427,14 +427,10 @@ namespace Lucene.Net.Join
         // This scorer deals with the fact that a document can have more than one score from multiple related documents.
         internal class MVInOrderScorer : SVInOrderScorer
         {
-            private readonly TermsIncludingScoreQuery outerInstance;
-
-            
             internal MVInOrderScorer(TermsIncludingScoreQuery outerInstance, Weight weight, Bits acceptDocs,
                 TermsEnum termsEnum, int maxDoc, long cost)
                 : base(outerInstance, weight, acceptDocs, termsEnum, maxDoc, cost)
             {
-                this.outerInstance = outerInstance;
             }
             
             protected override void FillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs,


[47/50] [abbrv] lucenenet git commit: Added test to demonstrate a problem with making the CharTokenizer.IsTokenChar() parameter a char rather than an int.

Posted by sy...@apache.org.
Added test to demonstrate a problem with making the CharTokenizer.IsTokenChar() parameter a char rather than an int.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/c36a0bd1
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/c36a0bd1
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/c36a0bd1

Branch: refs/heads/analysis-work
Commit: c36a0bd1239061a07756b7735dcdd7f3dab016a8
Parents: 56cdc04
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Tue Aug 23 15:39:52 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Tue Aug 23 15:55:19 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Util/TestCharTokenizers.cs         | 46 +++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c36a0bd1/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs
index 0d28101..d452d83 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestCharTokenizers.cs
@@ -240,6 +240,50 @@ namespace Lucene.Net.Tests.Analysis.Common.Analysis.Util
                 }
             }
         }
-    }
 
+        /// <summary>
+        /// LUCENENET: Added this test as proof that making the IsTokenChar parameter a char
+        /// is not going to work 100% of the time because of surrogate pairs.
+        /// </summary>
+
+        [Test]
+        public virtual void TestSurrogates()
+        {
+            var analyzer = new AnalyzerAnonymousInnerClassHelper3();
+
+            AssertAnalyzesTo(analyzer, "bar 123" + (char)55404 + (char)56321 + "34 5te 987", new string[] { "123\U0002b00134", "5", "987" });
+            AssertAnalyzesTo(analyzer, "787 " + (char)55297 + (char)56388 + "6" + (char)55404 + (char)56321 + " art true 734", new string[] { "787", "\U000104446\U0002b001", "734" });
+        }
+
+        private sealed class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+        {
+            public AnalyzerAnonymousInnerClassHelper3()
+            { }
+
+            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer tokenizer = new NumberAndSurrogatePairTokenizer(TEST_VERSION_CURRENT, reader);
+                return new TokenStreamComponents(tokenizer, tokenizer);
+            }
+
+            private sealed class NumberAndSurrogatePairTokenizer : CharTokenizer
+            {
+                public NumberAndSurrogatePairTokenizer(LuceneVersion matchVersion, TextReader reader)
+                    : base(matchVersion, reader)
+                {
+                }
+
+                protected override bool IsTokenChar(char c)
+                {
+                    if (char.IsNumber((char)c))
+                    {
+                        return true;
+                    }
+
+                    string character = char.ConvertFromUtf32(c);
+                    return char.IsSurrogatePair(character, 0);
+                }
+            }
+        }
+    }
 }
\ No newline at end of file


[29/50] [abbrv] lucenenet git commit: Fixed "key not found" bug in Join.TestJoinUtil

Posted by sy...@apache.org.
Fixed "key not found" bug in Join.TestJoinUtil


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/0f9f0ce9
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/0f9f0ce9
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/0f9f0ce9

Branch: refs/heads/analysis-work
Commit: 0f9f0ce995c92b109a1b73558a7043e1d07a1d5c
Parents: 11cf1b8
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Aug 21 10:05:25 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 10:05:25 2016 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Tests.Join/TestJoinUtil.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0f9f0ce9/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Join/TestJoinUtil.cs b/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
index a254997..1d2da10 100644
--- a/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
+++ b/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
@@ -1073,7 +1073,7 @@ namespace Lucene.Net.Tests.Join
             {
                 foreach (string linkValue in matchingDoc.LinkValues)
                 {
-                    IList<RandomDoc> otherMatchingDocs = linkValueDocuments[linkValue];
+                    IList<RandomDoc> otherMatchingDocs = linkValueDocuments.ContainsKey(linkValue) ? linkValueDocuments[linkValue] : null;
                     if (otherMatchingDocs == null)
                     {
                         continue;


[27/50] [abbrv] lucenenet git commit: Fixed bugs in TestOpenIfChangedMergedSegment() and TestOpenIfChangedNoChangesButSegmentMerges() in Facet.Taxonomy.Directory.TestDirectoryTaxonomyReader that caused them to throw null reference exceptions and fail.

Posted by sy...@apache.org.
Fixed bugs in TestOpenIfChangedMergedSegment() and  TestOpenIfChangedNoChangesButSegmentMerges() in Facet.Taxonomy.Directory.TestDirectoryTaxonomyReader that caused them to throw null reference exceptions and fail.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/1727c1b3
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/1727c1b3
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/1727c1b3

Branch: refs/heads/analysis-work
Commit: 1727c1b3f84562d3718a7de0b86b51712507d26a
Parents: 694676c
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Aug 21 09:47:53 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 09:47:53 2016 +0700

----------------------------------------------------------------------
 .../Directory/TestDirectoryTaxonomyReader.cs    | 32 +++++++++++---------
 1 file changed, 17 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1727c1b3/src/Lucene.Net.Tests.Facet/Taxonomy/Directory/TestDirectoryTaxonomyReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Facet/Taxonomy/Directory/TestDirectoryTaxonomyReader.cs b/src/Lucene.Net.Tests.Facet/Taxonomy/Directory/TestDirectoryTaxonomyReader.cs
index 15f30df..74ba8fe 100644
--- a/src/Lucene.Net.Tests.Facet/Taxonomy/Directory/TestDirectoryTaxonomyReader.cs
+++ b/src/Lucene.Net.Tests.Facet/Taxonomy/Directory/TestDirectoryTaxonomyReader.cs
@@ -297,7 +297,12 @@ namespace Lucene.Net.Facet.Taxonomy.Directory
             // hold onto IW to forceMerge
             // note how we don't close it, since DTW will close it.
             IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(new LogByteSizeMergePolicy()));
-            var writer = new DirectoryTaxonomyWriterAnonymousInnerClassHelper2(this, dir, iw) as DirectoryTaxonomyWriter;
+
+            // LUCENENET: We need to set the index writer before the constructor of the base class is called
+            // because the DirectoryTaxonomyWriter class constructor is the consumer of the OpenIndexWriter method.
+            // The only option seems to be to set it statically before creating the instance.
+            DirectoryTaxonomyWriterAnonymousInnerClassHelper2.iw = iw;
+            var writer = new DirectoryTaxonomyWriterAnonymousInnerClassHelper2(dir);
 
             var reader = new DirectoryTaxonomyReader(writer);
             Assert.AreEqual(1, reader.Size);
@@ -323,15 +328,11 @@ namespace Lucene.Net.Facet.Taxonomy.Directory
 
         private class DirectoryTaxonomyWriterAnonymousInnerClassHelper2 : DirectoryTaxonomyWriter
         {
-            private readonly TestDirectoryTaxonomyReader outerInstance;
-
-            private IndexWriter iw;
-            private IndexWriterConfig config;
+            internal static IndexWriter iw = null;
 
-            public DirectoryTaxonomyWriterAnonymousInnerClassHelper2(TestDirectoryTaxonomyReader outerInstance, Directory dir, IndexWriter iw) : base(dir)
+            public DirectoryTaxonomyWriterAnonymousInnerClassHelper2(Directory dir) 
+                : base(dir)
             {
-                this.outerInstance = outerInstance;
-                this.iw = iw;
             }
 
             protected override IndexWriter OpenIndexWriter(Directory directory, IndexWriterConfig config) 
@@ -352,7 +353,12 @@ namespace Lucene.Net.Facet.Taxonomy.Directory
             // hold onto IW to forceMerge
             // note how we don't close it, since DTW will close it.
             var iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(new LogByteSizeMergePolicy()));
-            DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriterAnonymousInnerClassHelper3(this, dir, iw);
+
+            // LUCENENET: We need to set the index writer before the constructor of the base class is called
+            // because the DirectoryTaxonomyWriter class constructor is the consumer of the OpenIndexWriter method.
+            // The only option seems to be to set it statically before creating the instance.
+            DirectoryTaxonomyWriterAnonymousInnerClassHelper3.iw = iw;
+            DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriterAnonymousInnerClassHelper3(dir);
 
 
             // add a category so that the following DTR open will cause a flush and 
@@ -381,15 +387,11 @@ namespace Lucene.Net.Facet.Taxonomy.Directory
 
         private class DirectoryTaxonomyWriterAnonymousInnerClassHelper3 : DirectoryTaxonomyWriter
         {
-            private readonly TestDirectoryTaxonomyReader outerInstance;
+            internal static IndexWriter iw;
 
-            private IndexWriter iw;
-
-            public DirectoryTaxonomyWriterAnonymousInnerClassHelper3(TestDirectoryTaxonomyReader outerInstance, Directory dir, IndexWriter iw)
+            public DirectoryTaxonomyWriterAnonymousInnerClassHelper3(Directory dir)
                 : base(dir)
             {
-                this.outerInstance = outerInstance;
-                this.iw = iw;
             }
 
             protected override IndexWriter OpenIndexWriter(Directory directory, IndexWriterConfig config)


[14/50] [abbrv] lucenenet git commit: Fixed bugs in KStemmer that were causing tests TestKStemmer.TestVocabulary and TestKStemmer.TestRandomStrings to fail.

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c185dc/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData3.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData3.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData3.cs
index a526c5f..73cb175 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData3.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData3.cs
@@ -47,6 +47,707 @@ namespace Lucene.Net.Analysis.En
         private KStemData3()
         {
         }
-        internal static string[] data = new string[] { "distasteful", "distemper", "distempered", "distend", "distension", "distil", "distill", "distillation", "distiller", "distillery", "distinct", "distinction", "distinctive", "distinguish", "distinguishable", "distinguished", "distort", "distortion", "distract", "distracted", "distraction", "distrain", "distraint", "distrait", "distraught", "distress", "distressing", "distribute", "distribution", "distributive", "distributor", "district", "distrust", "distrustful", "disturb", "disturbance", "disturbed", "disunion", "disunite", "disunity", "disuse", "disused", "disyllabic", "disyllable", "ditch", "dither", "dithers", "ditto", "ditty", "diuretic", "diurnal", "divagate", "divan", "dive", "diver", "diverge", "divergence", "divers", "diverse", "diversify", "diversion", "diversionary", "diversity", "divert", "divertimento", "divertissement", "divest", "divide", "dividend", "dividers", "divination", "divine", "diviner", "divingboard", "
 divinity", "divisible", "division", "divisive", "divisor", "divorce", "divot", "divulge", "divvy", "dixie", "dixieland", "dizzy", "djinn", "dna", "do", "dobbin", "doc", "docile", "dock", "docker", "docket", "dockyard", "doctor", "doctoral", "doctorate", "doctrinaire", "doctrinal", "doctrine", "document", "documentary", "documentation", "dodder", "doddering", "doddle", "dodge", "dodgems", "dodger", "dodgy", "dodo", "doe", "doer", "doeskin", "doff", "dog", "dogcart", "dogcatcher", "dogfight", "dogfish", "dogged", "doggerel", "doggie", "doggo", "doggone", "doggy", "doghouse", "dogie", "dogleg", "dogma", "dogmatic", "dogmatics", "dogmatism", "dogs", "dogsbody", "dogtooth", "dogtrot", "dogwood", "doh", "doily", "doings", "doldrums", "dole", "doleful", "doll", "dollar", "dollop", "dolly", "dolmen", "dolor", "dolorous", "dolour", "dolphin", "dolt", "domain", "dome", "domed", "domestic", "domesticate", "domesticity", "domicile", "domiciliary", "dominance", "dominant", "dominate", "dominatio
 n", "domineer", "dominican", "dominion", "domino", "dominoes", "don", "donate", "donation", "donjon", "donkey", "donkeywork", "donnish", "donor", "doodle", "doodlebug", "doom", "doomsday", "door", "doorbell", "doorframe", "doorkeeper", "doorknob", "doorknocker", "doorman", "doormat", "doornail", "doorplate", "doorscraper", "doorstep", "doorstopper", "doorway", "dope", "dopey", "dopy", "doric", "dormant", "dormer", "dormitory", "dormouse", "dorsal", "dory", "dosage", "dose", "doss", "dosser", "dosshouse", "dossier", "dost", "dot", "dotage", "dote", "doth", "doting", "dottle", "dotty", "double", "doubles", "doublet", "doublethink", "doubloon", "doubly", "doubt", "doubtful", "doubtless", "douche", "dough", "doughnut", "doughty", "doughy", "dour", "douse", "dove", "dovecote", "dovetail", "dowager", "dowdy", "dowel", "dower", "down", "downbeat", "downcast", "downdraft", "downdraught", "downer", "downfall", "downgrade", "downhearted", "downhill", "downpour", "downright", "downstage", "dow
 nstairs", "downstream", "downtown", "downtrodden", "downward", "downwards", "downwind", "downy", "dowry", "dowse", "doxology", "doyen", "doyley", "doze", "dozen", "dozy", "dpt", "drab", "drabs", "drachm", "drachma", "draconian", "draft", "draftee", "draftsman", "drafty", "drag", "draggled", "draggy", "dragnet", "dragoman", "dragon", "dragonfly", "dragoon", "drain", "drainage", "drainpipe", "drake", "dram", "drama", "dramatic", "dramatics", "dramatise", "dramatist", "dramatize", "drank", "drape", "draper", "drapery", "drastic", "drat", "draught", "draughtboard", "draughts", "draughtsman", "draughty", "draw", "drawback", "drawbridge", "drawer", "drawers", "drawing", "drawl", "drawn", "drawstring", "dray", "dread", "dreadful", "dreadfully", "dreadnaught", "dreadnought", "dream", "dreamboat", "dreamer", "dreamland", "dreamless", "dreamlike", "dreamy", "drear", "dreary", "dredge", "dredger", "dregs", "drench", "dress", "dressage", "dresser", "dressing", "dressmaker", "dressy", "drew", "d
 ribble", "driblet", "dribs", "drier", "drift", "driftage", "drifter", "driftnet", "driftwood", "drill", "drily", "drink", "drinkable", "drinker", "drip", "dripping", "drive", "drivel", "driver", "driveway", "driving", "drizzle", "drogue", "droll", "drollery", "dromedary", "drone", "drool", "droop", "drop", "dropkick", "droplet", "dropout", "dropper", "droppings", "drops", "dropsy", "dross", "drought", "drove", "drover", "drown", "drowse", "drowsy", "drub", "drudge", "drudgery", "drug", "drugget", "druggist", "drugstore", "druid", "drum", "drumbeat", "drumfire", "drumhead", "drummer", "drumstick", "drunk", "drunkard", "drunken", "drupe", "dry", "dryad", "dryer", "dual", "dub", "dubbin", "dubiety", "dubious", "ducal", "ducat", "duchess", "duchy", "duck", "duckboards", "duckling", "ducks", "duckweed", "ducky", "duct", "ductile", "dud", "dude", "dudgeon", "duds", "due", "duel", "duenna", "dues", "duet", "duff", "duffel", "duffer", "duffle", "dug", "dugout", "duke", "dukedom", "dukes", "
 dulcet", "dulcimer", "dull", "dullard", "duly", "dumb", "dumbbell", "dumbfound", "dumbwaiter", "dumfound", "dummy", "dump", "dumper", "dumpling", "dumps", "dumpy", "dun", "dunce", "dunderhead", "dung", "dungaree", "dungarees", "dungeon", "dunghill", "dunk", "duo", "duodecimal", "duodenum", "duologue", "dupe", "duplex", "duplicate", "duplicator", "duplicity", "durable", "duration", "durbar", "duress", "durex", "during", "durst", "dusk", "dusky", "dust", "dustbin", "dustbowl", "dustcart", "dustcoat", "duster", "dustman", "dustpan", "dustsheet", "dustup", "dusty", "dutch", "dutiable", "dutiful", "duty", "duvet", "dwarf", "dwell", "dwelling", "dwindle", "dyarchy", "dye", "dyestuff", "dyeworks", "dyke", "dynamic", "dynamics", "dynamism", "dynamite", "dynamo", "dynasty", "dysentery", "dyslexia", "dyspepsia", "dyspeptic", "each", "eager", "eagle", "eaglet", "ear", "earache", "eardrum", "eared", "earful", "earl", "earliest", "earlobe", "early", "earmark", "earmuff", "earn", "earnest", "earn
 ings", "earphone", "earpiece", "earplug", "earring", "earshot", "earth", "earthbound", "earthen", "earthenware", "earthling", "earthly", "earthnut", "earthquake", "earthshaking", "earthwork", "earthworm", "earthy", "earwax", "earwig", "ease", "easel", "easily", "east", "eastbound", "easter", "easterly", "eastern", "easterner", "easternmost", "easy", "easygoing", "eat", "eatable", "eatables", "eater", "eats", "eaves", "eavesdrop", "ebb", "ebony", "ebullience", "ebullient", "eccentric", "eccentricity", "ecclesiastic", "ecclesiastical", "ecg", "echelon", "echo", "eclectic", "eclipse", "ecliptic", "eclogue", "ecological", "ecologically", "ecology", "economic", "economical", "economically", "economics", "economise", "economist", "economize", "economy", "ecosystem", "ecstasy", "ecstatic", "ect", "ectoplasm", "ecumenical", "ecumenicalism", "eczema", "edam", "eddy", "edelweiss", "eden", "edge", "edgeways", "edging", "edgy", "edible", "edibles", "edict", "edification", "edifice", "edify", "e
 dit", "edition", "editor", "editorial", "editorialise", "editorialize", "educate", "educated", "education", "educational", "educationist", "educator", "educe", "eec", "eeg", "eel", "eerie", "efface", "effect", "effective", "effectively", "effectiveness", "effectives", "effects", "effectual", "effectually", "effectuate", "effeminacy", "effeminate", "effendi", "effervesce", "effete", "efficacious", "efficacy", "efficiency", "efficient", "effigy", "efflorescence", "effluent", "efflux", "effort", "effortless", "effrontery", "effulgence", "effulgent", "effusion", "effusive", "eft", "egalitarian", "egg", "eggcup", "egghead", "eggnog", "eggplant", "eggshell", "egis", "eglantine", "ego", "egocentric", "egoism", "egoist", "egotism", "egotist", "egregious", "egress", "egret", "eiderdown", "eight", "eighteen", "eightsome", "eighty", "eisteddfod", "either", "ejaculate", "ejaculation", "eject", "ejector", "eke", "ekg", "elaborate", "elaboration", "eland", "elapse", "elastic", "elasticity", "elas
 toplast", "elate", "elated", "elation", "elbow", "elbowroom", "elder", "elderberry", "elderflower", "elderly", "eldest", "elect", "election", "electioneer", "electioneering", "elective", "elector", "electoral", "electorate", "electric", "electrical", "electrician", "electricity", "electrify", "electrocardiogram", "electrocardiograph", "electrocute", "electrode", "electroencephalogram", "electroencephalograph", "electrolysis", "electrolyte", "electron", "electronic", "electronics", "electroplate", "eleemosynary", "elegant", "elegiac", "elegy", "element", "elemental", "elementary", "elements", "elephant", "elephantiasis", "elephantine", "elevate", "elevated", "elevation", "elevator", "eleven", "elevenses", "elf", "elfin", "elfish", "elicit", "elide", "eligible", "eliminate", "elite", "elitism", "elixir", "elizabethan", "elk", "elkhound", "ellipse", "ellipsis", "elliptic", "elm", "elocution", "elocutionary", "elocutionist", "elongate", "elongation", "elope", "eloquence", "eloquent", "e
 lse", "elsewhere", "elucidate", "elucidatory", "elude", "elusive", "elver", "elves", "elvish", "elysian", "elysium", "emaciate", "emanate", "emancipate", "emancipation", "emasculate", "embalm", "embankment", "embargo", "embark", "embarkation", "embarrass", "embarrassment", "embassy", "embattled", "embed", "embellish", "ember", "embezzle", "embitter", "emblazon", "emblem", "emblematic", "embodiment", "embody", "embolden", "embolism", "embonpoint", "embosomed", "emboss", "embowered", "embrace", "embrasure", "embrocation", "embroider", "embroidery", "embroil", "embryo", "embryonic", "emend", "emendation", "emerald", "emerge", "emergence", "emergency", "emergent", "emeritus", "emery", "emetic", "emigrant", "emigrate", "eminence", "eminent", "eminently", "emir", "emirate", "emissary", "emission", "emit", "emmentaler", "emmenthaler", "emollient", "emolument", "emote", "emotion", "emotional", "emotionalism", "emotionally", "emotive", "empanel", "empathy", "emperor", "emphasis", "emphasise"
 , "emphasize", "emphatic", "emphatically", "emphysema", "empire", "empirical", "empiricism", "emplacement", "emplane", "employ", "employable", "employee", "employer", "employment", "emporium", "empower", "empress", "emptily", "empty", "empurpled", "empyreal", "empyrean", "emu", "emulate", "emulation", "emulsify", "emulsion", "enable", "enabling", "enact", "enactment", "enamel", "enamelware", "enamored", "enamoured", "encamp", "encampment", "encapsulate", "encase", "encaustic", "encephalitis", "enchain", "enchant", "enchanter", "enchanting", "enchantment", "encipher", "encircle", "enclave", "enclose", "enclosure", "encode", "encomium", "encompass", "encore", "encounter", "encourage", "encouragement", "encroach", "encroachment", "encrust", "encumber", "encumbrance", "encyclical", "encyclopaedia", "encyclopaedic", "encyclopedia", "encyclopedic", "end", "endanger", "endear", "endearing", "endearment", "endeavor", "endeavour", "endemic", "ending", "endive", "endless", "endocrine", "endor
 se", "endow", "endowment", "endpaper", "endurance", "endure", "enduring", "endways", "enema", "enemy", "energetic", "energize", "energy", "enervate", "enfeeble", "enfilade", "enfold", "enforce", "enfranchise", "engage", "engaged", "engagement", "engaging", "engender", "engine", "engineer", "engineering", "english", "englishman", "engraft", "engrave", "engraving", "engross", "engrossing", "engulf", "enhance", "enigma", "enigmatic", "enjoin", "enjoy", "enjoyable", "enjoyment", "enkindle", "enlarge", "enlargement", "enlighten", "enlightened", "enlightenment", "enlist", "enliven", "enmesh", "enmity", "ennoble", "ennui", "enormity", "enormous", "enormously", "enough", "enplane", "enquire", "enquiring", "enquiry", "enrage", "enrapture", "enrich", "enrol", "enroll", "enrollment", "enrolment", "ensanguined", "ensconce", "ensemble", "enshrine", "enshroud", "ensign", "enslave", "ensnare", "ensue", "ensure", "entail", "entangle", "entanglement", "entente", "enter", "enteritis", "enterprise", "
 enterprising", "entertain", "entertainer", "entertaining", "entertainment", "enthral", "enthrall", "enthrone", "enthroned", "enthuse", "enthusiasm", "enthusiast", "entice", "enticement", "entire", "entirety", "entitle", "entity", "entomb", "entomology", "entourage", "entrails", "entrain", "entrance", "entrant", "entrap", "entreat", "entreaty", "entrench", "entrenched", "entrenchment", "entrepreneur", "entresol", "entropy", "entrust", "entry", "entwine", "enumerate", "enunciate", "enunciation", "envelop", "envenom", "enviable", "envious", "environed", "environment", "environmental", "environmentalist", "environs", "envisage", "envoi", "envoy", "envy", "enzyme", "eon", "epaulet", "epaulette", "ephemeral", "epic", "epicenter", "epicentre", "epicure", "epicurean", "epidemic", "epidermis", "epidiascope", "epiglottis", "epigram", "epigrammatic", "epilepsy", "epileptic", "epilogue", "epiphany", "episcopacy", "episcopal", "episcopalian", "episode", "episodic", "epistle", "epistolary", "epit
 aph", "epithet", "epitome", "epitomise", "epitomize", "epoch", "eponymous", "equability", "equable", "equal", "equalise", "equalitarian", "equality", "equalize", "equally", "equanimity", "equate", "equation", "equator", "equatorial", "equerry", "equestrian", "equidistant", "equilateral", "equilibrium", "equine", "equinoctial", "equinox", "equip", "equipage", "equipment", "equipoise", "equitable", "equitation", "equities", "equity", "equivalence", "equivalent", "equivocal", "equivocate", "equivocation", "era", "eradicate", "eradicator", "erase", "eraser", "erasure", "ere", "erect", "erectile", "erection", "eremite", "erg", "ergo", "ergonomics", "ermine", "erode", "erogenous", "erosion", "erotic", "erotica", "eroticism", "err", "errand", "errant", "erratic", "erratum", "erroneous", "error", "ersatz", "erse", "eructation", "erudite", "erupt", "eruption", "erysipelas", "escalate", "escalator", "escalope", "escapade", "escape", "escapee", "escapement", "escapism", "escapology", "escarpme
 nt", "eschatology", "eschew", "escort", "escritoire", "escutcheon", "eskimo", "esophagus", "esoteric", "esp", "espalier", "especial", "especially", "esperanto", "espionage", "esplanade", "espousal", "espouse", "espresso", "espy", "essay", "essence", "essential", "essentially", "establish", "establishment", "estaminet", "estate", "esteem", "esthete", "esthetic", "esthetics", "estimable", "estimate", "estimation", "estimator", "estrange", "estrangement", "estrogen", "estuary", "etch", "etching", "eternal", "eternity", "ether", "ethereal", "ethic", "ethical", "ethically", "ethics", "ethnic", "ethnically", "ethnographer", "ethnography", "ethnologist", "ethnology", "ethos", "ethyl", "etiolate", "etiology", "etiquette", "etymologist", "etymology", "eucalyptus", "eucharist", "euclidean", "euclidian", "eugenic", "eugenics", "eulogise", "eulogist", "eulogistic", "eulogize", "eulogy", "eunuch", "euphemism", "euphemistic", "euphonious", "euphonium", "euphony", "euphoria", "euphuism", "eurasian
 ", "eureka", "eurhythmic", "eurhythmics", "eurocrat", "eurodollar", "eurythmic", "eurythmics", "euthanasia", "evacuate", "evacuee", "evade", "evaluate", "evanescent", "evangelic", "evangelical", "evangelise", "evangelist", "evangelize", "evaporate", "evasion", "evasive", "eve", "even", "evening", "evenings", "evens", "evensong", "event", "eventful", "eventide", "eventual", "eventuality", "eventually", "eventuate", "ever", "evergreen", "everlasting", "everlastingly", "evermore", "every", "everybody", "everyday", "everything", "everywhere", "evict", "evidence", "evident", "evidently", "evil", "evildoer", "evince", "eviscerate", "evocative", "evoke", "evolution", "evolutionary", "evolve", "ewe", "ewer", "exacerbate", "exact", "exacting", "exaction", "exactly", "exaggerate", "exaggeration", "exalt", "exaltation", "exalted", "exam", "examination", "examine", "example", "exasperate", "exasperation", "excavate", "excavation", "excavator", "exceed", "exceedingly", "excel", "excellence", "ex
 cellency", "excellent", "excelsior", "except", "excepted", "excepting", "exception", "exceptionable", "exceptional", "excerpt", "excess", "excesses", "excessive", "exchange", "exchequer", "excise", "excision", "excitable", "excite", "excited", "excitement", "exciting", "exclaim", "exclamation", "exclamatory", "exclude", "excluding", "exclusion", "exclusive", "exclusively", "excogitate", "excommunicate", "excommunication", "excoriate", "excrement", "excrescence", "excreta", "excrete", "excretion", "excruciating", "exculpate", "excursion", "excursionist", "excusable", "excuse", "execrable", "execrate", "executant", "execute", "execution", "executioner", "executive", "executor", "exegesis", "exemplary", "exemplification", "exemplify", "exempt", "exemption", "exercise", "exercises", "exert", "exertion", "exeunt", "exhalation", "exhale", "exhaust", "exhaustion", "exhaustive", "exhibit", "exhibition", "exhibitionism", "exhibitor", "exhilarate", "exhilarating", "exhort", "exhortation", "ex
 hume", "exigency", "exigent", "exiguous", "exile", "exist", "existence", "existent", "existential", "existentialism", "existing", "exit", "exodus", "exogamy", "exonerate", "exorbitant", "exorcise", "exorcism", "exorcist", "exorcize", "exotic", "expand", "expanse", "expansion", "expansive", "expatiate", "expatriate", "expect", "expectancy", "expectant", "expectation", "expectations", "expectorate", "expediency", "expedient", "expedite", "expedition", "expeditionary", "expeditious", "expel", "expend", "expendable", "expenditure", "expense", "expenses", "expensive", "experience", "experienced", "experiment", "experimental", "experimentation", "expert", "expertise", "expiate", "expiration", "expire", "explain", "explanation", "explanatory", "expletive", "explicable", "explicate", "explicit", "explode", "exploded", "exploit", "exploration", "exploratory", "explore", "explosion", "explosive", "expo", "exponent", "exponential", "export", "exportation", "exporter", "expose", "exposition", "
 expostulate", "exposure", "expound", "express", "expression", "expressionism", "expressionless", "expressive", "expressly", "expressway", "expropriate", "expulsion", "expunge", "expurgate", "exquisite", "extant", "extemporaneous", "extempore", "extemporise", "extemporize", "extend", "extension", "extensive", "extent", "extenuate", "extenuation", "exterior", "exteriorise", "exteriorize", "exterminate", "external", "externalise", "externalize", "externally", "externals", "exterritorial", "extinct", "extinction", "extinguish", "extinguisher", "extirpate", "extol", "extort", "extortion", "extortionate", "extortions", "extra", "extract", "extraction", "extracurricular", "extraditable", "extradite", "extrajudicial", "extramarital", "extramural", "extraneous", "extraordinarily", "extraordinary", "extrapolate", "extraterrestrial", "extraterritorial", "extravagance", "extravagant", "extravaganza", "extravert", "extreme", "extremely", "extremism", "extremities", "extremity", "extricate", "ext
 rinsic", "extrovert", "extrude", "exuberance", "exuberant", "exude", "exult", "exultant", "exultation", "eye", "eyeball", "eyebrow", "eyecup", "eyeful", "eyeglass", "eyeglasses", "eyelash", "eyelet", "eyelid", "eyeliner", "eyepiece", "eyes", "eyeshot", "eyesight", "eyesore", "eyestrain", "eyetooth", "eyewash", "eyewitness", "eyot", "eyrie", "eyry", "fabian", "fable", "fabled", "fabric", "fabricate", "fabrication", "fabulous", "fabulously", "face", "facecloth", "faceless", "facet", "facetious", "facial", "facile", "facilitate", "facilities", "facility", "facing", "facings", "facsimile", "fact", "faction", "factious", "factitious", "factor", "factorial", "factorise", "factorize", "factory", "factotum", "factual", "faculty", "fad", "fade", "faeces", "faerie", "faery", "fag", "fagged", "faggot", "fagot", "fahrenheit", "faience", "fail", "failing", "failure", "fain", "faint", "fair", "fairground", "fairly", "fairway", "fairy", "fairyland", "faith", "faithful", "faithfully", "faithless", 
 "fake", "fakir", "falcon", "falconer", "falconry", "fall", "fallacious", "fallacy", "fallen", "fallible", "fallout", "fallow", "falls", "false", "falsehood", "falsetto", "falsies", "falsify", "falsity", "falter", "fame", "famed", "familial", "familiar", "familiarise", "familiarity", "familiarize", "familiarly", "family", "famine", "famish", "famished", "famous", "famously", "fan", "fanatic", "fanaticism", "fancier", "fancies", "fanciful", "fancy", "fancywork", "fandango", "fanfare", "fang", "fanlight", "fanny", "fantasia", "fantastic", "fantasy", "far", "faraway", "farce", "fare", "farewell", "farfetched", "farinaceous", "farm", "farmer", "farmhand", "farmhouse", "farming", "farmyard", "farrago", "farrier", "farrow", "farsighted", "fart", "farther", "farthest", "farthing", "fascia", "fascinate", "fascinating", "fascination", "fascism", "fascist", "fashion", "fashionable", "fast", "fasten", "fastener", "fastening", "fastidious", "fastness", "fat", "fatal", "fatalism", "fatalist", "fa
 tality", "fatally", "fate", "fated", "fateful", "fates", "fathead", "father", "fatherhood", "fatherly", "fathom", "fathomless", "fatigue", "fatigues", "fatless", "fatted", "fatten", "fatty", "fatuity", "fatuous", "faucet", "fault", "faultfinding", "faultless", "faulty", "faun", "fauna", "favor", "favorable", "favored", "favorite", "favoritism", "favour", "favourable", "favoured", "favourite", "favouritism", "favours", "fawn", "fay", "faze", "fbi", "fealty", "fear", "fearful", "fearless", "fearsome", "feasible", "feast", "feat", "feather", "featherbed", "featherbrained", "featherweight", "feathery", "feature", "featureless", "features", "febrile", "february", "feces", "feckless", "fecund", "fed", "federal", "federalism", "federalist", "federate", "federation", "fee", "feeble", "feebleminded", "feed", "feedback", "feedbag", "feeder", "feel", "feeler", "feeling", "feelings", "feet", "feign", "feint", "feldspar", "felicitate", "felicitous", "felicity", "feline", "fell", "fellah", "fella
 tio", "fellow", "fellowship", "felon", "felony", "felspar", "felt", "felucca", "fem", "female", "feminine", "femininity", "feminism", "feminist", "femur", "fen", "fence", "fencer", "fencing", "fend", "fender", "fennel", "feoff", "feral", "ferment", "fermentation", "fern", "ferocious", "ferocity", "ferret", "ferroconcrete", "ferrous", "ferrule", "ferry", "ferryboat", "ferryman", "fertile", "fertilise", "fertility", "fertilize", "fertilizer", "ferule", "fervent", "fervid", "fervor", "fervour", "festal", "fester", "festival", "festive", "festivity", "festoon", "fetal", "fetch", "fetching", "fete", "fetid", "fetish", "fetishism", "fetishist", "fetlock", "fetter", "fettle", "fetus", "feud", "feudal", "feudalism", "feudatory", "fever", "fevered", "feverish", "feverishly", "few", "fey", "fez", "fiasco", "fiat", "fib", "fiber", "fiberboard", "fiberglass", "fibre", "fibreboard", "fibreglass", "fibrositis", "fibrous", "fibula", "fichu", "fickle", "fiction", "fictional", "fictionalisation", "f
 ictionalization", "fictitious", "fiddle", "fiddler", "fiddlesticks", "fiddling", "fidelity", "fidget", "fidgets", "fidgety", "fie", "fief", "field", "fielder", "fieldwork", "fiend", "fiendish", "fiendishly", "fierce", "fiery", "fiesta", "fife", "fifteen", "fifth", "fifty", "fig", "fight", "fighter", "figment", "figurative", "figure", "figured", "figurehead", "figures", "figurine", "filament", "filbert", "filch", "file", "filet", "filial", "filibuster", "filigree", "filings", "fill", "filler", "fillet", "filling", "fillip", "filly", "film", "filmable", "filmstrip", "filmy", "filter", "filth", "filthy", "fin", "finable", "final", "finale", "finalise", "finalist", "finality", "finalize", "finally", "finance", "finances", "financial", "financially", "financier", "finch", "find", "finder", "finding", "fine", "fineable", "finely", "finery", "finesse", "finger", "fingerboard", "fingering", "fingernail", "fingerplate", "fingerpost", "fingerprint", "fingerstall", "fingertip", "finicky", "fin
 is", "finish", "finished", "finite", "fink", "fiord", "fir", "fire", "firearm", "fireball", "firebomb", "firebox", "firebrand", "firebreak", "firebrick", "firebug", "fireclay", "firecracker", "firedamp", "firedog", "firefly", "fireguard", "firelight", "firelighter", "fireman", "fireplace", "firepower", "fireproof", "fireside", "firestorm", "firetrap", "firewalking", "firewatcher", "firewater", "firewood", "firework", "fireworks", "firkin", "firm", "firmament", "first", "firstborn", "firstfruits", "firsthand", "firstly", "firth", "firtree", "fiscal", "fish", "fishcake", "fisherman", "fishery", "fishing", "fishmonger", "fishplate", "fishwife", "fishy", "fissile", "fission", "fissionable", "fissure", "fist", "fisticuffs", "fistula", "fit", "fitful", "fitment", "fitness", "fitted", "fitter", "fitting", "five", "fiver", "fives", "fix", "fixation", "fixative", "fixed", "fixedly", "fixity", "fixture", "fizz", "fizzle", "fizzy", "fjord", "flabbergast", "flabby", "flaccid", "flag", "flagella
 nt", "flagellate", "flageolet", "flagon", "flagpole", "flagrancy", "flagrant", "flagship", "flagstaff", "flagstone", "flail", "flair", "flak", "flake", "flaky", "flambeau", "flamboyant", "flame", "flamenco", "flaming", "flamingo", "flammable", "flan", "flange", "flank", "flannel", "flannelette", "flannels", "flap", "flapjack", "flapper", "flare", "flared", "flares", "flash", "flashback", "flashbulb", "flashcube", "flasher", "flashgun", "flashlight", "flashy", "flask", "flat", "flatcar", "flatfish", "flatfoot", "flatiron", "flatlet", "flatly", "flatten", "flatter", "flattery", "flattop", "flatulence", "flaunt", "flautist", "flavor", "flavoring", "flavour", "flavouring", "flaw", "flawless", "flax", "flaxen", "flay", "flea", "fleabag", "fleabite", "fleapit", "fleck", "fledged", "fledgling", "flee", "fleece", "fleecy", "fleet", "fleeting", "flesh", "fleshings", "fleshly", "fleshpot", "fleshy", "flew", "flex", "flexible", "flibbertigibbet", "flick", "flicker", "flicks", "flier", "flies",
  "flight", "flightless", "flighty", "flimsy", "flinch", "fling", "flint", "flintlock", "flinty", "flip", "flippancy", "flippant", "flipper", "flipping", "flirt", "flirtation", "flirtatious", "flit", "flitch", "flivver", "float", "floatation", "floating", "flock", "floe", "flog", "flogging", "flood", "floodgate", "floodlight", "floor", "floorboard", "flooring", "floorwalker", "floosy", "floozy", "flop", "floppy", "flora", "floral", "floriculture", "florid", "florin", "florist", "floss", "flotation", "flotilla", "flounce", "flounder", "flour", "flourish", "flourmill", "floury", "flout", "flow", "flower", "flowerbed", "flowered", "flowering", "flowerless", "flowerpot", "flowery", "flowing", "flown", "flu", "fluctuate", "flue", "fluency", "fluent", "fluff", "fluffy", "fluid", "fluidity", "fluke", "flukey", "fluky", "flume", "flummery", "flummox", "flung", "flunk", "flunkey", "flunky", "fluorescent", "fluoridate", "fluoride", "fluorine", "flurry", "flush", "flushed", "fluster", "flute", 
 "fluting", "flutist", "flutter", "fluvial", "flux", "fly", "flyaway", "flyblown", "flyby", "flycatcher", "flyer", "flying", "flyleaf", "flyover", "flypaper", "flypast", "flysheet", "flyswatter", "flytrap", "flyweight", "flywheel", "flywhisk", "foal", "foam", "fob", "focal", "focus", "fodder", "foe", "foeman", "foetal", "foetus", "fog", "fogbank", "fogbound", "fogey", "foggy", "foghorn", "fogy", "foible", "foil", "foist", "fold", "foldaway", "folder", "foliage", "folio", "folk", "folklore", "folklorist", "folks", "folksy", "folktale", "folkway", "follicle", "follow", "follower", "following", "folly", "foment", "fomentation", "fond", "fondant", "fondle", "fondly", "fondu", "fondue", "font", "food", "foodstuff", "fool", "foolery", "foolhardy", "foolish", "foolproof", "foolscap", "foot", "footage", "football", "footbath", "footboard", "footbridge", "footer", "footfall", "foothill", "foothold", "footing", "footle", "footlights", "footling", "footloose", "footman", "footnote", "footpad", 
 "footpath", "footplate", "footprint", "footrace", "footsie", "footslog", "footsore", "footstep", "footstool", "footsure", "footwear", "footwork", "fop", "foppish", "for", "forage", "foray", "forbear", "forbearance", "forbearing", "forbid", "forbidden", "forbidding", "force", "forced", "forceful", "forcemeat", "forceps", "forces", "forcible", "forcibly", "ford", "fore", "forearm", "forebode", "foreboding", "forecast", "forecastle", "foreclose", "foreclosure", "forecourt", "foredoomed", "forefather", "forefinger", "forefoot", "forefront", "forego", "foregoing", "foreground", "forehand", "forehead", "foreign", "foreigner", "foreknowledge", "foreland", "foreleg", "forelock", "foreman", "foremost", "forename", "forenoon", "forensic", "foreordain", "forepart", "foreplay", "forerunner", "foresail", "foresee", "foreseeable", "foreshadow", "foreshore", "foreshorten", "foresight", "foreskin", "forest", "forestall", "forester", "forestry", "foreswear", "foretaste", "foretell", "forethought", "
 forever", "forewarn", "forewent", "forewoman", "foreword", "forfeit", "forfeiture", "forgather", "forgave", "forge", "forger", "forgery", "forget", "forgetful", "forging", "forgivable", "forgive", "forgiveable", "forgiveness", "forgiving", "forgo", "fork", "forked", "forkful", "forklift", "forlorn", "form", "formal", "formaldehyde", "formalin", "formalise", "formalism", "formality", "formalize", "format", "formation", "formative", "formbook", "former", "formerly", "formica", "formidable", "formless", "formula", "formulaic", "formulate", "formulation", "fornicate", "fornication", "forrader", "forsake", "forsooth", "forswear", "forsythia", "fort", "forte", "forth", "forthcoming", "forthright", "forthwith", "fortieth", "fortification", "fortify", "fortissimo", "fortitude", "fortnight", "fortnightly", "fortress", "fortuitous", "fortunate", "fortunately", "fortune", "forty", "forum", "forward", "forwarding", "forwardly", "forwardness", "forwent", "foss", "fosse", "fossil", "fossilise", "
 fossilize", "foster", "fought", "foul", "found", "foundation", "foundations", "founder", "foundling", "foundry", "fount", "fountain", "fountainhead", "four", "foureyes", "fourpenny", "fours", "foursquare", "fourteen", "fourth", "fowl", "fox", "foxglove", "foxhole", "foxhound", "foxhunt", "foxtrot", "foxy", "foyer", "fracas", "fraction", "fractional", "fractionally", "fractious", "fracture", "fragile", "fragment", "fragmentary", "fragmentation", "fragrance", "fragrant", "frail", "frailty", "frame", "frames", "framework", "franc", "franchise", "franciscan", "frank", "frankfurter", "frankincense", "franklin", "frankly", "frantic", "fraternal", "fraternise", "fraternity", "fraternize", "fratricide", "frau", "fraud", "fraudulence", "fraudulent", "fraught", "fraulein", "fray", "frazzle", "freak", "freakish", "freckle", "free", "freebee", "freebie", "freeboard", "freebooter", "freeborn", "freedman", "freedom", "freehand", "freehanded", "freehold", "freeholder", "freelance", "freeload", "fr
 eely", "freeman", "freemason", "freemasonry", "freepost", "freesia", "freestanding", "freestone", "freestyle", "freethinker", "freeway", "freewheel", "freewheeling", "freewill", "freeze", "freezer", "freezing", "freight", "freighter", "freightliner", "frenchman", "frenetic", "frenzied", "frenzy", "frequency", "frequent", "fresco", "fresh", "freshen", "fresher", "freshet", "freshly", "freshwater", "fret", "fretful", "fretsaw", "fretwork", "freudian", "friable", "friar", "friary", "fricassee", "fricative", "friction", "friday", "fridge", "friend", "friendless", "friendly", "friends", "friendship", "frier", "frieze", "frig", "frigate", "frigging", "fright", "frighten", "frightened", "frightful", "frightfully", "frigid", "frigidity", "frill", "frilled", "frills", "frilly", "fringe", "frippery", "frisbee", "frisian", "frisk", "frisky", "frisson", "fritter", "frivolity", "frivolous", "frizz", "frizzle", "frizzy", "fro", "frock", "frog", "frogged", "frogman", "frogmarch", "frogspawn", "fro
 lic", "frolicsome", "from", "frond", "front", "frontage", "frontal", "frontbench", "frontier", "frontiersman", "frontispiece", "frost", "frostbite", "frostbitten", "frostbound", "frosting", "frosty", "froth", "frothy", "frown", "frowst", "frowsty", "frowsy", "frowzy", "froze", "frozen", "frs", "fructification", "fructify", "frugal", "frugality", "fruit", "fruitcake", "fruiterer", "fruitful", "fruition", "fruitless", "fruits", "fruity", "frump", "frustrate", "frustration", "fry", "fryer", "fuchsia", "fuck", "fucker", "fucking", "fuddle", "fudge", "fuehrer", "fuel", "fug", "fugitive", "fugue", "fuhrer", "fulcrum", "fulfil", "fulfill", "fulfillment", "fulfilment", "full", "fullback", "fuller", "fully", "fulmar", "fulminate", "fulmination", "fulness", "fulsome", "fumble", "fume", "fumes", "fumigate", "fun", "function", "functional", "functionalism", "functionalist", "functionary", "fund", "fundamental", "fundamentalism", "fundamentally", "funds", "funeral", "funerary", "funereal", "funf
 air", "fungicide", "fungoid", "fungous", "fungus", "funicular", "funk", "funky", "funnel", "funnies", "funnily", "funny", "fur", "furbelow", "furbish", "furious", "furiously", "furl", "furlong", "furlough", "furnace", "furnish", "furnishings", "furniture", "furore", "furrier", "furrow", "furry", "further", "furtherance", "furthermore", "furthermost", "furthest", "furtive", "fury", "furze", "fuse", "fused", "fuselage", "fusilier", "fusillade", "fusion", "fuss", "fusspot", "fussy", "fustian", "fusty", "futile", "futility", "future", "futureless", "futures", "futurism", "futuristic", "futurity", "fuzz", "fuzzy", "gab", "gabardine", "gabble", "gaberdine", "gable", "gabled", "gad", "gadabout", "gadfly", "gadget", "gadgetry", "gaelic", "gaff", "gaffe", "gaffer", "gag", "gaga", "gaggle", "gaiety", "gaily", "gain", "gainful", "gainfully", "gainsay", "gait", "gaiter", "gal", "gala", "galactic", "galantine", "galaxy", "gale", "gall", "gallant", "gallantry", "galleon", "gallery", "galley", "ga
 llic", "gallicism", "gallivant", "gallon", "gallop", "galloping", "gallows", "gallstone", "galore", "galosh", "galumph", "galvanic", "galvanise", "galvanism", "galvanize", "gambit", "gamble", "gamboge", "gambol", "game", "gamecock", "gamekeeper", "games", "gamesmanship", "gamey", "gamma", "gammon", "gammy", "gamp", "gamut", "gamy", "gander", "gang", "ganger", "gangling", "ganglion", "gangplank", "gangrene", "gangster", "gangway", "gannet", "gantry", "gaol", "gaolbird", "gaoler", "gap", "gape", "gapes", "garage", "garb", "garbage", "garble", "garden", "gardenia", "gardening", "gargantuan", "gargle", "gargoyle", "garish", "garland", "garlic", "garment", "garner", "garnet", "garnish", "garret", "garrison", "garrote", "garrotte", "garrulity", "garrulous", "garter", "gas", "gasbag", "gaseous", "gash", "gasholder", "gasify", "gasket", "gaslight", "gasman", "gasolene", "gasoline", "gasp", "gassy", "gastric", "gastritis", "gastroenteritis", "gastronomy", "gasworks", "gat", "gate", "gatecras
 h", "gatehouse", "gatekeeper", "gatepost", "gateway", "gather", "gathering", "gauche", "gaucherie", "gaucho", "gaudy", "gauge", "gaunt", "gauntlet", "gauze", "gave", "gavel", "gavotte", "gawk", "gawky", "gawp", "gay", "gayness", "gaze", "gazebo", "gazelle", "gazette", "gazetteer", "gazump", "gce", "gear", "gearbox", "gecko", "gee", "geese", "geezer", "geisha", "gel", "gelatine", "gelatinous", "geld", "gelding", "gelignite", "gem", "gemini", "gen", "gendarme", "gender", "gene", "genealogist", "genealogy", "genera", "general", "generalisation", "generalise", "generalissimo", "generality", "generalization", "generalize", "generally", "generate", "generation", "generative", "generator", "generic", "generous", "genesis", "genetic", "geneticist", "genetics", "genial", "geniality", "genie", "genital", "genitals", "genitive", "genius", "genocide", "genre", "gent", "genteel", "gentian", "gentile", "gentility", "gentle", "gentlefolk", "gentleman", "gentlemanly", "gentlewoman", "gently", "gent
 ry", "gents", "genuflect", "genuine", "genus", "geocentric", "geographer", "geography", "geologist", "geology", "geometric", "geometry", "geophysics", "geopolitics", "georgette", "geranium", "geriatric", "geriatrician", "geriatrics", "germ", "germane", "germanic", "germicide", "germinal", "germinate", "gerontology", "gerrymander", "gerund", "gestalt", "gestapo", "gestation", "gesticulate", "gesture", "get", "getaway", "getup", "geum", "gewgaw", "geyser", "gharry", "ghastly", "ghat", "ghaut", "ghee", "gherkin", "ghetto", "ghi", "ghost", "ghostly", "ghoul", "ghoulish", "ghq", "ghyll", "giant", "giantess", "gibber", "gibberish", "gibbet", "gibbon", "gibbous", "gibe", "giblets", "giddy", "gift", "gifted", "gig", "gigantic", "giggle", "gigolo", "gild", "gilded", "gilding", "gill", "gillie", "gilly", "gilt", "gimcrack", "gimlet", "gimmick", "gimmicky", "gin", "ginger", "gingerbread", "gingerly", "gingham", "gingivitis", "gingko", "ginkgo", "ginseng", "gipsy", "giraffe", "gird", "girder", 
 "girdle", "girl", "girlfriend", "girlhood", "girlie", "girlish", "girly", "giro", "girt", "girth", "gist", "give", "giveaway", "given", "gizzard", "glacial", "glacier", "glad", "gladden", "glade", "gladiator", "gladiolus", "gladly", "glamor", "glamorise", "glamorize", "glamorous", "glamour", "glamourous", "glance", "glancing", "gland", "glandular", "glare", "glaring", "glass", "glassblower", "glasscutter", "glasses", "glasshouse", "glassware", "glassworks", "glassy", "glaucoma", "glaucous", "glaze", "glazier", "glazing", "glc", "gleam", "glean", "gleaner", "gleanings", "glebe", "glee", "gleeful", "glen", "glengarry", "glib", "glide", "glider", "gliding", "glimmer", "glimmerings", "glimpse", "glint", "glissade", "glissando", "glisten", "glister", "glitter", "glittering", "gloaming", "gloat", "global", "globe", "globefish", "globetrotter", "globular", "globule", "glockenspiel", "gloom", "gloomy", "gloria", "glorification", "glorify", "glorious", "glory", "gloss", "glossary", "glossy",
  "glottal", "glottis", "glove", "glow", "glower", "glowing", "glucose", "glue", "gluey", "glum", "glut", "gluten", "glutinous", "glutton", "gluttonous", "gluttony", "glycerin", "glycerine", "gnarled", "gnash", "gnat", "gnaw", "gnawing", "gneiss", "gnocchi", "gnome", "gnp", "gnu", "goad", "goal", "goalkeeper", "goalmouth", "goalpost", "goat", "goatee", "goatherd", "goatskin", "gob", "gobbet", "gobble", "gobbledegook", "gobbledygook", "gobbler", "goblet", "goblin", "god", "godchild", "goddam", "goddamn", "goddie", "godforsaken", "godhead", "godless", "godlike", "godly", "godown", "godparent", "gods", "godsend", "godspeed", "goer", "goggle", "goggles", "goings", "goiter", "goitre", "gold", "goldbeater", "golden", "goldfield", "goldfinch", "goldfish", "goldmine", "goldsmith", "golf", "goliath", "golliwog", "golly", "gollywog", "gonad", "gondola", "gondolier", "gone", "goner", "gong", "gonna", "gonorrhea", "gonorrhoea", "goo", "good", "goodbye", "goodish", "goodly", "goodness", "goodnigh
 t", "goods", "goodwill", "goody", "gooey", "goof", "goofy", "googly", "goon", "goose", "gooseberry", "gooseflesh", "goosestep", "gopher", "gore", "gorge", "gorgeous", "gorgon", "gorgonzola", "gorilla", "gormandise", "gormandize", "gormless", "gorse", "gory", "gosh", "gosling", "gospel", "gossamer", "gossip", "gossipy", "got", "gothic", "gotta", "gotten", "gouache", "gouda", "gouge", "goulash", "gourd", "gourmand", "gourmet", "gout", "gouty", "govern", "governance", "governess", "governing", "government", "governor", "gown", "gpo", "grab", "grace", "graceful", "graceless", "graces", "gracious", "gradation", "grade", "gradient", "gradual", "graduate", "graduation", "graffiti", "graft", "grafter", "grail", "grain", "gram", "grammar", "grammarian", "grammatical", "gramme", "gramophone", "grampus", "gran", "granary", "grand", "grandad", "grandchild", "granddad", "granddaughter", "grandee", "grandeur", "grandfather", "grandiloquent", "grandiose", "grandma", "grandmother", "grandpa", "gran
 dparent", "grandson", "grandstand", "grange", "granite", "grannie", "granny", "grant" };
+        internal static string[] data = new string[] {
+            "distasteful","distemper","distempered","distend","distension",
+            "distil","distill","distillation","distiller","distillery",
+            "distinct","distinction","distinctive","distinguish","distinguishable",
+            "distinguished","distort","distortion","distract","distracted",
+            "distraction","distrain","distraint","distrait","distraught",
+            "distress","distressing","distribute","distribution","distributive",
+            "distributor","district","distrust","distrustful","disturb",
+            "disturbance","disturbed","disunion","disunite","disunity",
+            "disuse","disused","disyllabic","disyllable","ditch",
+            "dither","dithers","ditto","ditty","diuretic",
+            "diurnal","divagate","divan","dive","diver",
+            "diverge","divergence","divers","diverse","diversify",
+            "diversion","diversionary","diversity","divert","divertimento",
+            "divertissement","divest","divide","dividend","dividers",
+            "divination","divine","diviner","divingboard","divinity",
+            "divisible","division","divisive","divisor","divorce",
+            "divot","divulge","divvy","dixie","dixieland",
+            "dizzy","djinn","dna","do","dobbin",
+            "doc","docile","dock","docker","docket",
+            "dockyard","doctor","doctoral","doctorate","doctrinaire",
+            "doctrinal","doctrine","document","documentary","documentation",
+            "dodder","doddering","doddle","dodge","dodgems",
+            "dodger","dodgy","dodo","doe","doer",
+            "doeskin","doff","dog","dogcart","dogcatcher",
+            "dogfight","dogfish","dogged","doggerel","doggie",
+            "doggo","doggone","doggy","doghouse","dogie",
+            "dogleg","dogma","dogmatic","dogmatics","dogmatism",
+            "dogs","dogsbody","dogtooth","dogtrot","dogwood",
+            "doh","doily","doings","doldrums","dole",
+            "doleful","doll","dollar","dollop","dolly",
+            "dolmen","dolor","dolorous","dolour","dolphin",
+            "dolt","domain","dome","domed","domestic",
+            "domesticate","domesticity","domicile","domiciliary","dominance",
+            "dominant","dominate","domination","domineer","dominican",
+            "dominion","domino","dominoes","don","donate",
+            "donation","donjon","donkey","donkeywork","donnish",
+            "donor","doodle","doodlebug","doom","doomsday",
+            "door","doorbell","doorframe","doorkeeper","doorknob",
+            "doorknocker","doorman","doormat","doornail","doorplate",
+            "doorscraper","doorstep","doorstopper","doorway","dope",
+            "dopey","dopy","doric","dormant","dormer",
+            "dormitory","dormouse","dorsal","dory","dosage",
+            "dose","doss","dosser","dosshouse","dossier",
+            "dost","dot","dotage","dote","doth",
+            "doting","dottle","dotty","double","doubles",
+            "doublet","doublethink","doubloon","doubly","doubt",
+            "doubtful","doubtless","douche","dough","doughnut",
+            "doughty","doughy","dour","douse","dove",
+            "dovecote","dovetail","dowager","dowdy","dowel",
+            "dower","down","downbeat","downcast","downdraft",
+            "downdraught","downer","downfall","downgrade","downhearted",
+            "downhill","downpour","downright","downstage","downstairs",
+            "downstream","downtown","downtrodden","downward","downwards",
+            "downwind","downy","dowry","dowse","doxology",
+            "doyen","doyley","doze","dozen","dozy",
+            "dpt","drab","drabs","drachm","drachma",
+            "draconian","draft","draftee","draftsman","drafty",
+            "drag","draggled","draggy","dragnet","dragoman",
+            "dragon","dragonfly","dragoon","drain","drainage",
+            "drainpipe","drake","dram","drama","dramatic",
+            "dramatics","dramatise","dramatist","dramatize","drank",
+            "drape","draper","drapery","drastic","drat",
+            "draught","draughtboard","draughts","draughtsman","draughty",
+            "draw","drawback","drawbridge","drawer","drawers",
+            "drawing","drawl","drawn","drawstring","dray",
+            "dread","dreadful","dreadfully","dreadnaught","dreadnought",
+            "dream","dreamboat","dreamer","dreamland","dreamless",
+            "dreamlike","dreamy","drear","dreary","dredge",
+            "dredger","dregs","drench","dress","dressage",
+            "dresser","dressing","dressmaker","dressy","drew",
+            "dribble","driblet","dribs","drier","drift",
+            "driftage","drifter","driftnet","driftwood","drill",
+            "drily","drink","drinkable","drinker","drip",
+            "dripping","drive","drivel","driver","driveway",
+            "driving","drizzle","drogue","droll","drollery",
+            "dromedary","drone","drool","droop","drop",
+            "dropkick","droplet","dropout","dropper","droppings",
+            "drops","dropsy","dross","drought","drove",
+            "drover","drown","drowse","drowsy","drub",
+            "drudge","drudgery","drug","drugget","druggist",
+            "drugstore","druid","drum","drumbeat","drumfire",
+            "drumhead","drummer","drumstick","drunk","drunkard",
+            "drunken","drupe","dry","dryad","dryer",
+            "dual","dub","dubbin","dubiety","dubious",
+            "ducal","ducat","duchess","duchy","duck",
+            "duckboards","duckling","ducks","duckweed","ducky",
+            "duct","ductile","dud","dude","dudgeon",
+            "duds","due","duel","duenna","dues",
+            "duet","duff","duffel","duffer","duffle",
+            "dug","dugout","duke","dukedom","dukes",
+            "dulcet","dulcimer","dull","dullard","duly",
+            "dumb","dumbbell","dumbfound","dumbwaiter","dumfound",
+            "dummy","dump","dumper","dumpling","dumps",
+            "dumpy","dun","dunce","dunderhead","dung",
+            "dungaree","dungarees","dungeon","dunghill","dunk",
+            "duo","duodecimal","duodenum","duologue","dupe",
+            "duplex","duplicate","duplicator","duplicity","durable",
+            "duration","durbar","duress","durex","during",
+            "durst","dusk","dusky","dust","dustbin",
+            "dustbowl","dustcart","dustcoat","duster","dustman",
+            "dustpan","dustsheet","dustup","dusty","dutch",
+            "dutiable","dutiful","duty","duvet","dwarf",
+            "dwell","dwelling","dwindle","dyarchy","dye",
+            "dyestuff","dyeworks","dyke","dynamic","dynamics",
+            "dynamism","dynamite","dynamo","dynasty","dysentery",
+            "dyslexia","dyspepsia","dyspeptic","each","eager",
+            "eagle","eaglet","ear","earache","eardrum",
+            "eared","earful","earl","earliest","earlobe",
+            "early","earmark","earmuff","earn","earnest",
+            "earnings","earphone","earpiece","earplug","earring",
+            "earshot","earth","earthbound","earthen","earthenware",
+            "earthling","earthly","earthnut","earthquake","earthshaking",
+            "earthwork","earthworm","earthy","earwax","earwig",
+            "ease","easel","easily","east","eastbound",
+            "easter","easterly","eastern","easterner","easternmost",
+            "easy","easygoing","eat","eatable","eatables",
+            "eater","eats","eaves","eavesdrop","ebb",
+            "ebony","ebullience","ebullient","eccentric","eccentricity",
+            "ecclesiastic","ecclesiastical","ecg","echelon","echo",
+            "eclectic","eclipse","ecliptic","eclogue","ecological",
+            "ecologically","ecology","economic","economical","economically",
+            "economics","economise","economist","economize","economy",
+            "ecosystem","ecstasy","ecstatic","ect","ectoplasm",
+            "ecumenical","ecumenicalism","eczema","edam","eddy",
+            "edelweiss","eden","edge","edgeways","edging",
+            "edgy","edible","edibles","edict","edification",
+            "edifice","edify","edit","edition","editor",
+            "editorial","editorialise","editorialize","educate","educated",
+            "education","educational","educationist","educator","educe",
+            "eec","eeg","eel","eerie","efface",
+            "effect","effective","effectively","effectiveness","effectives",
+            "effects","effectual","effectually","effectuate","effeminacy",
+            "effeminate","effendi","effervesce","effete","efficacious",
+            "efficacy","efficiency","efficient","effigy","efflorescence",
+            "effluent","efflux","effort","effortless","effrontery",
+            "effulgence","effulgent","effusion","effusive","eft",
+            "egalitarian","egg","eggcup","egghead","eggnog",
+            "eggplant","eggshell","egis","eglantine","ego",
+            "egocentric","egoism","egoist","egotism","egotist",
+            "egregious","egress","egret","eiderdown","eight",
+            "eighteen","eightsome","eighty","eisteddfod","either",
+            "ejaculate","ejaculation","eject","ejector","eke",
+            "ekg","elaborate","elaboration","eland","elapse",
+            "elastic","elasticity","elastoplast","elate","elated",
+            "elation","elbow","elbowroom","elder","elderberry",
+            "elderflower","elderly","eldest","elect","election",
+            "electioneer","electioneering","elective","elector","electoral",
+            "electorate","electric","electrical","electrician","electricity",
+            "electrify","electrocardiogram","electrocardiograph","electrocute","electrode",
+            "electroencephalogram","electroencephalograph","electrolysis","electrolyte","electron",
+            "electronic","electronics","electroplate","eleemosynary","elegant",
+            "elegiac","elegy","element","elemental","elementary",
+            "elements","elephant","elephantiasis","elephantine","elevate",
+            "elevated","elevation","elevator","eleven","elevenses",
+            "elf","elfin","elfish","elicit","elide",
+            "eligible","eliminate","elite","elitism","elixir",
+            "elizabethan","elk","elkhound","ellipse","ellipsis",
+            "elliptic","elm","elocution","elocutionary","elocutionist",
+            "elongate","elongation","elope","eloquence","eloquent",
+            "else","elsewhere","elucidate","elucidatory","elude",
+            "elusive","elver","elves","elvish","elysian",
+            "elysium","emaciate","emanate","emancipate","emancipation",
+            "emasculate","embalm","embankment","embargo","embark",
+            "embarkation","embarrass","embarrassment","embassy","embattled",
+            "embed","embellish","ember","embezzle","embitter",
+            "emblazon","emblem","emblematic","embodiment","embody",
+            "embolden","embolism","embonpoint","embosomed","emboss",
+            "embowered","embrace","embrasure","embrocation","embroider",
+            "embroidery","embroil","embryo","embryonic","emend",
+            "emendation","emerald","emerge","emergence","emergency",
+            "emergent","emeritus","emery","emetic","emigrant",
+            "emigrate","eminence","eminent","eminently","emir",
+            "emirate","emissary","emission","emit","emmentaler",
+            "emmenthaler","emollient","emolument","emote","emotion",
+            "emotional","emotionalism","emotionally","emotive","empanel",
+            "empathy","emperor","emphasis","emphasise","emphasize",
+            "emphatic","emphatically","emphysema","empire","empirical",
+            "empiricism","emplacement","emplane","employ","employable",
+            "employee","employer","employment","emporium","empower",
+            "empress","emptily","empty","empurpled","empyreal",
+            "empyrean","emu","emulate","emulation","emulsify",
+            "emulsion","enable","enabling","enact","enactment",
+            "enamel","enamelware","enamored","enamoured","encamp",
+            "encampment","encapsulate","encase","encaustic","encephalitis",
+            "enchain","enchant","enchanter","enchanting","enchantment",
+            "encipher","encircle","enclave","enclose","enclosure",
+            "encode","encomium","encompass","encore","encounter",
+            "encourage","encouragement","encroach","encroachment","encrust",
+            "encumber","encumbrance","encyclical","encyclopaedia","encyclopaedic",
+            "encyclopedia","encyclopedic","end","endanger","endear",
+            "endearing","endearment","endeavor","endeavour","endemic",
+            "ending","endive","endless","endocrine","endorse",
+            "endow","endowment","endpaper","endurance","endure",
+            "enduring","endways","enema","enemy","energetic",
+            "energize","energy","enervate","enfeeble","enfilade",
+            "enfold","enforce","enfranchise","engage","engaged",
+            "engagement","engaging","engender","engine","engineer",
+            "engineering","english","englishman","engraft","engrave",
+            "engraving","engross","engrossing","engulf","enhance",
+            "enigma","enigmatic","enjoin","enjoy","enjoyable",
+            "enjoyment","enkindle","enlarge","enlargement","enlighten",
+            "enlightened","enlightenment","enlist","enliven","enmesh",
+            "enmity","ennoble","ennui","enormity","enormous",
+            "enormously","enough","enplane","enquire","enquiring",
+            "enquiry","enrage","enrapture","enrich","enrol",
+            "enroll","enrollment","enrolment","ensanguined","ensconce",
+            "ensemble","enshrine","enshroud","ensign","enslave",
+            "ensnare","ensue","ensure","entail","entangle",
+            "entanglement","entente","enter","enteritis","enterprise",
+            "enterprising","entertain","entertainer","entertaining","entertainment",
+            "enthral","enthrall","enthrone","enthroned","enthuse",
+            "enthusiasm","enthusiast","entice","enticement","entire",
+            "entirety","entitle","entity","entomb","entomology",
+            "entourage","entrails","entrain","entrance","entrant",
+            "entrap","entreat","entreaty","entrench","entrenched",
+            "entrenchment","entrepreneur","entresol","entropy","entrust",
+            "entry","entwine","enumerate","enunciate","enunciation",
+            "envelop","envenom","enviable","envious","environed",
+            "environment","environmental","environmentalist","environs","envisage",
+            "envoi","envoy","envy","enzyme","eon",
+            "epaulet","epaulette","ephemeral","epic","epicenter",
+            "epicentre","epicure","epicurean","epidemic","epidermis",
+            "epidiascope","epiglottis","epigram","epigrammatic","epilepsy",
+            "epileptic","epilogue","epiphany","episcopacy","episcopal",
+            "episcopalian","episode","episodic","epistle","epistolary",
+            "epitaph","epithet","epitome","epitomise","epitomize",
+            "epoch","eponymous","equability","equable","equal",
+            "equalise","equalitarian","equality","equalize","equally",
+            "equanimity","equate","equation","equator","equatorial",
+            "equerry","equestrian","equidistant","equilateral","equilibrium",
+            "equine","equinoctial","equinox","equip","equipage",
+            "equipment","equipoise","equitable","equitation","equities",
+            "equity","equivalence","equivalent","equivocal","equivocate",
+            "equivocation","era","eradicate","eradicator","erase",
+            "eraser","erasure","ere","erect","erectile",
+            "erection","eremite","erg","ergo","ergonomics",
+            "ermine","erode","erogenous","erosion","erotic",
+            "erotica","eroticism","err","errand","errant",
+            "erratic","erratum","erroneous","error","ersatz",
+            "erse","eructation","erudite","erupt","eruption",
+            "erysipelas","escalate","escalator","escalope","escapade",
+            "escape","escapee","escapement","escapism","escapology",
+            "escarpment","eschatology","eschew","escort","escritoire",
+            "escutcheon","eskimo","esophagus","esoteric","esp",
+            "espalier","especial","especially","esperanto","espionage",
+            "esplanade","espousal","espouse","espresso","espy",
+            "essay","essence","essential","essentially","establish",
+            "establishment","estaminet","estate","esteem","esthete",
+            "esthetic","esthetics","estimable","estimate","estimation",
+            "estimator","estrange","estrangement","estrogen","estuary",
+            "etch","etching","eternal","eternity","ether",
+            "ethereal","ethic","ethical","ethically","ethics",
+            "ethnic","ethnically","ethnographer","ethnography","ethnologist",
+            "ethnology","ethos","ethyl","etiolate","etiology",
+            "etiquette","etymologist","etymology","eucalyptus","eucharist",
+            "euclidean","euclidian","eugenic","eugenics","eulogise",
+            "eulogist","eulogistic","eulogize","eulogy","eunuch",
+            "euphemism","euphemistic","euphonious","euphonium","euphony",
+            "euphoria","euphuism","eurasian","eureka","eurhythmic",
+            "eurhythmics","eurocrat","eurodollar","eurythmic","eurythmics",
+            "euthanasia","evacuate","evacuee","evade","evaluate",
+            "evanescent","evangelic","evangelical","evangelise","evangelist",
+            "evangelize","evaporate","evasion","evasive","eve",
+            "even","evening","evenings","evens","evensong",
+            "event","eventful","eventide","eventual","eventuality",
+            "eventually","eventuate","ever","evergreen","everlasting",
+            "everlastingly","evermore","every","everybody","everyday",
+            "everything","everywhere","evict","evidence","evident",
+            "evidently","evil","evildoer","evince","eviscerate",
+            "evocative","evoke","evolution","evolutionary","evolve",
+            "ewe","ewer","exacerbate","exact","exacting",
+            "exaction","exactly","exaggerate","exaggeration","exalt",
+            "exaltation","exalted","exam","examination","examine",
+            "example","exasperate","exasperation","excavate","excavation",
+            "excavator","exceed","exceedingly","excel","excellence",
+            "excellency","excellent","excelsior","except","excepted",
+            "excepting","exception","exceptionable","exceptional","excerpt",
+            "excess","excesses","excessive","exchange","exchequer",
+            "excise","excision","excitable","excite","excited",
+            "excitement","exciting","exclaim","exclamation","exclamatory",
+            "exclude","excluding","exclusion","exclusive","exclusively",
+            "excogitate","excommunicate","excommunication","excoriate","excrement",
+            "excrescence","excreta","excrete","excretion","excruciating",
+            "exculpate","excursion","excursionist","excusable","excuse",
+            "execrable","execrate","executant","execute","execution",
+            "executioner","executive","executor","exegesis","exemplary",
+            "exemplification","exemplify","exempt","exemption","exercise",
+            "exercises","exert","exertion","exeunt","exhalation",
+            "exhale","exhaust","exhaustion","exhaustive","exhibit",
+            "exhibition","exhibitionism","exhibitor","exhilarate","exhilarating",
+            "exhort","exhortation","exhume","exigency","exigent",
+            "exiguous","exile","exist","existence","existent",
+            "existential","existentialism","existing","exit","exodus",
+            "exogamy","exonerate","exorbitant","exorcise","exorcism",
+            "exorcist","exorcize","exotic","expand","expanse",
+            "expansion","expansive","expatiate","expatriate","expect",
+            "expectancy","expectant","expectation","expectations","expectorate",
+            "expediency","expedient","expedite","expedition","expeditionary",
+            "expeditious","expel","expend","expendable","expenditure",
+            "expense","expenses","expensive","experience","experienced",
+            "experiment","experimental","experimentation","expert","expertise",
+            "expiate","expiration","expire","explain","explanation",
+            "explanatory","expletive","explicable","explicate","explicit",
+            "explode","exploded","exploit","exploration","exploratory",
+            "explore","explosion","explosive","expo","exponent",
+            "exponential","export","exportation","exporter","expose",
+            "exposition","expostulate","exposure","expound","express",
+            "expression","expressionism","expressionless","expressive","expressly",
+            "expressway","expropriate","expulsion","expunge","expurgate",
+            "exquisite","extant","extemporaneous","extempore","extemporise",
+            "extemporize","extend","extension","extensive","extent",
+            "extenuate","extenuation","exterior","exteriorise","exteriorize",
+            "exterminate","external","externalise","externalize","externally",
+            "externals","exterritorial","extinct","extinction","extinguish",
+            "extinguisher","extirpate","extol","extort","extortion",
+            "extortionate","extortions","extra","extract","extraction",
+            "extracurricular","extraditable","extradite","extrajudicial","extramarital",
+            "extramural","extraneous","extraordinarily","extraordinary","extrapolate",
+            "extraterrestrial","extraterritorial","extravagance","extravagant","extravaganza",
+            "extravert","extreme","extremely","extremism","extremities",
+            "extremity","extricate","extrinsic","extrovert","extrude",
+            "exuberance","exuberant","exude","exult","exultant",
+            "exultation","eye","eyeball","eyebrow","eyecup",
+            "eyeful","eyeglass","eyeglasses","eyelash","eyelet",
+            "eyelid","eyeliner","eyepiece","eyes","eyeshot",
+            "eyesight","eyesore","eyestrain","eyetooth","eyewash",
+            "eyewitness","eyot","eyrie","eyry","fabian",
+            "fable","fabled","fabric","fabricate","fabrication",
+            "fabulous","fabulously","face","facecloth","faceless",
+            "facet","facetious","facial","facile","facilitate",
+            "facilities","facility","facing","facings","facsimile",
+            "fact","faction","factious","factitious","factor",
+            "factorial","factorise","factorize","factory","factotum",
+            "factual","faculty","fad","fade","faeces",
+            "faerie","faery","fag","fagged","faggot",
+            "fagot","fahrenheit","faience","fail","failing",
+            "failure","fain","faint","fair","fairground",
+            "fairly","fairway","fairy","fairyland","faith",
+            "faithful","faithfully","faithless","fake","fakir",
+            "falcon","falconer","falconry","fall","fallacious",
+            "fallacy","fallen","fallible","fallout","fallow",
+            "falls","false","falsehood","falsetto","falsies",
+            "falsify","falsity","falter","fame","famed",
+            "familial","familiar","familiarise","familiarity","familiarize",
+            "familiarly","family","famine","famish","famished",
+            "famous","famously","fan","fanatic","fanaticism",
+            "fancier","fancies","fanciful","fancy","fancywork",
+            "fandango","fanfare","fang","fanlight","fanny",
+            "fantasia","fantastic","fantasy","far","faraway",
+            "farce","fare","farewell","farfetched","farinaceous",
+            "farm","farmer","farmhand","farmhouse","farming",
+            "farmyard","farrago","farrier","farrow","farsighted",
+            "fart","farther","farthest","farthing","fascia",
+            "fascinate","fascinating","fascination","fascism","fascist",
+            "fashion","fashionable","fast","fasten","fastener",
+            "fastening","fastidious","fastness","fat","fatal",
+            "fatalism","fatalist","fatality","fatally","fate",
+            "fated","fateful","fates","fathead","father",
+            "fatherhood","fatherly","fathom","fathomless","fatigue",
+            "fatigues","fatless","fatted","fatten","fatty",
+            "fatuity","fatuous","faucet","fault","faultfinding",
+            "faultless","faulty","faun","fauna","favor",
+            "favorable","favored","favorite","favoritism","favour",
+            "favourable","favoured","favourite","favouritism","favours",
+            "fawn","fay","faze","fbi","fealty",
+            "fear","fearful","fearless","fearsome","feasible",
+            "feast","feat","feather","featherbed","featherbrained",
+            "featherweight","feathery","feature","featureless","features",
+            "febrile","february","feces","feckless","fecund",
+            "fed","federal","federalism","federalist","federate",
+            "federation","fee","feeble","feebleminded","feed",
+            "feedback","feedbag","feeder","feel","feeler",
+            "feeling","feelings","feet","feign","feint",
+            "feldspar","felicitate","felicitous","felicity","feline",
+            "fell","fellah","fellatio","fellow","fellowship",
+            "felon","felony","felspar","felt","felucca",
+            "fem","female","feminine","femininity","feminism",
+            "feminist","femur","fen","fence","fencer",
+            "fencing","fend","fender","fennel","feoff",
+            "feral","ferment","fermentation","fern","ferocious",
+            "ferocity","ferret","ferroconcrete","ferrous","ferrule",
+            "ferry","ferryboat","ferryman","fertile","fertilise",
+            "fertility","fertilize","fertilizer","ferule","fervent",
+            "fervid","fervor","fervour","festal","fester",
+            "festival","festive","festivity","festoon","fetal",
+            "fetch","fetching","fete","fetid","fetish",
+            "fetishism","fetishist","fetlock","fetter","fettle",
+            "fetus","feud","feudal","feudalism","feudatory",
+            "fever","fevered","feverish","feverishly","few",
+            "fey","fez","fiasco","fiat","fib",
+            "fiber","fiberboard","fiberglass","fibre","fibreboard",
+            "fibreglass","fibrositis","fibrous","fibula","fichu",
+            "fickle","fiction","fictional","fictionalisation","fictionalization",
+            "fictitious","fiddle","fiddler","fiddlesticks","fiddling",
+            "fidelity","fidget","fidgets","fidgety","fie",
+            "fief","field","fielder","fieldwork","fiend",
+            "fiendish","fiendishly","fierce","fiery","fiesta",
+            "fife","fifteen","fifth","fifty","fig",
+            "fight","fighter","figment","figurative","figure",
+            "figured","figurehead","figures","figurine","filament",
+            "filbert","filch","file","filet","filial",
+            "filibuster","filigree","filings","fill","filler",
+            "fillet","filling","fillip","filly","film",
+            "filmable","filmstrip","filmy","filter","filth",
+            "filthy","fin","finable","final","finale",
+            "finalise","finalist","finality","finalize","finally",
+            "finance","finances","financial","financially","financier",
+            "finch","find","finder","finding","fine",
+            "fineable","finely","finery","finesse","finger",
+            "fingerboard","fingering","fingernail","fingerplate","fingerpost",
+            "fingerprint","fingerstall","fingertip","finicky","finis",
+            "finish","finished","finite","fink","fiord",
+            "fir","fire","firearm","fireball","firebomb",
+            "firebox","firebrand","firebreak","firebrick","firebug",
+            "fireclay","firecracker","firedamp","firedog","firefly",
+            "fireguard","firelight","firelighter","fireman","fireplace",
+            "firepower","fireproof","fireside","firestorm","firetrap",
+            "firewalking","firewatcher","firewater","firewood","firework",
+            "fireworks","firkin","firm","firmament","first",
+            "firstborn","firstfruits","firsthand","firstly","firth",
+            "firtree","fiscal","fish","fishcake","fisherman",
+            "fishery","fishing","fishmonger","fishplate","fishwife",
+            "fishy","fissile","fission","fissionable","fissure",
+            "fist","fisticuffs","fistula","fit","fitful",
+            "fitment","fitness","fitted","fitter","fitting",
+            "five","fiver","fives","fix","fixation",
+            "fixative","fixed","fixedly","fixity","fixture",
+            "fizz","fizzle","fizzy","fjord","flabbergast",
+            "flabby","flaccid","flag","flagellant","flagellate",
+            "flageolet","flagon","flagpole","flagrancy","flagrant",
+            "flagship","flagstaff","flagstone","flail","flair",
+            "flak","flake","flaky","flambeau","flamboyant",
+            "flame","flamenco","flaming","flamingo","flammable",
+            "flan","flange","flank","flannel","flannelette",
+            "flannels","flap","flapjack","flapper","flare",
+            "flared","flares","flash","flashback","flashbulb",
+            "flashcube","flasher","flashgun","flashlight","flashy",
+            "flask","flat","flatcar","flatfish","flatfoot",
+            "flatiron","flatlet","flatly","flatten","flatter",
+            "flattery","flattop","flatulence","flaunt","flautist",
+            "flavor","flavoring","flavour","flavouring","flaw",
+            "flawless","flax","flaxen","flay","flea",
+            "fleabag","fleabite","fleapit","fleck","fledged",
+            "fledgling","flee","fleece","fleecy","fleet",
+            "fleeting","flesh","fleshings","fleshly","fleshpot",
+            "fleshy","flew","flex","flexible","flibbertigibbet",
+            "flick","flicker","flicks","flier","flies",
+            "flight","flightless","flighty","flimsy","flinch",
+            "fling","flint","flintlock","flinty","flip",
+            "flippancy","flippant","flipper","flipping","flirt",
+            "flirtation","flirtatious","flit","flitch","flivver",
+            "float","floatation","floating","flock","floe",
+            "flog","flogging","flood","floodgate","floodlight",
+            "floor","floorboard","flooring","floorwalker","floosy",
+            "floozy","flop","floppy","flora","floral",
+            "floriculture","florid","florin","florist","floss",
+            "flotation","flotilla","flounce","flounder","flour",
+            "flourish","flourmill","floury","flout","flow",
+            "flower","flowerbed","flowered","flowering","flowerless",
+            "flowerpot","flowery","flowing","flown","flu",
+            "fluctuate","flue","fluency","fluent","fluff",
+            "fluffy","fluid","fluidity","fluke","flukey",
+            "fluky","flume","flummery","flummox","flung",
+            "flunk","flunkey","flunky","fluorescent","fluoridate",
+            "fluoride","fluorine","flurry","flush","flushed",
+            "fluster","flute","fluting","flutist","flutter",
+            "fluvial","flux","fly","flyaway","flyblown",
+            "flyby","flycatcher","flyer","flying","flyleaf",
+            "flyover","flypaper","flypast","flysheet","flyswatter",
+            "flytrap","flyweight","flywheel","flywhisk","foal",
+            "foam","fob","focal","focus","fodder",
+            "foe","foeman","foetal","foetus","fog",
+            "fogbank","fogbound","fogey","foggy","foghorn",
+            "fogy","foible","foil","foist","fold",
+            "foldaway","folder","foliage","folio","folk",
+            "folklore","folklorist","folks","folksy","folktale",
+            "folkway","follicle","follow","follower","following",
+            "folly","foment","fomentation","fond","fondant",
+            "fondle","fondly","fondu","fondue","font",
+            "food","foodstuff","fool","foolery","foolhardy",
+            "foolish","foolproof","foolscap","foot","footage",
+            "football","footbath","footboard","footbridge","footer",
+            "footfall","foothill","foothold","footing","footle",
+            "footlights","footling","footloose","footman","footnote",
+            "footpad","footpath","footplate","footprint","footrace",
+            "footsie","footslog","footsore","footstep","footstool",
+            "footsure","footwear","footwork","fop","foppish",
+            "for","forage","foray","forbear","forbearance",
+            "forbearing","forbid","forbidden","forbidding","force",
+            "forced","forceful","forcemeat","forceps","forces",
+            "forcible","forcibly","ford","fore","forearm",
+            "forebode","foreboding","forecast","forecastle","foreclose",
+            "foreclosure","forecourt","foredoomed","forefather","forefinger",
+            "forefoot","forefront","forego","foregoing","foreground",
+            "forehand","forehead","foreign","foreigner","foreknowledge",
+            "foreland","foreleg","forelock","foreman","foremost",
+            "forename","forenoon","forensic","foreordain","forepart",
+            "foreplay","forerunner","foresail","foresee","foreseeable",
+            "foreshadow","foreshore","foreshorten","foresight","foreskin",
+            "forest","forestall","forester","forestry","foreswear",
+            "foretaste","foretell","forethought","forever","forewarn",
+            "forewent","forewoman","foreword","forfeit","forfeiture",
+            "forgather","forgave","forge","forger","forgery",
+            "forget","forgetful","forging","forgivable","forgive",
+            "forgiveable","forgiveness","forgiving","forgo","fork",
+            "forked","forkful","forklift","forlorn","form",
+            "formal","formaldehyde","formalin","formalise","formalism",
+            "formality","formalize","format","formation","formative",
+            "formbook","former","formerly","formica","formidable",
+            "formless","formula","formulaic","formulate","formulation",
+            "fornicate","fornication","forrader","forsake","forsooth",
+            "forswear","forsythia","fort","forte","forth",
+            "forthcoming","forthright","forthwith","fortieth","fortification",
+            "fortify","fortissimo","fortitude","fortnight","fortnightly",
+            "fortress","fortuitous","fortunate","fortunately","fortune",
+            "forty","forum","forward","forwarding","forwardly",
+            "forwardness","forwent","foss","fosse","fossil",
+            "fossilise","fossilize","foster","fought","foul",
+            "found","foundation","foundations","founder","foundling",
+            "foundry","fount","fountain","fountainhead","four",
+            "foureyes","fourpenny","fours","foursquare","fourteen",
+            "fourth","fowl","fox","foxglove","foxhole",
+            "foxhound","foxhunt","foxtrot","foxy","foyer",
+            "fracas","fraction","fractional","fractionally","fractious",
+            "fracture","fragile","fragment","fragmentary","fragmentation",
+            "fragrance","fragrant","frail","frailty","frame",
+            "frames","framework","franc","franchise","franciscan",
+            "frank","frankfurter","frankincense","franklin","frankly",
+            "frantic","fraternal","fraternise","fraternity","fraternize",
+            "fratricide","frau","fraud","fraudulence","fraudulent",
+            "fraught","fraulein","fray","frazzle","freak",
+            "freakish","freckle","free","freebee","freebie",
+            "freeboard","freebooter","freeborn","freedman","freedom",
+            "freehand","freehanded","freehold","freeholder","freelance",
+            "freeload","freely","freeman","freemason","freemasonry",
+            "freepost","freesia","freestanding","freestone","freestyle",
+            "freethinker","freeway","freewheel","freewheeling","freewill",
+            "freeze","freezer","freezing","freight","freighter",
+            "freightliner","frenchman","frenetic","frenzied","frenzy",
+            "frequency","frequent","fresco","fresh","freshen",
+            "fresher","freshet","freshly","freshwater","fret",
+            "fretful","fretsaw","fretwork","freudian","friable",
+            "friar","friary","fricassee","fricative","friction",
+            "friday","fridge","friend","friendless","friendly",
+            "friends","friendship","frier","frieze","frig",
+            "frigate","frigging","fright","frighten","frightened",
+            "frightful","frightfully","frigid","frigidity","frill",
+            "frilled","frills","frilly","fringe","frippery",
+            "frisbee","frisian","frisk","frisky","frisson",
+            "fritter","frivolity","frivolous","frizz","frizzle",
+            "frizzy","fro","frock","frog","frogged",
+            "frogman","frogmarch","frogspawn","frolic","frolicsome",
+            "from","frond","front","frontage","frontal",
+            "frontbench","frontier","frontiersman","frontispiece","frost",
+            "frostbite","frostbitten","frostbound","frosting","frosty",
+            "froth","frothy","frown","frowst","frowsty",
+            "frowsy","frowzy","froze","frozen","frs",
+            "fructification","fructify","frugal","frugality","fruit",
+            "fruitcake","fruiterer","fruitful","fruition","fruitless",
+            "fruits","fruity","frump","frustrate","frustration",
+            "fry","fryer","fuchsia","fuck","fucker",
+            "fucking","fuddle","fudge","fuehrer","fuel",
+            "fug","fugitive","fugue","fuhrer","fulcrum",
+            "fulfil","fulfill","fulfillment","fulfilment","full",
+            "fullback","fuller","fully","fulmar","fulminate",
+            "fulmination","fulness","fulsome","fumble","fume",
+            "fumes","fumigate","fun","function","functional",
+            "functionalism","functionalist","functionary","fund","fundamental",
+            "fundamentalism","fundamentally","funds","funeral","funerary",
+            "funereal","funfair","fungicide","fungoid","fungous",
+            "fungus","funicular","funk","funky","funnel",
+            "funnies","funnily","funny","fur","furbelow",
+            "furbish","furious","furiously","furl","furlong",
+            "furlough","furnace","furnish","furnishings","furniture",
+            "furore","furrier","furrow","furry","further",
+            "furtherance","furthermore","furthermost","furthest","furtive",
+            "fury","furze","fuse","fused","fuselage",
+            "fusilier","fusillade","fusion","fuss","fusspot",
+            "fussy","fustian","fusty","futile","futility",
+            "future","futureless","futures","futurism","futuristic",
+            "futurity","fuzz","fuzzy","gab","gabardine",
+            "gabble","gaberdine","gable","gabled","gad",
+            "gadabout","gadfly","gadget","gadgetry","gaelic",
+            "gaff","gaffe","gaffer","gag","gaga",
+            "gaggle","gaiety","gaily","gain","gainful",
+            "gainfully","gainsay","gait","gaiter","gal",
+            "gala","galactic","galantine","galaxy","gale",
+            "gall","gallant","gallantry","galleon","gallery",
+            "galley","gallic","gallicism","gallivant","gallon",
+            "gallop","galloping","gallows","gallstone","galore",
+            "galosh","galumph","galvanic","galvanise","galvanism",
+            "galvanize","gambit","gamble","gamboge","gambol",
+            "game","gamecock","gamekeeper","games","gamesmanship",
+            "gamey","gamma","gammon","gammy","gamp",
+            "gamut","gamy","gander","gang","ganger",
+            "gangling","ganglion","gangplank","gangrene","gangster",
+            "gangway","gannet","gantry","gaol","gaolbird",
+            "gaoler","gap","gape","gapes","garage",
+            "garb","garbage","garble","garden","gardenia",
+            "gardening","gargantuan","gargle","gargoyle","garish",
+            "garland","garlic","garment","garner","garnet",
+            "garnish","garret","garrison","garrote","garrotte",
+            "garrulity","garrulous","garter","gas","gasbag",
+            "gaseous","gash","gasholder","gasify","gasket",
+            "gaslight","gasman","gasolene","gasoline","gasp",
+            "gassy","gastric","gastritis","gastroenteritis","gastronomy",
+            "gasworks","gat","gate","gatecrash","gatehouse",
+            "gatekeeper","gatepost","gateway","gather","gathering",
+            "gauche","gaucherie","gaucho","gaudy","gauge",
+            "gaunt","gauntlet","gauze","gave","gavel",
+            "gavotte","gawk","gawky","gawp","gay",
+            "gayness","gaze","gazebo","gazelle","gazette",
+            "gazetteer","gazump","gce","gear","gearbox",
+            "gecko","gee","geese","geezer","geisha",
+            "gel","gelatine","gelatinous","geld","gelding",
+            "gelignite","gem","gemini","gen","gendarme",
+            "gender","gene","genealogist","genealogy","genera",
+            "general","generalisation","generalise","generalissimo","generality",
+            "generalization","generalize","generally","generate","generation",
+            "generative","generator","generic","generous","genesis",
+            "genetic","geneticist","genetics","genial","geniality",
+            "genie","genital","genitals","genitive","genius",
+            "genocide","genre","gent","genteel","gentian",
+            "gentile","gentility","gentle","gentlefolk","gentleman",
+            "gentlemanly","gentlewoman","gently","gentry","gents",
+            "genuflect","genuine","genus","geocentric","geographer",
+            "geography","geologist","geology","geometric","geometry",
+            "geophysics","geopolitics","georgette","geranium","geriatric",
+            "geriatrician","geriatrics","germ","germane","germanic",
+            "germicide","germinal","germinate","gerontology","gerrymander",
+            "gerund","gestalt","gestapo","gestation","gesticulate",
+            "gesture","get","getaway","getup","geum",
+            "gewgaw","geyser","gharry","ghastly","ghat",
+            "ghaut","ghee","gherkin","ghetto","ghi",
+            "ghost","ghostly","ghoul","ghoulish","ghq",
+            "ghyll","giant","giantess","gibber","gibberish",
+            "gibbet","gibbon","gibbous","gibe","giblets",
+            "giddy","gift","gifted","gig","gigantic",
+            "giggle","gigolo","gild","gilded","gilding",
+            "gill","gillie","gilly","gilt","gimcrack",
+            "gimlet","gimmick","gimmicky","gin","ginger",
+            "gingerbread","gingerly","gingham","gingivitis","gingko",
+            "ginkgo","ginseng","gipsy","giraffe","gird",
+            "girder","girdle","girl","girlfriend","girlhood",
+            "girlie","girlish","girly","giro","girt",
+            "girth","gist","give","giveaway","given",
+            "gizzard","glacial","glacier","glad","gladden",
+            "glade","gladiator","gladiolus","gladly","glamor",
+            "glamorise","glamorize","glamorous","glamour","glamourous",
+            "glance","glancing","gland","glandular","glare",
+            "glaring","glass","glassblower","glasscutter","glasses",
+            "glasshouse","glassware","glassworks","glassy","glaucoma",
+            "glaucous","glaze","glazier","glazing","glc",
+            "gleam","glean","gleaner","gleanings","glebe",
+            "glee","gleeful","glen","glengarry","glib",
+            "glide","glider","gliding","glimmer","glimmerings",
+            "glimpse","glint","glissade","glissando","glisten",
+            "glister","glitter","glittering","gloaming","gloat",
+            "global","globe","globefish","globetrotter","globular",
+            "globule","glockenspiel","gloom","gloomy","gloria",
+            "glorification","glorify","glorious","glory","gloss",
+            "glossary","glossy","glottal","glottis","glove",
+            "glow","glower","glowing","glucose","glue",
+            "gluey","glum","glut","gluten","glutinous",
+            "glutton","gluttonous","gluttony","glycerin","glycerine",
+            "gnarled","gnash","gnat","gnaw","gnawing",
+            "gneiss","gnocchi","gnome","gnp","gnu",
+            "goad","goal","goalkeeper","goalmouth","goalpost",
+            "goat","goatee","goatherd","goatskin","gob",
+            "gobbet","gobble","gobbledegook","gobbledygook","gobbler",
+            "goblet","goblin","god","godchild","goddam",
+            "goddamn","goddie","godforsaken","godhead","godless",
+            "godlike","godly","godown","godparent","gods",
+            "godsend","godspeed","goer","goggle","goggles",
+            "goings","goiter","goitre","gold","goldbeater",
+            "golden","goldfield","goldfinch","goldfish","goldmine",
+            "goldsmith","golf","goliath","golliwog","golly",
+            "gollywog","gonad","gondola","gondolier","gone",
+            "goner","gong","gonna","gonorrhea","gonorrhoea",
+            "goo","good","goodbye","goodish","goodly",
+            "goodness","goodnight","goods","goodwill","goody",
+            "gooey","goof","goofy","googly","goon",
+            "goose","gooseberry","gooseflesh","goosestep","gopher",
+            "gore","gorge","gorgeous","gorgon","gorgonzola",
+            "gorilla","gormandise","gormandize","gormless","gorse",
+            "gory","gosh","gosling","gospel","gossamer",
+            "gossip","gossipy","got","gothic","gotta",
+            "gotten","gouache","gouda","gouge","goulash",
+            "gourd","gourmand","gourmet","gout","gouty",
+            "govern","governance","governess","governing","government",
+            "governor","gown","gpo","grab","grace",
+            "graceful","graceless","graces","gracious","gradation",
+            "grade","gradient","gradual","graduate","graduation",
+            "graffiti","graft","grafter","grail","grain",
+            "gram","grammar","grammarian","grammatical","gramme",
+            "gramophone","grampus","gran","granary","grand",
+            "grandad","grandchild","granddad","granddaughter","grandee",
+            "grandeur","grandfather","grandiloquent","grandiose","grandma",
+            "grandmother","grandpa","grandparent","grandson","grandstand",
+            "grange","granite","grannie","granny","grant",
+        };
     }
 }
\ No newline at end of file


[15/50] [abbrv] lucenenet git commit: Fixed bugs in KStemmer that were causing tests TestKStemmer.TestVocabulary and TestKStemmer.TestRandomStrings to fail.

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c185dc/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData2.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData2.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData2.cs
index ba99122..c546e9c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData2.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData2.cs
@@ -47,6 +47,708 @@ namespace Lucene.Net.Analysis.En
         private KStemData2()
         {
         }
-        internal static string[] data = new string[] { "cash", "cashew", "cashier", "cashmere", "casing", "casino", "cask", "casket", "casque", "cassava", "casserole", "cassette", "cassock", "cassowary", "cast", "castanets", "castaway", "castellated", "caster", "castigate", "casting", "castle", "castor", "castrate", "casual", "casualty", "casuist", "casuistry", "cat", "cataclysm", "catacomb", "catafalque", "catalepsy", "catalog", "catalogue", "catalpa", "catalysis", "catalyst", "catamaran", "catapult", "cataract", "catarrh", "catastrophe", "catatonic", "catcall", "catch", "catcher", "catching", "catchpenny", "catchphrase", "catchword", "catchy", "catechise", "catechism", "catechize", "categorical", "categorise", "categorize", "category", "cater", "caterer", "caterpillar", "caterwaul", "catfish", "catgut", "catharsis", "cathartic", "cathedral", "catheter", "cathode", "catholic", "catholicism", "catholicity", "catkin", "catnap", "catnip", "catsup", "cattle", "catty", "catwalk", "caucu
 s", "caudal", "caught", "caul", "cauldron", "cauliflower", "caulk", "causal", "causality", "causation", "causative", "cause", "causeless", "causeway", "caustic", "cauterise", "cauterize", "caution", "cautionary", "cautious", "cavalcade", "cavalier", "cavalry", "cavalryman", "cave", "caveat", "caveman", "cavern", "cavernous", "caviar", "caviare", "cavil", "cavity", "cavort", "cavy", "caw", "cay", "cayman", "cease", "ceaseless", "cedar", "cede", "cedilla", "ceiling", "celandine", "celebrant", "celebrate", "celebrated", "celebration", "celebrity", "celerity", "celery", "celestial", "celibacy", "celibate", "cell", "cellar", "cellarage", "cellist", "cello", "cellophane", "cellular", "celluloid", "cellulose", "celsius", "celtic", "cement", "cemetery", "cenotaph", "censor", "censorious", "censorship", "censure", "census", "cent", "centaur", "centavo", "centenarian", "centenary", "centennial", "center", "centerboard", "centerpiece", "centigrade", "centigram", "centigramme", "centime", "cent
 imeter", "centimetre", "centipede", "central", "centralise", "centralism", "centralize", "centre", "centreboard", "centrepiece", "centrifugal", "centrifuge", "centripetal", "centrist", "centurion", "century", "cephalic", "ceramic", "ceramics", "cereal", "cerebellum", "cerebral", "cerebration", "cerebrum", "ceremonial", "ceremonious", "ceremony", "cerise", "cert", "certain", "certainly", "certainty", "certifiable", "certificate", "certificated", "certify", "certitude", "cerulean", "cervical", "cervix", "cessation", "cession", "cesspit", "cetacean", "chablis", "chaconne", "chafe", "chaff", "chaffinch", "chagrin", "chain", "chair", "chairman", "chairmanship", "chairperson", "chairwoman", "chaise", "chalet", "chalice", "chalk", "chalky", "challenge", "challenging", "chamber", "chamberlain", "chambermaid", "chambers", "chameleon", "chamiomile", "chamois", "chamomile", "champ", "champagne", "champaign", "champion", "championship", "chance", "chancel", "chancellery", "chancellor", "chancer
 y", "chancy", "chandelier", "chandler", "change", "changeable", "changeless", "changeling", "changeover", "channel", "chant", "chanterelle", "chanticleer", "chantry", "chanty", "chaos", "chaotic", "chap", "chapel", "chapelgoer", "chaperon", "chaperone", "chapfallen", "chaplain", "chaplaincy", "chaplet", "chaps", "chapter", "char", "charabanc", "character", "characterise", "characteristic", "characterization", "characterize", "characterless", "charade", "charades", "charcoal", "chard", "charge", "chargeable", "charged", "charger", "chariot", "charioteer", "charisma", "charismatic", "charitable", "charity", "charlady", "charlatan", "charleston", "charlock", "charlotte", "charm", "charmer", "charming", "chart", "charter", "chartreuse", "charwoman", "chary", "charybdis", "chase", "chaser", "chasm", "chassis", "chaste", "chasten", "chastise", "chastisement", "chastity", "chasuble", "chat", "chatelaine", "chattel", "chatter", "chatterbox", "chatty", "chauffeur", "chauvinism", "chauvinist"
 , "cheap", "cheapen", "cheapskate", "cheat", "check", "checkbook", "checked", "checker", "checkerboard", "checkers", "checklist", "checkmate", "checkoff", "checkout", "checkpoint", "checkrail", "checkrein", "checkroom", "checkup", "cheddar", "cheek", "cheekbone", "cheeky", "cheep", "cheer", "cheerful", "cheering", "cheerio", "cheerleader", "cheerless", "cheers", "cheery", "cheese", "cheesecake", "cheesecloth", "cheeseparing", "cheetah", "chef", "chem", "chemical", "chemise", "chemist", "chemistry", "chemotherapy", "chenille", "cheque", "chequebook", "chequer", "cherish", "cheroot", "cherry", "cherub", "chervil", "chess", "chessboard", "chessman", "chest", "chesterfield", "chestnut", "chesty", "chevalier", "chevron", "chevvy", "chevy", "chew", "chi", "chianti", "chiaroscuro", "chic", "chicanery", "chicano", "chichi", "chick", "chicken", "chickenfeed", "chickenhearted", "chickpea", "chickweed", "chicle", "chicory", "chide", "chief", "chiefly", "chieftain", "chieftainship", "chiffon", 
 "chiffonier", "chiffonnier", "chigger", "chignon", "chihuahua", "chilblain", "child", "childbearing", "childbirth", "childhood", "childish", "childlike", "chile", "chill", "chiller", "chilli", "chilly", "chimaera", "chime", "chimera", "chimerical", "chimney", "chimneybreast", "chimneypiece", "chimneypot", "chimneystack", "chimneysweep", "chimpanzee", "chin", "china", "chinatown", "chinaware", "chinchilla", "chine", "chink", "chinless", "chinook", "chinstrap", "chintz", "chinwag", "chip", "chipboard", "chipmunk", "chippendale", "chipping", "chippy", "chiromancy", "chiropody", "chiropractic", "chirp", "chirpy", "chisel", "chiseler", "chiseller", "chit", "chitchat", "chivalrous", "chivalry", "chive", "chivvy", "chivy", "chloride", "chlorinate", "chlorine", "chloroform", "chlorophyll", "chock", "chocolate", "choice", "choir", "choirboy", "choirmaster", "choke", "choker", "chokey", "choky", "choler", "cholera", "choleric", "cholesterol", "chomp", "choose", "choosey", "choosy", "chop", "c
 hopfallen", "chophouse", "chopper", "choppers", "choppy", "chopstick", "choral", "chorale", "chord", "chore", "choreographer", "choreography", "chorine", "chorister", "chortle", "chorus", "chose", "chosen", "chow", "chowder", "christ", "christen", "christendom", "christening", "christian", "christianity", "christlike", "christmastime", "chromatic", "chrome", "chromium", "chromosome", "chronic", "chronicle", "chronograph", "chronological", "chronology", "chronometer", "chrysalis", "chrysanthemum", "chub", "chubby", "chuck", "chuckle", "chug", "chukker", "chum", "chummy", "chump", "chunk", "chunky", "church", "churchgoer", "churching", "churchwarden", "churchyard", "churl", "churlish", "churn", "chute", "chutney", "cia", "cicada", "cicatrice", "cicerone", "cid", "cider", "cif", "cigar", "cigaret", "cigarette", "cinch", "cincture", "cinder", "cinderella", "cinders", "cine", "cinema", "cinematograph", "cinematography", "cinnamon", "cinquefoil", "cipher", "circa", "circadian", "circle", 
 "circlet", "circuit", "circuitous", "circular", "circularise", "circularize", "circulate", "circulation", "circumcise", "circumcision", "circumference", "circumflex", "circumlocution", "circumnavigate", "circumscribe", "circumscription", "circumspect", "circumstance", "circumstances", "circumstantial", "circumvent", "circus", "cirque", "cirrhosis", "cirrus", "cissy", "cistern", "citadel", "citation", "cite", "citizen", "citizenry", "citizenship", "citron", "citrous", "citrus", "city", "civet", "civic", "civics", "civies", "civil", "civilian", "civilisation", "civilise", "civility", "civilization", "civilize", "civilly", "civvies", "clack", "clad", "claim", "claimant", "clairvoyance", "clairvoyant", "clam", "clambake", "clamber", "clammy", "clamor", "clamorous", "clamour", "clamp", "clampdown", "clamshell", "clan", "clandestine", "clang", "clanger", "clangor", "clangour", "clank", "clannish", "clansman", "clap", "clapboard", "clapper", "clapperboard", "clappers", "claptrap", "claque"
 , "claret", "clarification", "clarify", "clarinet", "clarinetist", "clarinettist", "clarion", "clarity", "clarts", "clash", "clasp", "class", "classic", "classical", "classicism", "classicist", "classics", "classification", "classified", "classify", "classless", "classmate", "classroom", "classy", "clatter", "clause", "claustrophobia", "claustrophobic", "clavichord", "clavicle", "claw", "clay", "claymore", "clean", "cleaner", "cleanliness", "cleanly", "cleanse", "cleanser", "cleanup", "clear", "clearance", "clearing", "clearinghouse", "clearly", "clearout", "clearway", "cleat", "cleavage", "cleave", "cleaver", "clef", "cleft", "clematis", "clemency", "clement", "clench", "clerestory", "clergy", "clergyman", "clerical", "clerihew", "clerk", "clever", "clew", "click", "client", "clientele", "cliff", "cliffhanger", "climacteric", "climactic", "climate", "climatic", "climatology", "climax", "climb", "climber", "clime", "clinch", "clincher", "cline", "cling", "clinging", "clingy", "clini
 c", "clinical", "clink", "clinker", "clip", "clipboard", "clipper", "clippers", "clippie", "clipping", "clique", "cliquey", "cliquish", "clitoris", "cloaca", "cloak", "cloakroom", "clobber", "cloche", "clock", "clockwise", "clockwork", "clod", "cloddish", "clodhopper", "clog", "cloggy", "cloister", "clone", "clop", "close", "closed", "closedown", "closefisted", "closet", "closure", "clot", "cloth", "clothe", "clothes", "clothesbasket", "clotheshorse", "clothesline", "clothier", "clothing", "cloture", "cloud", "cloudbank", "cloudburst", "cloudless", "cloudy", "clout", "clove", "cloven", "clover", "cloverleaf", "clown", "clownish", "cloy", "club", "clubbable", "clubfoot", "clubhouse", "cluck", "clue", "clueless", "clump", "clumsy", "clung", "cluster", "clutch", "clutches", "clutter", "coach", "coachbuilder", "coachman", "coachwork", "coadjutor", "coagulant", "coagulate", "coal", "coalbunker", "coalesce", "coalface", "coalfield", "coalhole", "coalhouse", "coalition", "coalmine", "coals
 cuttle", "coarse", "coarsen", "coast", "coastal", "coaster", "coastguard", "coastguardsman", "coastline", "coastwise", "coat", "coating", "coax", "cob", "cobalt", "cobber", "cobble", "cobbler", "cobblers", "cobblestone", "cobra", "cobweb", "cocaine", "coccyx", "cochineal", "cochlea", "cock", "cockade", "cockatoo", "cockchafer", "cockcrow", "cockerel", "cockeyed", "cockfight", "cockhorse", "cockle", "cockleshell", "cockney", "cockpit", "cockroach", "cockscomb", "cocksure", "cocktail", "cocky", "coco", "cocoa", "coconut", "cocoon", "cod", "coda", "coddle", "code", "codeine", "codex", "codger", "codicil", "codify", "codling", "codpiece", "codswallop", "coed", "coeducation", "coefficient", "coelacanth", "coequal", "coerce", "coercion", "coercive", "coeternal", "coeval", "coexist", "coexistence", "coffee", "coffeepot", "coffer", "cofferdam", "coffers", "coffin", "cog", "cogency", "cogent", "cogitate", "cogitation", "cognac", "cognate", "cognition", "cognitive", "cognizance", "cognizant",
  "cognomen", "cognoscenti", "cogwheel", "cohabit", "cohere", "coherence", "coherent", "cohesion", "cohesive", "cohort", "coif", "coiffeur", "coiffure", "coil", "coin", "coinage", "coincide", "coincidence", "coincident", "coincidental", "coir", "coitus", "coke", "col", "cola", "colander", "cold", "coleslaw", "coley", "colic", "colicky", "colitis", "collaborate", "collaboration", "collaborationist", "collage", "collapse", "collapsible", "collar", "collarbone", "collate", "collateral", "collation", "colleague", "collect", "collected", "collection", "collective", "collectivise", "collectivism", "collectivize", "collector", "colleen", "college", "collegiate", "collide", "collie", "collier", "colliery", "collision", "collocate", "collocation", "colloquial", "colloquialism", "colloquy", "collude", "collusion", "collywobbles", "cologne", "colon", "colonel", "colonial", "colonialism", "colonialist", "colonies", "colonise", "colonist", "colonize", "colonnade", "colony", "color", "coloration",
  "coloratura", "colored", "colorfast", "colorful", "coloring", "colorless", "colors", "colossal", "colossally", "colossus", "colostrum", "colour", "coloured", "colourfast", "colourful", "colouring", "colourless", "colours", "colt", "colter", "coltish", "columbine", "column", "columnist", "coma", "comatose", "comb", "combat", "combatant", "combative", "comber", "combination", "combinations", "combinatorial", "combine", "combo", "combustible", "combustion", "come", "comeback", "comecon", "comedian", "comedienne", "comedown", "comedy", "comely", "comer", "comestible", "comet", "comfit", "comfort", "comfortable", "comforter", "comfrey", "comfy", "comic", "comical", "comics", "cominform", "coming", "comintern", "comity", "comma", "command", "commandant", "commandeer", "commander", "commanding", "commandment", "commando", "commemorate", "commemoration", "commemorative", "commence", "commencement", "commend", "commendable", "commendation", "commendatory", "commensurable", "commensurate", "
 comment", "commentary", "commentate", "commentator", "commerce", "commercial", "commercialise", "commercialism", "commercialize", "commie", "commiserate", "commiseration", "commissar", "commissariat", "commissary", "commission", "commissionaire", "commissioner", "commit", "commitment", "committal", "committed", "committee", "committeeman", "commode", "commodious", "commodity", "commodore", "common", "commonage", "commonalty", "commoner", "commonly", "commonplace", "commons", "commonweal", "commonwealth", "commotion", "communal", "commune", "communicable", "communicant", "communicate", "communication", "communications", "communicative", "communion", "communism", "communist", "community", "commutable", "commutation", "commutative", "commutator", "commute", "commuter", "compact", "compacted", "companion", "companionable", "companionship", "companionway", "company", "comparable", "comparative", "comparatively", "compare", "comparison", "compartment", "compartmentalise", "compartmentaliz
 e", "compass", "compassion", "compassionate", "compatibility", "compatible", "compatriot", "compeer", "compel", "compendious", "compendium", "compensate", "compensation", "compensatory", "compere", "compete", "competence", "competent", "competition", "competitive", "competitor", "compilation", "compile", "complacency", "complacent", "complain", "complainant", "complaint", "complaisance", "complaisant", "complement", "complementary", "complete", "completely", "completion", "complex", "complexion", "complexity", "compliance", "compliant", "complicate", "complicated", "complication", "complicity", "compliment", "complimentary", "compliments", "complin", "compline", "comply", "compo", "component", "comport", "comportment", "compose", "composer", "composite", "composition", "compositor", "compost", "composure", "compote", "compound", "comprehend", "comprehensible", "comprehension", "comprehensive", "compress", "compressible", "compression", "compressor", "comprise", "compromise", "compto
 meter", "comptroller", "compulsion", "compulsive", "compulsory", "compunction", "computation", "compute", "computer", "computerize", "comrade", "comradeship", "coms", "con", "concatenate", "concatenation", "concave", "concavity", "conceal", "concealment", "concede", "conceit", "conceited", "conceivable", "conceive", "concentrate", "concentrated", "concentration", "concentric", "concept", "conception", "conceptual", "conceptualise", "conceptualize", "concern", "concerned", "concernedly", "concerning", "concert", "concerted", "concertgoer", "concertina", "concertmaster", "concerto", "concession", "concessionaire", "concessive", "conch", "conchology", "concierge", "conciliate", "conciliation", "conciliatory", "concise", "concision", "conclave", "conclude", "conclusion", "conclusive", "concoct", "concoction", "concomitance", "concomitant", "concord", "concordance", "concordant", "concordat", "concourse", "concrete", "concubinage", "concubine", "concupiscence", "concur", "concurrence", "
 concurrent", "concuss", "concussion", "condemn", "condemnation", "condensation", "condense", "condenser", "condescend", "condescension", "condign", "condiment", "condition", "conditional", "conditions", "condole", "condolence", "condom", "condominium", "condone", "condor", "conduce", "conducive", "conduct", "conduction", "conductive", "conductivity", "conductor", "conduit", "cone", "coney", "confabulate", "confabulation", "confection", "confectioner", "confectionery", "confederacy", "confederate", "confederation", "confer", "conference", "confess", "confessed", "confession", "confessional", "confessor", "confetti", "confidant", "confide", "confidence", "confident", "confidential", "confiding", "configuration", "confine", "confinement", "confines", "confirm", "confirmation", "confirmed", "confiscate", "confiscatory", "conflagration", "conflate", "conflict", "confluence", "conform", "conformable", "conformation", "conformist", "conformity", "confound", "confounded", "confraternity", "
 confront", "confrontation", "confucian", "confucianism", "confuse", "confusion", "confute", "conga", "congeal", "congenial", "congenital", "congest", "congestion", "conglomerate", "conglomeration", "congrats", "congratulate", "congratulations", "congratulatory", "congregate", "congregation", "congregational", "congregationalism", "congress", "congressional", "congressman", "congruent", "congruity", "congruous", "conic", "conical", "conifer", "coniferous", "conj", "conjectural", "conjecture", "conjoin", "conjoint", "conjugal", "conjugate", "conjugation", "conjunction", "conjunctiva", "conjunctive", "conjunctivitis", "conjuncture", "conjure", "conjurer", "conjuror", "conk", "conker", "conkers", "connect", "connected", "connection", "connective", "connexion", "connivance", "connive", "connoisseur", "connotation", "connotative", "connote", "connubial", "conquer", "conquest", "conquistador", "consanguineous", "consanguinity", "conscience", "conscientious", "conscious", "consciousness", "
 conscript", "conscription", "consecrate", "consecration", "consecutive", "consensus", "consent", "consequence", "consequent", "consequential", "consequently", "conservancy", "conservation", "conservationist", "conservatism", "conservative", "conservatoire", "conservatory", "conserve", "consider", "considerable", "considerably", "considerate", "consideration", "considered", "considering", "consign", "consignee", "consigner", "consignment", "consignor", "consist", "consistency", "consistent", "consistory", "consolation", "consolatory", "console", "consolidate", "consols", "consonance", "consonant", "consort", "consortium", "conspectus", "conspicuous", "conspiracy", "conspirator", "conspiratorial", "conspire", "constable", "constabulary", "constancy", "constant", "constellation", "consternation", "constipate", "constipation", "constituency", "constituent", "constitute", "constitution", "constitutional", "constitutionalism", "constitutionally", "constitutive", "constrain", "constrained"
 , "constraint", "constrict", "constriction", "constrictor", "construct", "construction", "constructive", "constructor", "construe", "consubstantiation", "consul", "consular", "consulate", "consult", "consultancy", "consultant", "consultation", "consultative", "consulting", "consume", "consumer", "consummate", "consummation", "consumption", "consumptive", "contact", "contagion", "contagious", "contain", "contained", "container", "containerise", "containerize", "containment", "contaminate", "contamination", "contemplate", "contemplation", "contemplative", "contemporaneous", "contemporary", "contempt", "contemptible", "contemptuous", "contend", "contender", "content", "contented", "contention", "contentious", "contentment", "contents", "contest", "contestant", "context", "contextual", "contiguity", "contiguous", "continence", "continent", "continental", "contingency", "contingent", "continual", "continuance", "continuation", "continue", "continuity", "continuo", "continuous", "continuu
 m", "contort", "contortion", "contortionist", "contour", "contraband", "contrabass", "contraception", "contraceptive", "contract", "contractile", "contraction", "contractor", "contractual", "contradict", "contradiction", "contradictory", "contradistinction", "contrail", "contraindication", "contralto", "contraption", "contrapuntal", "contrariety", "contrariwise", "contrary", "contrast", "contravene", "contravention", "contretemps", "contribute", "contribution", "contributor", "contributory", "contrite", "contrition", "contrivance", "contrive", "contrived", "control", "controller", "controversial", "controversy", "controvert", "contumacious", "contumacy", "contumelious", "contumely", "contuse", "contusion", "conundrum", "conurbation", "convalesce", "convalescence", "convalescent", "convection", "convector", "convene", "convener", "convenience", "convenient", "convenor", "convent", "conventicle", "convention", "conventional", "conventionality", "converge", "conversant", "conversation"
 , "conversational", "conversationalist", "conversazione", "converse", "conversion", "convert", "converter", "convertible", "convex", "convexity", "convey", "conveyance", "conveyancer", "conveyancing", "conveyer", "conveyor", "convict", "conviction", "convince", "convinced", "convincing", "convivial", "convocation", "convoke", "convoluted", "convolution", "convolvulus", "convoy", "convulse", "convulsion", "convulsive", "cony", "coo", "cook", "cooker", "cookery", "cookhouse", "cookie", "cooking", "cookout", "cool", "coolant", "cooler", "coolie", "coon", "coop", "cooper", "cooperate", "cooperation", "cooperative", "coordinate", "coordinates", "coordination", "coot", "cop", "cope", "copeck", "copier", "copilot", "coping", "copingstone", "copious", "copper", "copperhead", "copperplate", "coppersmith", "coppice", "copra", "coptic", "copula", "copulate", "copulative", "copy", "copybook", "copyboy", "copycat", "copydesk", "copyhold", "copyist", "copyright", "copywriter", "coquetry", "coquet
 te", "cor", "coracle", "coral", "corbel", "cord", "cordage", "cordial", "cordiality", "cordially", "cordillera", "cordite", "cordon", "cords", "corduroy", "core", "corelate", "coreligionist", "corer", "corespondent", "corgi", "coriander", "corinthian", "cork", "corkage", "corked", "corker", "corkscrew", "corm", "cormorant", "corn", "corncob", "corncrake", "cornea", "cornelian", "corner", "cornerstone", "cornet", "cornfield", "cornflakes", "cornflower", "cornice", "cornish", "cornucopia", "corny", "corolla", "corollary", "corona", "coronary", "coronation", "coroner", "coronet", "corpora", "corporal", "corporate", "corporation", "corporeal", "corps", "corpse", "corpulence", "corpulent", "corpus", "corpuscle", "corral", "correct", "correction", "correctitude", "corrective", "correlate", "correlation", "correlative", "correspond", "correspondence", "correspondent", "corresponding", "corridor", "corrie", "corrigendum", "corroborate", "corroboration", "corroborative", "corroboree", "corro
 de", "corrosion", "corrosive", "corrugate", "corrugation", "corrupt", "corruption", "corsage", "corsair", "corse", "corselet", "corset", "cortex", "cortisone", "corundum", "coruscate", "corvette", "cos", "cosh", "cosignatory", "cosine", "cosmetic", "cosmetician", "cosmic", "cosmogony", "cosmology", "cosmonaut", "cosmopolitan", "cosmos", "cosset", "cost", "costermonger", "costive", "costly", "costs", "costume", "costumier", "cosy", "cot", "cotangent", "cote", "coterie", "coterminous", "cotillion", "cottage", "cottager", "cottar", "cotter", "cotton", "cottonseed", "cottontail", "cotyledon", "couch", "couchant", "couchette", "cougar", "cough", "could", "couldst", "coulter", "council", "councillor", "counsel", "counsellor", "counselor", "count", "countable", "countdown", "countenance", "counter", "counteract", "counterattack", "counterattraction", "counterbalance", "counterblast", "counterclaim", "counterclockwise", "counterespionage", "counterfeit", "counterfoil", "counterintelligence"
 , "counterirritant", "countermand", "countermarch", "countermeasure", "counteroffensive", "counterpane", "counterpart", "counterpoint", "counterpoise", "countersign", "countersink", "countertenor", "countervail", "countess", "countinghouse", "countless", "countrified", "country", "countryman", "countryside", "county", "coup", "couple", "couplet", "coupling", "coupon", "courage", "courageous", "courgette", "courier", "course", "courser", "coursing", "court", "courteous", "courtesan", "courtesy", "courthouse", "courtier", "courting", "courtly", "courtroom", "courtship", "courtyard", "couscous", "cousin", "couture", "cove", "coven", "covenant", "coventry", "cover", "coverage", "covering", "coverlet", "covert", "covet", "covetous", "covey", "cow", "coward", "cowardice", "cowardly", "cowbell", "cowboy", "cowcatcher", "cower", "cowgirl", "cowhand", "cowheel", "cowherd", "cowhide", "cowl", "cowlick", "cowling", "cowman", "cowpat", "cowpox", "cowrie", "cowry", "cowshed", "cowslip", "cox", "
 coxcomb", "coy", "coyote", "coypu", "cozen", "cozy", "cpa", "crab", "crabbed", "crabby", "crabgrass", "crabwise", "crack", "crackbrained", "crackdown", "cracked", "cracker", "crackers", "crackle", "crackleware", "crackling", "crackpot", "cracksman", "crackup", "cradle", "craft", "craftsman", "crafty", "crag", "craggy", "crake", "cram", "crammer", "cramp", "cramped", "crampon", "cramps", "cranberry", "crane", "cranial", "cranium", "crank", "crankshaft", "cranky", "cranny", "crap", "crape", "crappy", "craps", "crash", "crashing", "crass", "crate", "crater", "cravat", "crave", "craven", "craving", "crawl", "crawler", "crawlers", "crayfish", "crayon", "craze", "crazy", "creak", "creaky", "cream", "creamer", "creamery", "creamy", "crease", "create", "creation", "creative", "creativity", "creator", "creature", "credence", "credentials", "credibility", "credible", "credit", "creditable", "creditor", "credo", "credulous", "creed", "creek", "creel", "creep", "creeper", "creepers", "creeps", 
 "creepy", "cremate", "crematorium", "crenelated", "crenellated", "creole", "creosote", "crept", "crepuscular", "crescendo", "crescent", "cress", "crest", "crested", "crestfallen", "cretaceous", "cretin", "cretonne", "crevasse", "crevice", "crew", "crewman", "crib", "cribbage", "crick", "cricket", "cricketer", "crier", "cries", "crikey", "crime", "criminal", "criminology", "crimp", "crimplene", "crimson", "cringe", "crinkle", "crinkly", "crinoid", "crinoline", "cripes", "cripple", "crisis", "crisp", "crispy", "crisscross", "criterion", "critic", "critical", "criticise", "criticism", "criticize", "critique", "critter", "croak", "crochet", "crock", "crockery", "crocodile", "crocus", "croft", "crofter", "croissant", "cromlech", "crone", "crony", "crook", "crooked", "croon", "crooner", "crop", "cropper", "croquet", "croquette", "crore", "crosier", "cross", "crossbar", "crossbeam", "crossbenches", "crossbones", "crossbow", "crossbred", "crossbreed", "crosscheck", "crosscurrent", "crosscut
 ", "crossfire", "crossing", "crossover", "crosspatch", "crosspiece", "crossply", "crossroad", "crossroads", "crosstree", "crosswalk", "crosswind", "crosswise", "crossword", "crotch", "crotchet", "crotchety", "crouch", "croup", "croupier", "crouton", "crow", "crowbar", "crowd", "crowded", "crowfoot", "crown", "crozier", "crucial", "crucible", "crucifix", "crucifixion", "cruciform", "crucify", "crude", "crudity", "cruel", "cruelty", "cruet", "cruise", "cruiser", "crumb", "crumble", "crumbly", "crummy", "crumpet", "crumple", "crunch", "crupper", "crusade", "cruse", "crush", "crust", "crustacean", "crusty", "crutch", "crux", "cry", "crybaby", "crying", "crypt", "cryptic", "cryptogram", "cryptography", "crystal", "crystalline", "crystallise", "crystallize", "cub", "cubbyhole", "cube", "cubic", "cubical", "cubicle", "cubism", "cubit", "cubs", "cuckold", "cuckoldry", "cuckoo", "cucumber", "cud", "cuddle", "cuddlesome", "cuddly", "cudgel", "cue", "cuff", "cuffs", "cuirass", "cuisine", "culi
 nary", "cull", "cullender", "culminate", "culmination", "culotte", "culottes", "culpable", "culprit", "cult", "cultivable", "cultivate", "cultivated", "cultivation", "cultivator", "cultural", "culture", "cultured", "culvert", "cumber", "cumbersome", "cumin", "cummerbund", "cumulative", "cumulonimbus", "cumulus", "cuneiform", "cunnilingus", "cunning", "cunt", "cup", "cupbearer", "cupboard", "cupid", "cupidity", "cupola", "cuppa", "cupping", "cupric", "cur", "curable", "curacy", "curate", "curative", "curator", "curb", "curd", "curdle", "cure", "curettage", "curfew", "curia", "curio", "curiosity", "curious", "curl", "curler", "curlew", "curlicue", "curling", "curly", "curlycue", "curmudgeon", "currant", "currency", "current", "curriculum", "currish", "curry", "curse", "cursed", "cursive", "cursory", "curt", "curtail", "curtain", "curtains", "curtsey", "curtsy", "curvaceous", "curvacious", "curvature", "curve", "cushion", "cushy", "cusp", "cuspidor", "cuss", "cussed", "custard", "custo
 dial", "custodian", "custody", "custom", "customary", "customer", "customs", "cut", "cutaway", "cutback", "cuticle", "cutlass", "cutler", "cutlery", "cutlet", "cutoff", "cutout", "cutpurse", "cutter", "cutthroat", "cutting", "cuttlefish", "cutworm", "cwm", "cwt", "cyanide", "cybernetics", "cyclamate", "cyclamen", "cycle", "cyclic", "cyclist", "cyclone", "cyclopaedia", "cyclopedia", "cyclostyle", "cyclotron", "cyder", "cygnet", "cylinder", "cymbal", "cynic", "cynical", "cynicism", "cynosure", "cypher", "cypress", "cyrillic", "cyst", "cystitis", "cytology", "czar", "czarina", "czech", "dab", "dabble", "dabchick", "dabs", "dace", "dachshund", "dactyl", "dad", "daddy", "dado", "daemon", "daffodil", "daft", "dagger", "dago", "daguerreotype", "dahlia", "daily", "dainty", "daiquiri", "dairy", "dairying", "dairymaid", "dairyman", "dais", "daisy", "dale", "dalliance", "dally", "dalmation", "dam", "damage", "damages", "damascene", "damask", "damn", "damnable", "damnation", "damnedest", "damni
 ng", "damocles", "damp", "dampen", "damper", "dampish", "damsel", "damson", "dance", "dandelion", "dander", "dandified", "dandle", "dandruff", "dandy", "danger", "dangerous", "dangle", "dank", "dapper", "dappled", "dare", "daredevil", "daresay", "daring", "dark", "darken", "darkey", "darkroom", "darky", "darling", "darn", "darning", "dart", "dartboard", "dartmoor", "darts", "dash", "dashboard", "dashed", "dashing", "data", "date", "dated", "dateless", "dateline", "dates", "dative", "daub", "daughter", "daunt", "dauntless", "dauphin", "davit", "dawdle", "dawn", "day", "dayboy", "daybreak", "daydream", "daylight", "dayroom", "days", "daytime", "daze", "dazzle", "ddt", "deacon", "dead", "deaden", "deadline", "deadlock", "deadly", "deadpan", "deadweight", "deaf", "deafen", "deal", "dealer", "dealing", "dealings", "dean", "deanery", "dear", "dearest", "dearie", "dearly", "dearth", "deary", "death", "deathbed", "deathblow", "deathless", "deathlike", "deathly", "deathwatch", "deb", "debar"
 , "debark", "debase", "debatable", "debate", "debater", "debauch", "debauchee", "debauchery", "debenture", "debilitate", "debility", "debit", "debonair", "debone", "debouch", "debrief", "debris", "debt", "debtor", "debug", "debunk", "debut", "debutante", "decade", "decadence", "decadent", "decalogue", "decamp", "decant", "decanter", "decapitate", "decathlon", "decay", "decease", "deceased", "deceit", "deceitful", "deceive", "decelerate", "december", "decencies", "decency", "decent", "decentralise", "decentralize", "deception", "deceptive", "decibel", "decide", "decided", "decidedly", "deciduous", "decimal", "decimalise", "decimalize", "decimate", "decipher", "decision", "decisive", "deck", "deckchair", "deckhand", "declaim", "declamation", "declaration", "declare", "declared", "declassify", "declension", "declination", "decline", "declivity", "declutch", "decoction", "decode", "decolonise", "decolonize", "decompose", "decompress", "decongestant", "decontaminate", "decontrol", "decor
 ate", "decoration", "decorative", "decorator", "decorous", "decorum", "decoy", "decrease", "decree", "decrepit", "decrepitude", "decry", "dedicate", "dedicated", "dedication", "deduce", "deduct", "deduction", "deductive", "deed", "deem", "deep", "deepen", "deer", "deerstalker", "def", "deface", "defame", "default", "defeat", "defeatism", "defecate", "defect", "defection", "defective", "defence", "defend", "defendant", "defense", "defensible", "defensive", "defer", "deference", "defiance", "defiant", "deficiency", "deficient", "deficit", "defile", "define", "definite", "definitely", "definition", "definitive", "deflate", "deflation", "deflationary", "deflect", "deflection", "deflower", "defoliant", "defoliate", "deforest", "deform", "deformation", "deformity", "defraud", "defray", "defrock", "defrost", "deft", "defunct", "defuse", "defy", "degauss", "degeneracy", "degenerate", "degeneration", "degenerative", "degrade", "degree", "dehorn", "dehumanise", "dehumanize", "dehydrate", "dei
 ce", "deification", "deify", "deign", "deism", "deity", "dejected", "dejection", "dekko", "delay", "delectable", "delectation", "delegacy", "delegate", "delegation", "delete", "deleterious", "deletion", "delft", "deliberate", "deliberation", "deliberative", "delicacy", "delicate", "delicatessen", "delicious", "delight", "delightful", "delimit", "delineate", "delinquency", "delinquent", "deliquescent", "delirious", "delirium", "deliver", "deliverance", "delivery", "deliveryman", "dell", "delouse", "delphic", "delphinium", "delta", "delude", "deluge", "delusion", "delusive", "delve", "demagnetise", "demagnetize", "demagogic", "demagogue", "demagoguery", "demand", "demanding", "demarcate", "demarcation", "demean", "demeanor", "demeanour", "demented", "demerit", "demesne", "demigod", "demijohn", "demilitarise", "demilitarize", "demise", "demist", "demister", "demo", "demob", "demobilise", "demobilize", "democracy", "democrat", "democratic", "democratise", "democratize", "demography", "d
 emolish", "demolition", "demon", "demonetise", "demonetize", "demoniacal", "demonic", "demonstrable", "demonstrate", "demonstration", "demonstrative", "demonstrator", "demoralise", "demoralize", "demote", "demotic", "demur", "demure", "demystify", "den", "denationalise", "denationalize", "denial", "denier", "denigrate", "denim", "denims", "denizen", "denominate", "denomination", "denominational", "denominator", "denotation", "denote", "denouement", "denounce", "dense", "density", "dent", "dental", "dentifrice", "dentist", "dentistry", "denture", "dentures", "denude", "denunciation", "deny", "deodorant", "deodorise", "deodorize", "depart", "departed", "department", "departure", "depend", "dependable", "dependant", "dependence", "dependency", "dependent", "depict", "depilatory", "deplete", "deplorable", "deplore", "deploy", "deponent", "depopulate", "deport", "deportee", "deportment", "depose", "deposit", "deposition", "depositor", "depository", "depot", "deprave", "depravity", "depre
 cate", "deprecatory", "depreciate", "depreciatory", "depredation", "depress", "depressed", "depression", "deprivation", "deprive", "deprived", "depth", "depths", "deputation", "depute", "deputise", "deputize", "deputy", "derail", "derange", "derby", "derelict", "dereliction", "deride", "derision", "derisive", "derisory", "derivative", "derive", "dermatitis", "dermatology", "derogate", "derogatory", "derrick", "derv", "dervish", "des", "desalinise", "desalinize", "descale", "descant", "descend", "descendant", "descended", "descent", "describe", "description", "descriptive", "descry", "desecrate", "desegregate", "desensitise", "desensitize", "desert", "deserter", "desertion", "deserts", "deserve", "deservedly", "deserving", "desiccant", "desiccate", "desideratum", "design", "designate", "designation", "designedly", "designer", "designing", "designs", "desirable", "desire", "desirous", "desist", "desk", "deskwork", "desolate", "despair", "despairing", "despatch", "despatches", "despera
 do", "desperate", "desperation", "despicable", "despise", "despite", "despoil", "despondent", "despot", "despotic", "despotism", "dessert", "dessertspoon", "dessertspoonful", "destination", "destined", "destiny", "destitute", "destroy", "destroyer", "destruction", "destructive", "desuetude", "desultory", "detach", "detached", "detachedly", "detachment", "detail", "detailed", "detain", "detainee", "detect", "detection", "detective", "detector", "detention", "deter", "detergent", "deteriorate", "determinant", "determination", "determine", "determined", "determiner", "determinism", "deterrent", "detest", "dethrone", "detonate", "detonation", "detonator", "detour", "detract", "detractor", "detrain", "detriment", "detritus", "deuce", "deuced", "deuteronomy", "devaluation", "devalue", "devastate", "devastating", "develop", "developer", "development", "developmental", "deviance", "deviant", "deviate", "deviation", "deviationist", "device", "devil", "devilish", "devilishly", "devilment", "d
 evious", "devise", "devitalise", "devitalize", "devoid", "devolution", "devolve", "devote", "devoted", "devotee", "devotion", "devotional", "devotions", "devour", "devout", "devoutly", "dew", "dewdrop", "dewlap", "dewpond", "dewy", "dexterity", "dexterous", "dextrose", "dhoti", "dhow", "diabetes", "diabetic", "diabolic", "diabolical", "diacritic", "diacritical", "diadem", "diaeresis", "diagnose", "diagnosis", "diagnostic", "diagonal", "diagram", "dial", "dialect", "dialectic", "dialectician", "dialog", "dialogue", "diameter", "diametrically", "diamond", "diaper", "diaphanous", "diaphragm", "diarist", "diarrhea", "diarrhoea", "diary", "diaspora", "diatom", "diatribe", "dibble", "dice", "dicey", "dichotomy", "dick", "dicker", "dickie", "dicky", "dickybird", "dictaphone", "dictate", "dictation", "dictator", "dictatorial", "dictatorship", "diction", "dictionary", "dictum", "did", "didactic", "diddle", "didst", "die", "diehard", "dieresis", "diet", "dietary", "dietetic", "dietetics", "di
 etician", "dietitian", "differ", "difference", "different", "differential", "differentiate", "difficult", "difficulty", "diffident", "diffract", "diffuse", "diffusion", "dig", "digest", "digestion", "digestive", "digger", "digging", "diggings", "digit", "digital", "dignified", "dignify", "dignitary", "dignity", "digraph", "digress", "digression", "digs", "dike", "dilapidated", "dilapidation", "dilapidations", "dilate", "dilatory", "dildo", "dilemma", "dilettante", "diligence", "diligent", "dill", "dillydally", "dilute", "dilution", "dim", "dimension", "dimensions", "diminish", "diminuendo", "diminution", "diminutive", "dimity", "dimple", "dimwit", "din", "dinar", "dine", "diner", "dingdong", "dinghy", "dingle", "dingo", "dingy", "dink", "dinkum", "dinky", "dinner", "dinosaur", "dint", "diocese", "dioxide", "dip", "diphtheria", "diphthong", "diploma", "diplomacy", "diplomat", "diplomatic", "diplomatically", "diplomatist", "dipper", "dipsomania", "dipsomaniac", "dipstick", "dipswitch"
 , "diptych", "dire", "direct", "direction", "directional", "directions", "directive", "directly", "director", "directorate", "directorship", "directory", "direful", "dirge", "dirigible", "dirk", "dirndl", "dirt", "dirty", "disability", "disable", "disabled", "disabuse", "disadvantage", "disadvantageous", "disaffected", "disaffection", "disaffiliate", "disafforest", "disagree", "disagreeable", "disagreement", "disallow", "disappear", "disappearance", "disappoint", "disappointed", "disappointing", "disappointment", "disapprobation", "disapproval", "disapprove", "disarm", "disarmament", "disarrange", "disarray", "disassociate", "disaster", "disastrous", "disavow", "disband", "disbar", "disbelief", "disbelieve", "disburden", "disburse", "disbursement", "disc", "discard", "discern", "discerning", "discernment", "discharge", "disciple", "discipleship", "disciplinarian", "disciplinary", "discipline", "disclaim", "disclaimer", "disclose", "disclosure", "disco", "discolor", "discoloration", 
 "discolour", "discolouration", "discomfit", "discomfiture", "discomfort", "discommode", "discompose", "disconcert", "disconnect", "disconnected", "disconnection", "disconsolate", "discontent", "discontented", "discontinue", "discontinuity", "discontinuous", "discord", "discordance", "discordant", "discotheque", "discount", "discountenance", "discourage", "discouragement", "discourse", "discourteous", "discourtesy", "discover", "discovery", "discredit", "discreditable", "discreet", "discrepancy", "discrete", "discretion", "discretionary", "discriminate", "discriminating", "discrimination", "discriminatory", "discursive", "discus", "discuss", "discussion", "disdain", "disdainful", "disease", "disembark", "disembarrass", "disembodied", "disembowel", "disembroil", "disenchant", "disencumber", "disendow", "disengage", "disengaged", "disentangle", "disequilibrium", "disestablish", "disfavor", "disfavour", "disfigure", "disforest", "disfranchise", "disfrock", "disgorge", "disgrace", "disgr
 aceful", "disgruntled", "disguise", "disgust", "dish", "dishabille", "disharmony", "dishcloth", "dishearten", "dishes", "dishevelled", "dishful", "dishonest", "dishonesty", "dishonor", "dishonorable", "dishonour", "dishonourable", "dishwasher", "dishwater", "dishy", "disillusion", "disillusioned", "disillusionment", "disincentive", "disinclination", "disinclined", "disinfect", "disinfectant", "disinfest", "disingenuous", "disinherit", "disintegrate", "disinter", "disinterested", "disjoint", "disjointed", "disjunctive", "disk", "dislike", "dislocate", "dislocation", "dislodge", "disloyal", "dismal", "dismantle", "dismast", "dismay", "dismember", "dismiss", "dismissal", "dismount", "disobedient", "disobey", "disoblige", "disorder", "disorderly", "disorganise", "disorganize", "disorientate", "disown", "disparage", "disparate", "disparity", "dispassionate", "dispatch", "dispatches", "dispel", "dispensable", "dispensary", "dispensation", "dispense", "dispenser", "dispersal", "disperse", 
 "dispersion", "dispirit", "displace", "displacement", "display", "displease", "displeasure", "disport", "disposable", "disposal", "dispose", "disposed", "disposition", "dispossess", "dispossessed", "disproof", "disproportion", "disproportionate", "disprove", "disputable", "disputant", "disputation", "disputatious", "dispute", "disqualification", "disqualify", "disquiet", "disquietude", "disquisition", "disregard", "disrelish", "disremember", "disrepair", "disreputable", "disrepute", "disrespect", "disrobe", "disrupt", "dissatisfaction", "dissatisfy", "dissect", "dissection", "dissemble", "disseminate", "dissension", "dissent", "dissenter", "dissenting", "dissertation", "disservice", "dissever", "dissident", "dissimilar", "dissimilarity", "dissimulate", "dissipate", "dissipated", "dissipation", "dissociate", "dissoluble", "dissolute", "dissolution", "dissolve", "dissonance", "dissonant", "dissuade", "distaff", "distal", "distance", "distant", "distantly", "distaste" };
+        internal static string[] data = new string[] {
+            "cash","cashew","cashier","cashmere","casing",
+            "casino","cask","casket","casque","cassava",
+            "casserole","cassette","cassock","cassowary","cast",
+            "castanets","castaway","castellated","caster","castigate",
+            "casting","castle","castor","castrate","casual",
+            "casualty","casuist","casuistry","cat","cataclysm",
+            "catacomb","catafalque","catalepsy","catalog","catalogue",
+            "catalpa","catalysis","catalyst","catamaran","catapult",
+            "cataract","catarrh","catastrophe","catatonic","catcall",
+            "catch","catcher","catching","catchpenny","catchphrase",
+            "catchword","catchy","catechise","catechism","catechize",
+            "categorical","categorise","categorize","category","cater",
+            "caterer","caterpillar","caterwaul","catfish","catgut",
+            "catharsis","cathartic","cathedral","catheter","cathode",
+            "catholic","catholicism","catholicity","catkin","catnap",
+            "catnip","catsup","cattle","catty","catwalk",
+            "caucus","caudal","caught","caul","cauldron",
+            "cauliflower","caulk","causal","causality","causation",
+            "causative","cause","causeless","causeway","caustic",
+            "cauterise","cauterize","caution","cautionary","cautious",
+            "cavalcade","cavalier","cavalry","cavalryman","cave",
+            "caveat","caveman","cavern","cavernous","caviar",
+            "caviare","cavil","cavity","cavort","cavy",
+            "caw","cay","cayman","cease","ceaseless",
+            "cedar","cede","cedilla","ceiling","celandine",
+            "celebrant","celebrate","celebrated","celebration","celebrity",
+            "celerity","celery","celestial","celibacy","celibate",
+            "cell","cellar","cellarage","cellist","cello",
+            "cellophane","cellular","celluloid","cellulose","celsius",
+            "celtic","cement","cemetery","cenotaph","censor",
+            "censorious","censorship","censure","census","cent",
+            "centaur","centavo","centenarian","centenary","centennial",
+            "center","centerboard","centerpiece","centigrade","centigram",
+            "centigramme","centime","centimeter","centimetre","centipede",
+            "central","centralise","centralism","centralize","centre",
+            "centreboard","centrepiece","centrifugal","centrifuge","centripetal",
+            "centrist","centurion","century","cephalic","ceramic",
+            "ceramics","cereal","cerebellum","cerebral","cerebration",
+            "cerebrum","ceremonial","ceremonious","ceremony","cerise",
+            "cert","certain","certainly","certainty","certifiable",
+            "certificate","certificated","certify","certitude","cerulean",
+            "cervical","cervix","cessation","cession","cesspit",
+            "cetacean","chablis","chaconne","chafe","chaff",
+            "chaffinch","chagrin","chain","chair","chairman",
+            "chairmanship","chairperson","chairwoman","chaise","chalet",
+            "chalice","chalk","chalky","challenge","challenging",
+            "chamber","chamberlain","chambermaid","chambers","chameleon",
+            "chamiomile","chamois","chamomile","champ","champagne",
+            "champaign","champion","championship","chance","chancel",
+            "chancellery","chancellor","chancery","chancy","chandelier",
+            "chandler","change","changeable","changeless","changeling",
+            "changeover","channel","chant","chanterelle","chanticleer",
+            "chantry","chanty","chaos","chaotic","chap",
+            "chapel","chapelgoer","chaperon","chaperone","chapfallen",
+            "chaplain","chaplaincy","chaplet","chaps","chapter",
+            "char","charabanc","character","characterise","characteristic",
+            "characterization","characterize","characterless","charade","charades",
+            "charcoal","chard","charge","chargeable","charged",
+            "charger","chariot","charioteer","charisma","charismatic",
+            "charitable","charity","charlady","charlatan","charleston",
+            "charlock","charlotte","charm","charmer","charming",
+            "chart","charter","chartreuse","charwoman","chary",
+            "charybdis","chase","chaser","chasm","chassis",
+            "chaste","chasten","chastise","chastisement","chastity",
+            "chasuble","chat","chatelaine","chattel","chatter",
+            "chatterbox","chatty","chauffeur","chauvinism","chauvinist",
+            "cheap","cheapen","cheapskate","cheat","check",
+            "checkbook","checked","checker","checkerboard","checkers",
+            "checklist","checkmate","checkoff","checkout","checkpoint",
+            "checkrail","checkrein","checkroom","checkup","cheddar",
+            "cheek","cheekbone","cheeky","cheep","cheer",
+            "cheerful","cheering","cheerio","cheerleader","cheerless",
+            "cheers","cheery","cheese","cheesecake","cheesecloth",
+            "cheeseparing","cheetah","chef","chem","chemical",
+            "chemise","chemist","chemistry","chemotherapy","chenille",
+            "cheque","chequebook","chequer","cherish","cheroot",
+            "cherry","cherub","chervil","chess","chessboard",
+            "chessman","chest","chesterfield","chestnut","chesty",
+            "chevalier","chevron","chevvy","chevy","chew",
+            "chi","chianti","chiaroscuro","chic","chicanery",
+            "chicano","chichi","chick","chicken","chickenfeed",
+            "chickenhearted","chickpea","chickweed","chicle","chicory",
+            "chide","chief","chiefly","chieftain","chieftainship",
+            "chiffon","chiffonier","chiffonnier","chigger","chignon",
+            "chihuahua","chilblain","child","childbearing","childbirth",
+            "childhood","childish","childlike","chile","chill",
+            "chiller","chilli","chilly","chimaera","chime",
+            "chimera","chimerical","chimney","chimneybreast","chimneypiece",
+            "chimneypot","chimneystack","chimneysweep","chimpanzee","chin",
+            "china","chinatown","chinaware","chinchilla","chine",
+            "chink","chinless","chinook","chinstrap","chintz",
+            "chinwag","chip","chipboard","chipmunk","chippendale",
+            "chipping","chippy","chiromancy","chiropody","chiropractic",
+            "chirp","chirpy","chisel","chiseler","chiseller",
+            "chit","chitchat","chivalrous","chivalry","chive",
+            "chivvy","chivy","chloride","chlorinate","chlorine",
+            "chloroform","chlorophyll","chock","chocolate","choice",
+            "choir","choirboy","choirmaster","choke","choker",
+            "chokey","choky","choler","cholera","choleric",
+            "cholesterol","chomp","choose","choosey","choosy",
+            "chop","chopfallen","chophouse","chopper","choppers",
+            "choppy","chopstick","choral","chorale","chord",
+            "chore","choreographer","choreography","chorine","chorister",
+            "chortle","chorus","chose","chosen","chow",
+            "chowder","christ","christen","christendom","christening",
+            "christian","christianity","christlike","christmastime","chromatic",
+            "chrome","chromium","chromosome","chronic","chronicle",
+            "chronograph","chronological","chronology","chronometer","chrysalis",
+            "chrysanthemum","chub","chubby","chuck","chuckle",
+            "chug","chukker","chum","chummy","chump",
+            "chunk","chunky","church","churchgoer","churching",
+            "churchwarden","churchyard","churl","churlish","churn",
+            "chute","chutney","cia","cicada","cicatrice",
+            "cicerone","cid","cider","cif","cigar",
+            "cigaret","cigarette","cinch","cincture","cinder",
+            "cinderella","cinders","cine","cinema","cinematograph",
+            "cinematography","cinnamon","cinquefoil","cipher","circa",
+            "circadian","circle","circlet","circuit","circuitous",
+            "circular","circularise","circularize","circulate","circulation",
+            "circumcise","circumcision","circumference","circumflex","circumlocution",
+            "circumnavigate","circumscribe","circumscription","circumspect","circumstance",
+            "circumstances","circumstantial","circumvent","circus","cirque",
+            "cirrhosis","cirrus","cissy","cistern","citadel",
+            "citation","cite","citizen","citizenry","citizenship",
+            "citron","citrous","citrus","city","civet",
+            "civic","civics","civies","civil","civilian",
+            "civilisation","civilise","civility","civilization","civilize",
+            "civilly","civvies","clack","clad","claim",
+            "claimant","clairvoyance","clairvoyant","clam","clambake",
+            "clamber","clammy","clamor","clamorous","clamour",
+            "clamp","clampdown","clamshell","clan","clandestine",
+            "clang","clanger","clangor","clangour","clank",
+            "clannish","clansman","clap","clapboard","clapper",
+            "clapperboard","clappers","claptrap","claque","claret",
+            "clarification","clarify","clarinet","clarinetist","clarinettist",
+            "clarion","clarity","clarts","clash","clasp",
+            "class","classic","classical","classicism","classicist",
+            "classics","classification","classified","classify","classless",
+            "classmate","classroom","classy","clatter","clause",
+            "claustrophobia","claustrophobic","clavichord","clavicle","claw",
+            "clay","claymore","clean","cleaner","cleanliness",
+            "cleanly","cleanse","cleanser","cleanup","clear",
+            "clearance","clearing","clearinghouse","clearly","clearout",
+            "clearway","cleat","cleavage","cleave","cleaver",
+            "clef","cleft","clematis","clemency","clement",
+            "clench","clerestory","clergy","clergyman","clerical",
+            "clerihew","clerk","clever","clew","click",
+            "client","clientele","cliff","cliffhanger","climacteric",
+            "climactic","climate","climatic","climatology","climax",
+            "climb","climber","clime","clinch","clincher",
+            "cline","cling","clinging","clingy","clinic",
+            "clinical","clink","clinker","clip","clipboard",
+            "clipper","clippers","clippie","clipping","clique",
+            "cliquey","cliquish","clitoris","cloaca","cloak",
+            "cloakroom","clobber","cloche","clock","clockwise",
+            "clockwork","clod","cloddish","clodhopper","clog",
+            "cloggy","cloister","clone","clop","close",
+            "closed","closedown","closefisted","closet","closure",
+            "clot","cloth","clothe","clothes","clothesbasket",
+            "clotheshorse","clothesline","clothier","clothing","cloture",
+            "cloud","cloudbank","cloudburst","cloudless","cloudy",
+            "clout","clove","cloven","clover","cloverleaf",
+            "clown","clownish","cloy","club","clubbable",
+            "clubfoot","clubhouse","cluck","clue","clueless",
+            "clump","clumsy","clung","cluster","clutch",
+            "clutches","clutter","coach","coachbuilder","coachman",
+            "coachwork","coadjutor","coagulant","coagulate","coal",
+            "coalbunker","coalesce","coalface","coalfield","coalhole",
+            "coalhouse","coalition","coalmine","coalscuttle","coarse",
+            "coarsen","coast","coastal","coaster","coastguard",
+            "coastguardsman","coastline","coastwise","coat","coating",
+            "coax","cob","cobalt","cobber","cobble",
+            "cobbler","cobblers","cobblestone","cobra","cobweb",
+            "cocaine","coccyx","cochineal","cochlea","cock",
+            "cockade","cockatoo","cockchafer","cockcrow","cockerel",
+            "cockeyed","cockfight","cockhorse","cockle","cockleshell",
+            "cockney","cockpit","cockroach","cockscomb","cocksure",
+            "cocktail","cocky","coco","cocoa","coconut",
+            "cocoon","cod","coda","coddle","code",
+            "codeine","codex","codger","codicil","codify",
+            "codling","codpiece","codswallop","coed","coeducation",
+            "coefficient","coelacanth","coequal","coerce","coercion",
+            "coercive","coeternal","coeval","coexist","coexistence",
+            "coffee","coffeepot","coffer","cofferdam","coffers",
+            "coffin","cog","cogency","cogent","cogitate",
+            "cogitation","cognac","cognate","cognition","cognitive",
+            "cognizance","cognizant","cognomen","cognoscenti","cogwheel",
+            "cohabit","cohere","coherence","coherent","cohesion",
+            "cohesive","cohort","coif","coiffeur","coiffure",
+            "coil","coin","coinage","coincide","coincidence",
+            "coincident","coincidental","coir","coitus","coke",
+            "col","cola","colander","cold","coleslaw",
+            "coley","colic","colicky","colitis","collaborate",
+            "collaboration","collaborationist","collage","collapse","collapsible",
+            "collar","collarbone","collate","collateral","collation",
+            "colleague","collect","collected","collection","collective",
+            "collectivise","collectivism","collectivize","collector","colleen",
+            "college","collegiate","collide","collie","collier",
+            "colliery","collision","collocate","collocation","colloquial",
+            "colloquialism","colloquy","collude","collusion","collywobbles",
+            "cologne","colon","colonel","colonial","colonialism",
+            "colonialist","colonies","colonise","colonist","colonize",
+            "colonnade","colony","color","coloration","coloratura",
+            "colored","colorfast","colorful","coloring","colorless",
+            "colors","colossal","colossally","colossus","colostrum",
+            "colour","coloured","colourfast","colourful","colouring",
+            "colourless","colours","colt","colter","coltish",
+            "columbine","column","columnist","coma","comatose",
+            "comb","combat","combatant","combative","comber",
+            "combination","combinations","combinatorial","combine","combo",
+            "combustible","combustion","come","comeback","comecon",
+            "comedian","comedienne","comedown","comedy","comely",
+            "comer","comestible","comet","comfit","comfort",
+            "comfortable","comforter","comfrey","comfy","comic",
+            "comical","comics","cominform","coming","comintern",
+            "comity","comma","command","commandant","commandeer",
+            "commander","commanding","commandment","commando","commemorate",
+            "commemoration","commemorative","commence","commencement","commend",
+            "commendable","commendation","commendatory","commensurable","commensurate",
+            "comment","commentary","commentate","commentator","commerce",
+            "commercial","commercialise","commercialism","commercialize","commie",
+            "commiserate","commiseration","commissar","commissariat","commissary",
+            "commission","commissionaire","commissioner","commit","commitment",
+            "committal","committed","committee","committeeman","commode",
+            "commodious","commodity","commodore","common","commonage",
+            "commonalty","commoner","commonly","commonplace","commons",
+            "commonweal","commonwealth","commotion","communal","commune",
+            "communicable","communicant","communicate","communication","communications",
+            "communicative","communion","communism","communist","community",
+            "commutable","commutation","commutative","commutator","commute",
+            "commuter","compact","compacted","companion","companionable",
+            "companionship","companionway","company","comparable","comparative",
+            "comparatively","compare","comparison","compartment","compartmentalise",
+            "compartmentalize","compass","compassion","compassionate","compatibility",
+            "compatible","compatriot","compeer","compel","compendious",
+            "compendium","compensate","compensation","compensatory","compere",
+            "compete","competence","competent","competition","competitive",
+            "competitor","compilation","compile","complacency","complacent",
+            "complain","complainant","complaint","complaisance","complaisant",
+            "complement","complementary","complete","completely","completion",
+            "complex","complexion","complexity","compliance","compliant",
+            "complicate","complicated","complication","complicity","compliment",
+            "complimentary","compliments","complin","compline","comply",
+            "compo","component","comport","comportment","compose",
+            "composer","composite","composition","compositor","compost",
+            "composure","compote","compound","comprehend","comprehensible",
+            "comprehension","comprehensive","compress","compressible","compression",
+            "compressor","comprise","compromise","comptometer","comptroller",
+            "compulsion","compulsive","compulsory","compunction","computation",
+            "compute","computer","computerize","comrade","comradeship",
+            "coms","con","concatenate","concatenation","concave",
+            "concavity","conceal","concealment","concede","conceit",
+            "conceited","conceivable","conceive","concentrate","concentrated",
+            "concentration","concentric","concept","conception","conceptual",
+            "conceptualise","conceptualize","concern","concerned","concernedly",
+            "concerning","concert","concerted","concertgoer","concertina",
+            "concertmaster","concerto","concession","concessionaire","concessive",
+            "conch","conchology","concierge","conciliate","conciliation",
+            "conciliatory","concise","concision","conclave","conclude",
+            "conclusion","conclusive","concoct","concoction","concomitance",
+            "concomitant","concord","concordance","concordant","concordat",
+            "concourse","concrete","concubinage","concubine","concupiscence",
+            "concur","concurrence","concurrent","concuss","concussion",
+            "condemn","condemnation","condensation","condense","condenser",
+            "condescend","condescension","condign","condiment","condition",
+            "conditional","conditions","condole","condolence","condom",
+            "condominium","condone","condor","conduce","conducive",
+            "conduct","conduction","conductive","conductivity","conductor",
+            "conduit","cone","coney","confabulate","confabulation",
+            "confection","confectioner","confectionery","confederacy","confederate",
+            "confederation","confer","conference","confess","confessed",
+            "confession","confessional","confessor","confetti","confidant",
+            "confide","confidence","confident","confidential","confiding",
+            "configuration","confine","confinement","confines","confirm",
+            "confirmation","confirmed","confiscate","confiscatory","conflagration",
+            "conflate","conflict","confluence","conform","conformable",
+            "conformation","conformist","conformity","confound","confounded",
+            "confraternity","confront","confrontation","confucian","confucianism",
+            "confuse","confusion","confute","conga","congeal",
+            "congenial","congenital","congest","congestion","conglomerate",
+            "conglomeration","congrats","congratulate","congratulations","congratulatory",
+            "congregate","congregation","congregational","congregationalism","congress",
+            "congressional","congressman","congruent","congruity","congruous",
+            "conic","conical","conifer","coniferous","conj",
+            "conjectural","conjecture","conjoin","conjoint","conjugal",
+            "conjugate","conjugation","conjunction","conjunctiva","conjunctive",
+            "conjunctivitis","conjuncture","conjure","conjurer","conjuror",
+            "conk","conker","conkers","connect","connected",
+            "connection","connective","connexion","connivance","connive",
+            "connoisseur","connotation","connotative","connote","connubial",
+            "conquer","conquest","conquistador","consanguineous","consanguinity",
+            "conscience","conscientious","conscious","consciousness","conscript",
+            "conscription","consecrate","consecration","consecutive","consensus",
+            "consent","consequence","consequent","consequential","consequently",
+            "conservancy","conservation","conservationist","conservatism","conservative",
+            "conservatoire","conservatory","conserve","consider","considerable",
+            "considerably","considerate","consideration","considered","considering",
+            "consign","consignee","consigner","consignment","consignor",
+            "consist","consistency","consistent","consistory","consolation",
+            "consolatory","console","consolidate","consols","consonance",
+            "consonant","consort","consortium","conspectus","conspicuous",
+            "conspiracy","conspirator","conspiratorial","conspire","constable",
+            "constabulary","constancy","constant","constellation","consternation",
+            "constipate","constipation","constituency","constituent","constitute",
+            "constitution","constitutional","constitutionalism","constitutionally","constitutive",
+            "constrain","constrained","constraint","constrict","constriction",
+            "constrictor","construct","construction","constructive","constructor",
+            "construe","consubstantiation","consul","consular","consulate",
+            "consult","consultancy","consultant","consultation","consultative",
+            "consulting","consume","consumer","consummate","consummation",
+            "consumption","consumptive","contact","contagion","contagious",
+            "contain","contained","container","containerise","containerize",
+            "containment","contaminate","contamination","contemplate","contemplation",
+            "contemplative","contemporaneous","contemporary","contempt","contemptible",
+            "contemptuous","contend","contender","content","contented",
+            "contention","contentious","contentment","contents","contest",
+            "contestant","context","contextual","contiguity","contiguous",
+            "continence","continent","continental","contingency","contingent",
+            "continual","continuance","continuation","continue","continuity",
+            "continuo","continuous","continuum","contort","contortion",
+            "contortionist","contour","contraband","contrabass","contraception",
+            "contraceptive","contract","contractile","contraction","contractor",
+            "contractual","contradict","contradiction","contradictory","contradistinction",
+            "contrail","contraindication","contralto","contraption","contrapuntal",
+            "contrariety","contrariwise","contrary","contrast","contravene",
+            "contravention","contretemps","contribute","contribution","contributor",
+            "contributory","contrite","contrition","contrivance","contrive",
+            "contrived","control","controller","controversial","controversy",
+            "controvert","contumacious","contumacy","contumelious","contumely",
+            "contuse","contusion","conundrum","conurbation","convalesce",
+            "convalescence","convalescent","convection","convector","convene",
+            "convener","convenience","convenient","convenor","convent",
+            "conventicle","convention","conventional","conventionality","converge",
+            "conversant","conversation","conversational","conversationalist","conversazione",
+            "converse","conversion","convert","converter","convertible",
+            "convex","convexity","convey","conveyance","conveyancer",
+            "conveyancing","conveyer","conveyor","convict","conviction",
+            "convince","convinced","convincing","convivial","convocation",
+            "convoke","convoluted","convolution","convolvulus","convoy",
+            "convulse","convulsion","convulsive","cony","coo",
+            "cook","cooker","cookery","cookhouse","cookie",
+            "cooking","cookout","cool","coolant","cooler",
+            "coolie","coon","coop","cooper","cooperate",
+            "cooperation","cooperative","coordinate","coordinates","coordination",
+            "coot","cop","cope","copeck","copier",
+            "copilot","coping","copingstone","copious","copper",
+            "copperhead","copperplate","coppersmith","coppice","copra",
+            "coptic","copula","copulate","copulative","copy",
+            "copybook","copyboy","copycat","copydesk","copyhold",
+            "copyist","copyright","copywriter","coquetry","coquette",
+            "cor","coracle","coral","corbel","cord",
+            "cordage","cordial","cordiality","cordially","cordillera",
+            "cordite","cordon","cords","corduroy","core",
+            "corelate","coreligionist","corer","corespondent","corgi",
+            "coriander","corinthian","cork","corkage","corked",
+            "corker","corkscrew","corm","cormorant","corn",
+            "corncob","corncrake","cornea","cornelian","corner",
+            "cornerstone","cornet","cornfield","cornflakes","cornflower",
+            "cornice","cornish","cornucopia","corny","corolla",
+            "corollary","corona","coronary","coronation","coroner",
+            "coronet","corpora","corporal","corporate","corporation",
+            "corporeal","corps","corpse","corpulence","corpulent",
+            "corpus","corpuscle","corral","correct","correction",
+            "correctitude","corrective","correlate","correlation","correlative",
+            "correspond","correspondence","correspondent","corresponding","corridor",
+            "corrie","corrigendum","corroborate","corroboration","corroborative",
+            "corroboree","corrode","corrosion","corrosive","corrugate",
+            "corrugation","corrupt","corruption","corsage","corsair",
+            "corse","corselet","corset","cortex","cortisone",
+            "corundum","coruscate","corvette","cos","cosh",
+            "cosignatory","cosine","cosmetic","cosmetician","cosmic",
+            "cosmogony","cosmology","cosmonaut","cosmopolitan","cosmos",
+            "cosset","cost","costermonger","costive","costly",
+            "costs","costume","costumier","cosy","cot",
+            "cotangent","cote","coterie","coterminous","cotillion",
+            "cottage","cottager","cottar","cotter","cotton",
+            "cottonseed","cottontail","cotyledon","couch","couchant",
+            "couchette","cougar","cough","could","couldst",
+            "coulter","council","councillor","counsel","counsellor",
+            "counselor","count","countable","countdown","countenance",
+            "counter","counteract","counterattack","counterattraction","counterbalance",
+            "counterblast","counterclaim","counterclockwise","counterespionage","counterfeit",
+            "counterfoil","counterintelligence","counterirritant","countermand","countermarch",
+            "countermeasure","counteroffensive","counterpane","counterpart","counterpoint",
+            "counterpoise","countersign","countersink","countertenor","countervail",
+            "countess","countinghouse","countless","countrified","country",
+            "countryman","countryside","county","coup","couple",
+            "couplet","coupling","coupon","courage","courageous",
+            "courgette","courier","course","courser","coursing",
+            "court","courteous","courtesan","courtesy","courthouse",
+            "courtier","courting","courtly","courtroom","courtship",
+            "courtyard","couscous","cousin","couture","cove",
+            "coven","covenant","coventry","cover","coverage",
+            "covering","coverlet","covert","covet","covetous",
+            "covey","cow","coward","cowardice","cowardly",
+            "cowbell","cowboy","cowcatcher","cower","cowgirl",
+            "cowhand","cowheel","cowherd","cowhide","cowl",
+            "cowlick","cowling","cowman","cowpat","cowpox",
+            "cowrie","cowry","cowshed","cowslip","cox",
+            "coxcomb","coy","coyote","coypu","cozen",
+            "cozy","cpa","crab","crabbed","crabby",
+            "crabgrass","crabwise","crack","crackbrained","crackdown",
+            "cracked","cracker","crackers","crackle","crackleware",
+            "crackling","crackpot","cracksman","crackup","cradle",
+            "craft","craftsman","crafty","crag","craggy",
+            "crake","cram","crammer","cramp","cramped",
+            "crampon","cramps","cranberry","crane","cranial",
+            "cranium","crank","crankshaft","cranky","cranny",
+            "crap","crape","crappy","craps","crash",
+            "crashing","crass","crate","crater","cravat",
+            "crave","craven","craving","crawl","crawler",
+            "crawlers","crayfish","crayon","craze","crazy",
+            "creak","creaky","cream","creamer","creamery",
+            "creamy","crease","create","creation","creative",
+            "creativity","creator","creature","credence","credentials",
+            "credibility","credible","credit","creditable","creditor",
+            "credo","credulous","creed","creek","creel",
+            "creep","creeper","creepers","creeps","creepy",
+            "cremate","crematorium","crenelated","crenellated","creole",
+            "creosote","crept","crepuscular","crescendo","crescent",
+            "cress","crest","crested","crestfallen","cretaceous",
+            "cretin","cretonne","crevasse","crevice","crew",
+            "crewman","crib","cribbage","crick","cricket",
+            "cricketer","crier","cries","crikey","crime",
+            "criminal","criminology","crimp","crimplene","crimson",
+            "cringe","crinkle","crinkly","crinoid","crinoline",
+            "cripes","cripple","crisis","crisp","crispy",
+            "crisscross","criterion","critic","critical","criticise",
+            "criticism","criticize","critique","critter","croak",
+            "crochet","crock","crockery","crocodile","crocus",
+            "croft","crofter","croissant","cromlech","crone",
+            "crony","crook","crooked","croon","crooner",
+            "crop","cropper","croquet","croquette","crore",
+            "crosier","cross","crossbar","crossbeam","crossbenches",
+            "crossbones","crossbow","crossbred","crossbreed","crosscheck",
+            "crosscurrent","crosscut","crossfire","crossing","crossover",
+            "crosspatch","crosspiece","crossply","crossroad","crossroads",
+            "crosstree","crosswalk","crosswind","crosswise","crossword",
+            "crotch","crotchet","crotchety","crouch","croup",
+            "croupier","crouton","crow","crowbar","crowd",
+            "crowded","crowfoot","crown","crozier","crucial",
+            "crucible","crucifix","crucifixion","cruciform","crucify",
+            "crude","crudity","cruel","cruelty","cruet",
+            "cruise","cruiser","crumb","crumble","crumbly",
+            "crummy","crumpet","crumple","crunch","crupper",
+            "crusade","cruse","crush","crust","crustacean",
+            "crusty","crutch","crux","cry","crybaby",
+            "crying","crypt","cryptic","cryptogram","cryptography",
+            "crystal","crystalline","crystallise","crystallize","cub",
+            "cubbyhole","cube","cubic","cubical","cubicle",
+            "cubism","cubit","cubs","cuckold","cuckoldry",
+            "cuckoo","cucumber","cud","cuddle","cuddlesome",
+            "cuddly","cudgel","cue","cuff","cuffs",
+            "cuirass","cuisine","culinary","cull","cullender",
+            "culminate","culmination","culotte","culottes","culpable",
+            "culprit","cult","cultivable","cultivate","cultivated",
+            "cultivation","cultivator","cultural","culture","cultured",
+            "culvert","cumber","cumbersome","cumin","cummerbund",
+            "cumulative","cumulonimbus","cumulus","cuneiform","cunnilingus",
+            "cunning","cunt","cup","cupbearer","cupboard",
+            "cupid","cupidity","cupola","cuppa","cupping",
+            "cupric","cur","curable","curacy","curate",
+            "curative","curator","curb","curd","curdle",
+            "cure","curettage","curfew","curia","curio",
+            "curiosity","curious","curl","curler","curlew",
+            "curlicue","curling","curly","curlycue","curmudgeon",
+            "currant","currency","current","curriculum","currish",
+            "curry","curse","cursed","cursive","cursory",
+            "curt","curtail","curtain","curtains","curtsey",
+            "curtsy","curvaceous","curvacious","curvature","curve",
+            "cushion","cushy","cusp","cuspidor","cuss",
+            "cussed","custard","custodial","custodian","custody",
+            "custom","customary","customer","customs","cut",
+            "cutaway","cutback","cuticle","cutlass","cutler",
+            "cutlery","cutlet","cutoff","cutout","cutpurse",
+            "cutter","cutthroat","cutting","cuttlefish","cutworm",
+            "cwm","cwt","cyanide","cybernetics","cyclamate",
+            "cyclamen","cycle","cyclic","cyclist","cyclone",
+            "cyclopaedia","cyclopedia","cyclostyle","cyclotron","cyder",
+            "cygnet","cylinder","cymbal","cynic","cynical",
+            "cynicism","cynosure","cypher","cypress","cyrillic",
+            "cyst","cystitis","cytology","czar","czarina",
+            "czech","dab","dabble","dabchick","dabs",
+            "dace","dachshund","dactyl","dad","daddy",
+            "dado","daemon","daffodil","daft","dagger",
+            "dago","daguerreotype","dahlia","daily","dainty",
+            "daiquiri","dairy","dairying","dairymaid","dairyman",
+            "dais","daisy","dale","dalliance","dally",
+            "dalmation","dam","damage","damages","damascene",
+            "damask","damn","damnable","damnation","damnedest",
+            "damning","damocles","damp","dampen","damper",
+            "dampish","damsel","damson","dance","dandelion",
+            "dander","dandified","dandle","dandruff","dandy",
+            "danger","dangerous","dangle","dank","dapper",
+            "dappled","dare","daredevil","daresay","daring",
+            "dark","darken","darkey","darkroom","darky",
+            "darling","darn","darning","dart","dartboard",
+            "dartmoor","darts","dash","dashboard","dashed",
+            "dashing","data","date","dated","dateless",
+            "dateline","dates","dative","daub","daughter",
+            "daunt","dauntless","dauphin","davit","dawdle",
+            "dawn","day","dayboy","daybreak","daydream",
+            "daylight","dayroom","days","daytime","daze",
+            "dazzle","ddt","deacon","dead","deaden",
+            "deadline","deadlock","deadly","deadpan","deadweight",
+            "deaf","deafen","deal","dealer","dealing",
+            "dealings","dean","deanery","dear","dearest",
+            "dearie","dearly","dearth","deary","death",
+            "deathbed","deathblow","deathless","deathlike","deathly",
+            "deathwatch","deb","debar","debark","debase",
+            "debatable","debate","debater","debauch","debauchee",
+            "debauchery","debenture","debilitate","debility","debit",
+            "debonair","debone","debouch","debrief","debris",
+            "debt","debtor","debug","debunk","debut",
+            "debutante","decade","decadence","decadent","decalogue",
+            "decamp","decant","decanter","decapitate","decathlon",
+            "decay","decease","deceased","deceit","deceitful",
+            "deceive","decelerate","december","decencies","decency",
+            "decent","decentralise","decentralize","deception","deceptive",
+            "decibel","decide","decided","decidedly","deciduous",
+            "decimal","decimalise","decimalize","decimate","decipher",
+            "decision","decisive","deck","deckchair","deckhand",
+            "declaim","declamation","declaration","declare","declared",
+            "declassify","declension","declination","decline","declivity",
+            "declutch","decoction","decode","decolonise","decolonize",
+            "decompose","decompress","decongestant","decontaminate","decontrol",
+            "decorate","decoration","decorative","decorator","decorous",
+            "decorum","decoy","decrease","decree","decrepit",
+            "decrepitude","decry","dedicate","dedicated","dedication",
+            "deduce","deduct","deduction","deductive","deed",
+            "deem","deep","deepen","deer","deerstalker",
+            "def","deface","defame","default","defeat",
+            "defeatism","defecate","defect","defection","defective",
+            "defence","defend","defendant","defense","defensible",
+            "defensive","defer","deference","defiance","defiant",
+            "deficiency","deficient","deficit","defile","define",
+            "definite","definitely","definition","definitive","deflate",
+            "deflation","deflationary","deflect","deflection","deflower",
+            "defoliant","defoliate","deforest","deform","deformation",
+            "deformity","defraud","defray","defrock","defrost",
+            "deft","defunct","defuse","defy","degauss",
+            "degeneracy","degenerate","degeneration","degenerative","degrade",
+            "degree","dehorn","dehumanise","dehumanize","dehydrate",
+            "deice","deification","deify","deign","deism",
+            "deity","dejected","dejection","dekko","delay",
+            "delectable","delectation","delegacy","delegate","delegation",
+            "delete","deleterious","deletion","delft","deliberate",
+            "deliberation","deliberative","delicacy","delicate","delicatessen",
+            "delicious","delight","delightful","delimit","delineate",
+            "delinquency","delinquent","deliquescent","delirious","delirium",
+            "deliver","deliverance","delivery","deliveryman","dell",
+            "delouse","delphic","delphinium","delta","delude",
+            "deluge","delusion","delusive","delve","demagnetise",
+            "demagnetize","demagogic","demagogue","demagoguery","demand",
+            "demanding","demarcate","demarcation","demean","demeanor",
+            "demeanour","demented","demerit","demesne","demigod",
+            "demijohn","demilitarise","demilitarize","demise","demist",
+            "demister","demo","demob","demobilise","demobilize",
+            "democracy","democrat","democratic","democratise","democratize",
+            "demography","demolish","demolition","demon","demonetise",
+            "demonetize","demoniacal","demonic","demonstrable","demonstrate",
+            "demonstration","demonstrative","demonstrator","demoralise","demoralize",
+            "demote","demotic","demur","demure","demystify",
+            "den","denationalise","denationalize","denial","denier",
+            "denigrate","denim","denims","denizen","denominate",
+            "denomination","denominational","denominator","denotation","denote",
+            "denouement","denounce","dense","density","dent",
+            "dental","dentifrice","dentist","dentistry","denture",
+            "dentures","denude","denunciation","deny","deodorant",
+            "deodorise","deodorize","depart","departed","department",
+            "departure","depend","dependable","dependant","dependence",
+            "dependency","dependent","depict","depilatory","deplete",
+            "deplorable","deplore","deploy","deponent","depopulate",
+            "deport","deportee","deportment","depose","deposit",
+            "deposition","depositor","depository","depot","deprave",
+            "depravity","deprecate","deprecatory","depreciate","depreciatory",
+            "depredation","depress","depressed","depression","deprivation",
+            "deprive","deprived","depth","depths","deputation",
+            "depute","deputise","deputize","deputy","derail",
+            "derange","derby","derelict","dereliction","deride",
+            "derision","derisive","derisory","derivative","derive",
+            "dermatitis","dermatology","derogate","derogatory","derrick",
+            "derv","dervish","des","desalinise","desalinize",
+            "descale","descant","descend","descendant","descended",
+            "descent","describe","description","descriptive","descry",
+            "desecrate","desegregate","desensitise","desensitize","desert",
+            "deserter","desertion","deserts","deserve","deservedly",
+            "deserving","desiccant","desiccate","desideratum","design",
+            "designate","designation","designedly","designer","designing",
+            "designs","desirable","desire","desirous","desist",
+            "desk","deskwork","desolate","despair","despairing",
+            "despatch","despatches","desperado","desperate","desperation",
+            "despicable","despise","despite","despoil","despondent",
+            "despot","despotic","despotism","dessert","dessertspoon",
+            "dessertspoonful","destination","destined","destiny","destitute",
+            "destroy","destroyer","destruction","destructive","desuetude",
+            "desultory","detach","detached","detachedly","detachment",
+            "detail","detailed","detain","detainee","detect",
+            "detection","detective","detector","detention","deter",
+            "detergent","deteriorate","determinant","determination","determine",
+            "determined","determiner","determinism","deterrent","detest",
+            "dethrone","detonate","detonation","detonator","detour",
+            "detract","detractor","detrain","detriment","detritus",
+            "deuce","deuced","deuteronomy","devaluation","devalue",
+            "devastate","devastating","develop","developer","development",
+            "developmental","deviance","deviant","deviate","deviation",
+            "deviationist","device","devil","devilish","devilishly",
+            "devilment","devious","devise","devitalise","devitalize",
+            "devoid","devolution","devolve","devote","devoted",
+            "devotee","devotion","devotional","devotions","devour",
+            "devout","devoutly","dew","dewdrop","dewlap",
+            "dewpond","dewy","dexterity","dexterous","dextrose",
+            "dhoti","dhow","diabetes","diabetic","diabolic",
+            "diabolical","diacritic","diacritical","diadem","diaeresis",
+            "diagnose","diagnosis","diagnostic","diagonal","diagram",
+            "dial","dialect","dialectic","dialectician","dialog",
+            "dialogue","diameter","diametrically","diamond","diaper",
+            "diaphanous","diaphragm","diarist","diarrhea","diarrhoea",
+            "diary","diaspora","diatom","diatribe","dibble",
+            "dice","dicey","dichotomy","dick","dicker",
+            "dickie","dicky","dickybird","dictaphone","dictate",
+            "dictation","dictator","dictatorial","dictatorship","diction",
+            "dictionary","dictum","did","didactic","diddle",
+            "didst","die","diehard","dieresis","diet",
+            "dietary","dietetic","dietetics","dietician","dietitian",
+            "differ","difference","different","differential","differentiate",
+            "difficult","difficulty","diffident","diffract","diffuse",
+            "diffusion","dig","digest","digestion","digestive",
+            "digger","digging","diggings","digit","digital",
+            "dignified","dignify","dignitary","dignity","digraph",
+            "digress","digression","digs","dike","dilapidated",
+            "dilapidation","dilapidations","dilate","dilatory","dildo",
+            "dilemma","dilettante","diligence","diligent","dill",
+            "dillydally","dilute","dilution","dim","dimension",
+            "dimensions","diminish","diminuendo","diminution","diminutive",
+            "dimity","dimple","dimwit","din","dinar",
+            "dine","diner","dingdong","dinghy","dingle",
+            "dingo","dingy","dink","dinkum","dinky",
+            "dinner","dinosaur","dint","diocese","dioxide",
+            "dip","diphtheria","diphthong","diploma","diplomacy",
+            "diplomat","diplomatic","diplomatically","diplomatist","dipper",
+            "dipsomania","dipsomaniac","dipstick","dipswitch","diptych",
+            "dire","direct","direction","directional","directions",
+            "directive","directly","director","directorate","directorship",
+            "directory","direful","dirge","dirigible","dirk",
+            "dirndl","dirt","dirty","disability","disable",
+            "disabled","disabuse","disadvantage","disadvantageous","disaffected",
+            "disaffection","disaffiliate","disafforest","disagree","disagreeable",
+            "disagreement","disallow","disappear","disappearance","disappoint",
+            "disappointed","disappointing","disappointment","disapprobation","disapproval",
+            "disapprove","disarm","disarmament","disarrange","disarray",
+            "disassociate","disaster","disastrous","disavow","disband",
+            "disbar","disbelief","disbelieve","disburden","disburse",
+            "disbursement","disc","discard","discern","discerning",
+            "discernment","discharge","disciple","discipleship","disciplinarian",
+            "disciplinary","discipline","disclaim","disclaimer","disclose",
+            "disclosure","disco","discolor","discoloration","discolour",
+            "discolouration","discomfit","discomfiture","discomfort","discommode",
+            "discompose","disconcert","disconnect","disconnected","disconnection",
+            "disconsolate","discontent","discontented","discontinue","dis

<TRUNCATED>

[30/50] [abbrv] lucenenet git commit: Fixed NotImplementedException in Join.TermsWithScoreCollector.Mv.AcceptsDocsOutOfOrder()

Posted by sy...@apache.org.
Fixed NotImplementedException in Join.TermsWithScoreCollector.Mv.AcceptsDocsOutOfOrder()


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/55024152
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/55024152
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/55024152

Branch: refs/heads/analysis-work
Commit: 55024152e8c1b5502417c3d8fdfa5f85f58fc700
Parents: 0f9f0ce
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Aug 21 10:06:26 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 10:06:26 2016 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Join/TermsWithScoreCollector.cs | 9 ---------
 1 file changed, 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/55024152/src/Lucene.Net.Join/TermsWithScoreCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/TermsWithScoreCollector.cs b/src/Lucene.Net.Join/TermsWithScoreCollector.cs
index e823293..f4b2e55 100644
--- a/src/Lucene.Net.Join/TermsWithScoreCollector.cs
+++ b/src/Lucene.Net.Join/TermsWithScoreCollector.cs
@@ -56,8 +56,6 @@ namespace Lucene.Net.Join
             }
         }
 
-        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-        //ORIGINAL LINE: @Override public void setScorer(org.apache.lucene.search.Scorer scorer) throws java.io.IOException
         public override Scorer Scorer
         {
             set
@@ -261,8 +259,6 @@ namespace Lucene.Net.Join
                 }
             }
 
-            //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-            //ORIGINAL LINE: @Override public void setNextReader(org.apache.lucene.index.AtomicReaderContext context) throws java.io.IOException
             public override AtomicReaderContext NextReader
             {
                 set
@@ -271,11 +267,6 @@ namespace Lucene.Net.Join
                 }
             }
 
-            public override bool AcceptsDocsOutOfOrder()
-            {
-                throw new NotImplementedException();
-            }
-
             internal class Avg : Mv
             {
                 private int[] _scoreCounts = new int[InitialArraySize];


[39/50] [abbrv] lucenenet git commit: Ported Analysis.Compound namespace + tests

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
index d3fa779..33bc310 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/HyphenationTree.cs
@@ -1,528 +1,580 @@
-\ufeff/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-using System;
+\ufeffusing System;
 using System.Collections.Generic;
+using System.IO;
 using System.Text;
+using System.Xml;
 
 namespace Lucene.Net.Analysis.Compound.Hyphenation
 {
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
     /// <summary>
 	/// This tree structure stores the hyphenation patterns in an efficient way for
 	/// fast lookup. It provides the provides the method to hyphenate a word.
 	/// 
 	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
 	/// </summary>
-	public class HyphenationTree : TernaryTree, PatternConsumer
-	{
-
-	  /// <summary>
-	  /// value space: stores the interletter values
-	  /// </summary>
-	  protected internal ByteVector vspace;
-
-	  /// <summary>
-	  /// This map stores hyphenation exceptions
-	  /// </summary>
-	  protected internal Dictionary<string, List<object>> stoplist;
-
-	  /// <summary>
-	  /// This map stores the character classes
-	  /// </summary>
-	  protected internal TernaryTree classmap;
-
-	  /// <summary>
-	  /// Temporary map to store interletter values on pattern loading.
-	  /// </summary>
-	  [NonSerialized]
-	  private TernaryTree ivalues;
-
-	  public HyphenationTree()
-	  {
-		stoplist = new Dictionary<>(23); // usually a small table
-		classmap = new TernaryTree();
-		vspace = new ByteVector();
-		vspace.alloc(1); // this reserves index 0, which we don't use
-	  }
-
-	  /// <summary>
-	  /// Packs the values by storing them in 4 bits, two values into a byte Values
-	  /// range is from 0 to 9. We use zero as terminator, so we'll add 1 to the
-	  /// value.
-	  /// </summary>
-	  /// <param name="values"> a string of digits from '0' to '9' representing the
-	  ///        interletter values. </param>
-	  /// <returns> the index into the vspace array where the packed values are stored. </returns>
-	  protected internal virtual int packValues(string values)
-	  {
-		int i , n = values.Length;
-		int m = (n & 1) == 1 ? (n >> 1) + 2 : (n >> 1) + 1;
-		int offset = vspace.alloc(m);
-		sbyte[] va = vspace.Array;
-		for (i = 0; i < n; i++)
-		{
-		  int j = i >> 1;
-		  sbyte v = (sbyte)((values[i] - '0' + 1) & 0x0f);
-		  if ((i & 1) == 1)
-		  {
-			va[j + offset] = (sbyte)(va[j + offset] | v);
-		  }
-		  else
-		  {
-			va[j + offset] = (sbyte)(v << 4); // big endian
-		  }
-		}
-		va[m - 1 + offset] = 0; // terminator
-		return offset;
-	  }
-
-	  protected internal virtual string unpackValues(int k)
-	  {
-		StringBuilder buf = new StringBuilder();
-		sbyte v = vspace.get(k++);
-		while (v != 0)
-		{
-		  char c = (char)(((int)((uint)v >> 4)) - 1 + '0');
-		  buf.Append(c);
-		  c = (char)(v & 0x0f);
-		  if (c == 0)
-		  {
-			break;
-		  }
-		  c = (char)(c - 1 + '0');
-		  buf.Append(c);
-		  v = vspace.get(k++);
-		}
-		return buf.ToString();
-	  }
-
-	  /// <summary>
-	  /// Read hyphenation patterns from an XML file.
-	  /// </summary>
-	  /// <param name="f"> the filename </param>
-	  /// <exception cref="IOException"> In case the parsing fails </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void loadPatterns(java.io.File f) throws java.io.IOException
-	  public virtual void loadPatterns(File f)
-	  {
-		InputSource src = new InputSource(f.toURI().toASCIIString());
-		loadPatterns(src);
-	  }
-
-	  /// <summary>
-	  /// Read hyphenation patterns from an XML file.
-	  /// </summary>
-	  /// <param name="source"> the InputSource for the file </param>
-	  /// <exception cref="IOException"> In case the parsing fails </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void loadPatterns(org.xml.sax.InputSource source) throws java.io.IOException
-	  public virtual void loadPatterns(InputSource source)
-	  {
-		PatternParser pp = new PatternParser(this);
-		ivalues = new TernaryTree();
-
-		pp.parse(source);
-
-		// patterns/values should be now in the tree
-		// let's optimize a bit
-		trimToSize();
-		vspace.trimToSize();
-		classmap.trimToSize();
-
-		// get rid of the auxiliary map
-		ivalues = null;
-	  }
-
-	  public virtual string findPattern(string pat)
-	  {
-		int k = base.find(pat);
-		if (k >= 0)
-		{
-		  return unpackValues(k);
-		}
-		return "";
-	  }
-
-	  /// <summary>
-	  /// String compare, returns 0 if equal or t is a substring of s
-	  /// </summary>
-	  protected internal virtual int hstrcmp(char[] s, int si, char[] t, int ti)
-	  {
-		for (; s[si] == t[ti]; si++, ti++)
-		{
-		  if (s[si] == 0)
-		  {
-			return 0;
-		  }
-		}
-		if (t[ti] == 0)
-		{
-		  return 0;
-		}
-		return s[si] - t[ti];
-	  }
-
-	  protected internal virtual sbyte[] getValues(int k)
-	  {
-		StringBuilder buf = new StringBuilder();
-		sbyte v = vspace.get(k++);
-		while (v != 0)
-		{
-		  char c = (char)(((int)((uint)v >> 4)) - 1);
-		  buf.Append(c);
-		  c = (char)(v & 0x0f);
-		  if (c == 0)
-		  {
-			break;
-		  }
-		  c = (char)(c - 1);
-		  buf.Append(c);
-		  v = vspace.get(k++);
-		}
-		sbyte[] res = new sbyte[buf.Length];
-		for (int i = 0; i < res.Length; i++)
-		{
-		  res[i] = (sbyte) buf[i];
-		}
-		return res;
-	  }
-
-	  /// <summary>
-	  /// <para>
-	  /// Search for all possible partial matches of word starting at index an update
-	  /// interletter values. In other words, it does something like:
-	  /// </para>
-	  /// <code>
-	  /// for(i=0; i&lt;patterns.length; i++) {
-	  /// if ( word.substring(index).startsWidth(patterns[i]) )
-	  /// update_interletter_values(patterns[i]);
-	  /// }
-	  /// </code>
-	  /// <para>
-	  /// But it is done in an efficient way since the patterns are stored in a
-	  /// ternary tree. In fact, this is the whole purpose of having the tree: doing
-	  /// this search without having to test every single pattern. The number of
-	  /// patterns for languages such as English range from 4000 to 10000. Thus,
-	  /// doing thousands of string comparisons for each word to hyphenate would be
-	  /// really slow without the tree. The tradeoff is memory, but using a ternary
-	  /// tree instead of a trie, almost halves the the memory used by Lout or TeX.
-	  /// It's also faster than using a hash table
-	  /// </para>
-	  /// </summary>
-	  /// <param name="word"> null terminated word to match </param>
-	  /// <param name="index"> start index from word </param>
-	  /// <param name="il"> interletter values array to update </param>
-	  protected internal virtual void searchPatterns(char[] word, int index, sbyte[] il)
-	  {
-		sbyte[] values;
-		int i = index;
-		char p, q;
-		char sp = word[i];
-		p = root;
-
-		while (p > 0 && p < sc.Length)
-		{
-		  if (sc[p] == 0xFFFF)
-		  {
-			if (hstrcmp(word, i, kv.Array, lo[p]) == 0)
-			{
-			  values = getValues(eq[p]); // data pointer is in eq[]
-			  int j = index;
-			  for (int k = 0; k < values.Length; k++)
-			  {
-				if (j < il.Length && values[k] > il[j])
-				{
-				  il[j] = values[k];
-				}
-				j++;
-			  }
-			}
-			return;
-		  }
-		  int d = sp - sc[p];
-		  if (d == 0)
-		  {
-			if (sp == 0)
-			{
-			  break;
-			}
-			sp = word[++i];
-			p = eq[p];
-			q = p;
-
-			// look for a pattern ending at this position by searching for
-			// the null char ( splitchar == 0 )
-			while (q > 0 && q < sc.Length)
-			{
-			  if (sc[q] == 0xFFFF) // stop at compressed branch
-			  {
-				break;
-			  }
-			  if (sc[q] == 0)
-			  {
-				values = getValues(eq[q]);
-				int j = index;
-				for (int k = 0; k < values.Length; k++)
-				{
-				  if (j < il.Length && values[k] > il[j])
-				  {
-					il[j] = values[k];
-				  }
-				  j++;
-				}
-				break;
-			  }
-			  else
-			  {
-				q = lo[q];
-
-				/// <summary>
-				/// actually the code should be: q = sc[q] < 0 ? hi[q] : lo[q]; but
-				/// java chars are unsigned
-				/// </summary>
-			  }
-			}
-		  }
-		  else
-		  {
-			p = d < 0 ? lo[p] : hi[p];
-		  }
-		}
-	  }
-
-	  /// <summary>
-	  /// Hyphenate word and return a Hyphenation object.
-	  /// </summary>
-	  /// <param name="word"> the word to be hyphenated </param>
-	  /// <param name="remainCharCount"> Minimum number of characters allowed before the
-	  ///        hyphenation point. </param>
-	  /// <param name="pushCharCount"> Minimum number of characters allowed after the
-	  ///        hyphenation point. </param>
-	  /// <returns> a <seealso cref="Hyphenation Hyphenation"/> object representing the
-	  ///         hyphenated word or null if word is not hyphenated. </returns>
-	  public virtual Hyphenation hyphenate(string word, int remainCharCount, int pushCharCount)
-	  {
-		char[] w = word.ToCharArray();
-		return hyphenate(w, 0, w.Length, remainCharCount, pushCharCount);
-	  }
-
-	  /// <summary>
-	  /// w = "****nnllllllnnn*****", where n is a non-letter, l is a letter, all n
-	  /// may be absent, the first n is at offset, the first l is at offset +
-	  /// iIgnoreAtBeginning; word = ".llllll.'\0'***", where all l in w are copied
-	  /// into word. In the first part of the routine len = w.length, in the second
-	  /// part of the routine len = word.length. Three indices are used: index(w),
-	  /// the index in w, index(word), the index in word, letterindex(word), the
-	  /// index in the letter part of word. The following relations exist: index(w) =
-	  /// offset + i - 1 index(word) = i - iIgnoreAtBeginning letterindex(word) =
-	  /// index(word) - 1 (see first loop). It follows that: index(w) - index(word) =
-	  /// offset - 1 + iIgnoreAtBeginning index(w) = letterindex(word) + offset +
-	  /// iIgnoreAtBeginning
-	  /// </summary>
-
-	  /// <summary>
-	  /// Hyphenate word and return an array of hyphenation points.
-	  /// </summary>
-	  /// <param name="w"> char array that contains the word </param>
-	  /// <param name="offset"> Offset to first character in word </param>
-	  /// <param name="len"> Length of word </param>
-	  /// <param name="remainCharCount"> Minimum number of characters allowed before the
-	  ///        hyphenation point. </param>
-	  /// <param name="pushCharCount"> Minimum number of characters allowed after the
-	  ///        hyphenation point. </param>
-	  /// <returns> a <seealso cref="Hyphenation Hyphenation"/> object representing the
-	  ///         hyphenated word or null if word is not hyphenated. </returns>
-	  public virtual Hyphenation hyphenate(char[] w, int offset, int len, int remainCharCount, int pushCharCount)
-	  {
-		int i;
-		char[] word = new char[len + 3];
-
-		// normalize word
-		char[] c = new char[2];
-		int iIgnoreAtBeginning = 0;
-		int iLength = len;
-		bool bEndOfLetters = false;
-		for (i = 1; i <= len; i++)
-		{
-		  c[0] = w[offset + i - 1];
-		  int nc = classmap.find(c, 0);
-		  if (nc < 0) // found a non-letter character ...
-		  {
-			if (i == (1 + iIgnoreAtBeginning))
-			{
-			  // ... before any letter character
-			  iIgnoreAtBeginning++;
-			}
-			else
-			{
-			  // ... after a letter character
-			  bEndOfLetters = true;
-			}
-			iLength--;
-		  }
-		  else
-		  {
-			if (!bEndOfLetters)
-			{
-			  word[i - iIgnoreAtBeginning] = (char) nc;
-			}
-			else
-			{
-			  return null;
-			}
-		  }
-		}
-		len = iLength;
-		if (len < (remainCharCount + pushCharCount))
-		{
-		  // word is too short to be hyphenated
-		  return null;
-		}
-		int[] result = new int[len + 1];
-		int k = 0;
-
-		// check exception list first
-		string sw = new string(word, 1, len);
-		if (stoplist.ContainsKey(sw))
-		{
-		  // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no =
-		  // null)
-		  List<object> hw = stoplist[sw];
-		  int j = 0;
-		  for (i = 0; i < hw.Count; i++)
-		  {
-			object o = hw[i];
-			// j = index(sw) = letterindex(word)?
-			// result[k] = corresponding index(w)
-			if (o is string)
-			{
-			  j += ((string) o).Length;
-			  if (j >= remainCharCount && j < (len - pushCharCount))
-			  {
-				result[k++] = j + iIgnoreAtBeginning;
-			  }
-			}
-		  }
-		}
-		else
-		{
-		  // use algorithm to get hyphenation points
-		  word[0] = '.'; // word start marker
-		  word[len + 1] = '.'; // word end marker
-		  word[len + 2] = (char)0; // null terminated
-		  sbyte[] il = new sbyte[len + 3]; // initialized to zero
-		  for (i = 0; i < len + 1; i++)
-		  {
-			searchPatterns(word, i, il);
-		  }
-
-		  // hyphenation points are located where interletter value is odd
-		  // i is letterindex(word),
-		  // i + 1 is index(word),
-		  // result[k] = corresponding index(w)
-		  for (i = 0; i < len; i++)
-		  {
-			if (((il[i + 1] & 1) == 1) && i >= remainCharCount && i <= (len - pushCharCount))
-			{
-			  result[k++] = i + iIgnoreAtBeginning;
-			}
-		  }
-		}
-
-		if (k > 0)
-		{
-		  // trim result array
-		  int[] res = new int[k + 2];
-		  Array.Copy(result, 0, res, 1, k);
-		  // We add the synthetical hyphenation points
-		  // at the beginning and end of the word
-		  res[0] = 0;
-		  res[k + 1] = len;
-		  return new Hyphenation(res);
-		}
-		else
-		{
-		  return null;
-		}
-	  }
-
-	  /// <summary>
-	  /// Add a character class to the tree. It is used by
-	  /// <seealso cref="PatternParser PatternParser"/> as callback to add character classes.
-	  /// Character classes define the valid word characters for hyphenation. If a
-	  /// word contains a character not defined in any of the classes, it is not
-	  /// hyphenated. It also defines a way to normalize the characters in order to
-	  /// compare them with the stored patterns. Usually pattern files use only lower
-	  /// case characters, in this case a class for letter 'a', for example, should
-	  /// be defined as "aA", the first character being the normalization char.
-	  /// </summary>
-	  public virtual void addClass(string chargroup)
-	  {
-		if (chargroup.Length > 0)
-		{
-		  char equivChar = chargroup[0];
-		  char[] key = new char[2];
-		  key[1] = (char)0;
-		  for (int i = 0; i < chargroup.Length; i++)
-		  {
-			key[0] = chargroup[i];
-			classmap.insert(key, 0, equivChar);
-		  }
-		}
-	  }
-
-	  /// <summary>
-	  /// Add an exception to the tree. It is used by
-	  /// <seealso cref="PatternParser PatternParser"/> class as callback to store the
-	  /// hyphenation exceptions.
-	  /// </summary>
-	  /// <param name="word"> normalized word </param>
-	  /// <param name="hyphenatedword"> a vector of alternating strings and
-	  ///        <seealso cref="Hyphen hyphen"/> objects. </param>
-	  public virtual void addException(string word, List<object> hyphenatedword)
-	  {
-		stoplist[word] = hyphenatedword;
-	  }
-
-	  /// <summary>
-	  /// Add a pattern to the tree. Mainly, to be used by
-	  /// <seealso cref="PatternParser PatternParser"/> class as callback to add a pattern to
-	  /// the tree.
-	  /// </summary>
-	  /// <param name="pattern"> the hyphenation pattern </param>
-	  /// <param name="ivalue"> interletter weight values indicating the desirability and
-	  ///        priority of hyphenating at a given point within the pattern. It
-	  ///        should contain only digit characters. (i.e. '0' to '9'). </param>
-	  public virtual void addPattern(string pattern, string ivalue)
-	  {
-		int k = ivalues.find(ivalue);
-		if (k <= 0)
-		{
-		  k = packValues(ivalue);
-		  ivalues.insert(ivalue, (char) k);
-		}
-		insert(pattern, (char) k);
-	  }
-
-	  public override void printStats(PrintStream @out)
-	  {
-		@out.println("Value space size = " + Convert.ToString(vspace.length()));
-		base.printStats(@out);
-
-	  }
-	}
-
+	public class HyphenationTree : TernaryTree, IPatternConsumer
+    {
+
+        /// <summary>
+        /// value space: stores the interletter values
+        /// </summary>
+        protected internal ByteVector vspace;
+
+        /// <summary>
+        /// This map stores hyphenation exceptions
+        /// </summary>
+        protected internal Dictionary<string, List<object>> stoplist;
+
+        /// <summary>
+        /// This map stores the character classes
+        /// </summary>
+        protected internal TernaryTree classmap;
+
+        /// <summary>
+        /// Temporary map to store interletter values on pattern loading.
+        /// </summary>
+        [NonSerialized]
+        private TernaryTree ivalues;
+
+        public HyphenationTree()
+        {
+            stoplist = new Dictionary<string, List<object>>(23); // usually a small table
+            classmap = new TernaryTree();
+            vspace = new ByteVector();
+            vspace.Alloc(1); // this reserves index 0, which we don't use
+        }
+
+        /// <summary>
+        /// Packs the values by storing them in 4 bits, two values into a byte Values
+        /// range is from 0 to 9. We use zero as terminator, so we'll add 1 to the
+        /// value.
+        /// </summary>
+        /// <param name="values"> a string of digits from '0' to '9' representing the
+        ///        interletter values. </param>
+        /// <returns> the index into the vspace array where the packed values are stored. </returns>
+        protected internal virtual int PackValues(string values)
+        {
+            int i, n = values.Length;
+            int m = (n & 1) == 1 ? (n >> 1) + 2 : (n >> 1) + 1;
+            int offset = vspace.Alloc(m);
+            sbyte[] va = vspace.Array;
+            for (i = 0; i < n; i++)
+            {
+                int j = i >> 1;
+                sbyte v = (sbyte)((values[i] - '0' + 1) & 0x0f);
+                if ((i & 1) == 1)
+                {
+                    va[j + offset] = (sbyte)(va[j + offset] | v);
+                }
+                else
+                {
+                    va[j + offset] = (sbyte)(v << 4); // big endian
+                }
+            }
+            va[m - 1 + offset] = 0; // terminator
+            return offset;
+        }
+
+        protected internal virtual string UnpackValues(int k)
+        {
+            StringBuilder buf = new StringBuilder();
+            sbyte v = vspace[k++];
+            while (v != 0)
+            {
+                char c = (char)(((int)((uint)v >> 4)) - 1 + '0');
+                buf.Append(c);
+                c = (char)(v & 0x0f);
+                if (c == 0)
+                {
+                    break;
+                }
+                c = (char)(c - 1 + '0');
+                buf.Append(c);
+                v = vspace[k++];
+            }
+            return buf.ToString();
+        }
+
+        /// <summary>
+        /// Read hyphenation patterns from an XML file.
+        /// </summary>
+        /// <param name="f"> the filename </param>
+        /// <exception cref="IOException"> In case the parsing fails </exception>
+        public virtual void LoadPatterns(string filename)
+        {
+            LoadPatterns(filename, Encoding.UTF8);
+        }
+
+        /// <summary>
+        /// Read hyphenation patterns from an XML file.
+        /// </summary>
+        /// <param name="f"> the filename </param>
+        /// <exception cref="IOException"> In case the parsing fails </exception>
+        public virtual void LoadPatterns(string filename, Encoding encoding)
+        {
+            var src = new FileStream(filename, FileMode.Open, FileAccess.Read);
+            LoadPatterns(src, encoding);
+        }
+
+        /// <summary>
+        /// Read hyphenation patterns from an XML file.
+        /// </summary>
+        /// <param name="f"> the filename </param>
+        /// <exception cref="IOException"> In case the parsing fails </exception>
+        public virtual void LoadPatterns(FileInfo f)
+        {
+            LoadPatterns(f, Encoding.UTF8);
+        }
+
+        /// <summary>
+        /// Read hyphenation patterns from an XML file.
+        /// </summary>
+        /// <param name="f"> the filename </param>
+        /// <exception cref="IOException"> In case the parsing fails </exception>
+        public virtual void LoadPatterns(FileInfo f, Encoding encoding)
+        {
+            var src = new FileStream(f.FullName, FileMode.Open, FileAccess.Read);
+            LoadPatterns(src, encoding);
+        }
+
+        /// <summary>
+        /// Read hyphenation patterns from an XML file.
+        /// </summary>
+        /// <param name="source"> the InputSource for the file </param>
+        /// <exception cref="IOException"> In case the parsing fails </exception>
+        public virtual void LoadPatterns(Stream source)
+        {
+            LoadPatterns(source, Encoding.UTF8);
+        }
+
+        /// <summary>
+        /// Read hyphenation patterns from an XML file.
+        /// </summary>
+        /// <param name="source"> the InputSource for the file </param>
+        /// <exception cref="IOException"> In case the parsing fails </exception>
+        public virtual void LoadPatterns(Stream source, Encoding encoding)
+        {
+            // LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
+            using (var reader = XmlReader.Create(new StreamReader(source, encoding), new XmlReaderSettings
+            {
+                DtdProcessing = DtdProcessing.Parse,
+                XmlResolver = new PatternParser.DtdResolver()
+            }))
+            {
+                LoadPatterns(reader);
+            }
+        }
+
+        public virtual void LoadPatterns(XmlReader source)
+        {
+            PatternParser pp = new PatternParser(this);
+            ivalues = new TernaryTree();
+
+            pp.Parse(source);
+
+            // patterns/values should be now in the tree
+            // let's optimize a bit
+            TrimToSize();
+            vspace.TrimToSize();
+            classmap.TrimToSize();
+
+            // get rid of the auxiliary map
+            ivalues = null;
+        }
+
+        public virtual string FindPattern(string pat)
+        {
+            int k = base.Find(pat);
+            if (k >= 0)
+            {
+                return UnpackValues(k);
+            }
+            return "";
+        }
+
+        /// <summary>
+        /// String compare, returns 0 if equal or t is a substring of s
+        /// </summary>
+        protected internal virtual int HStrCmp(char[] s, int si, char[] t, int ti)
+        {
+            for (; s[si] == t[ti]; si++, ti++)
+            {
+                if (s[si] == 0)
+                {
+                    return 0;
+                }
+            }
+            if (t[ti] == 0)
+            {
+                return 0;
+            }
+            return s[si] - t[ti];
+        }
+
+        protected internal virtual sbyte[] GetValues(int k)
+        {
+            StringBuilder buf = new StringBuilder();
+            sbyte v = vspace[k++];
+            while (v != 0)
+            {
+                char c = (char)(((int)((uint)v >> 4)) - 1);
+                buf.Append(c);
+                c = (char)(v & 0x0f);
+                if (c == 0)
+                {
+                    break;
+                }
+                c = (char)(c - 1);
+                buf.Append(c);
+                v = vspace[k++];
+            }
+            sbyte[] res = new sbyte[buf.Length];
+            for (int i = 0; i < res.Length; i++)
+            {
+                res[i] = (sbyte)buf[i];
+            }
+            return res;
+        }
+
+        /// <summary>
+        /// <para>
+        /// Search for all possible partial matches of word starting at index an update
+        /// interletter values. In other words, it does something like:
+        /// </para>
+        /// <code>
+        /// for(i=0; i&lt;patterns.length; i++) {
+        /// if ( word.substring(index).startsWidth(patterns[i]) )
+        /// update_interletter_values(patterns[i]);
+        /// }
+        /// </code>
+        /// <para>
+        /// But it is done in an efficient way since the patterns are stored in a
+        /// ternary tree. In fact, this is the whole purpose of having the tree: doing
+        /// this search without having to test every single pattern. The number of
+        /// patterns for languages such as English range from 4000 to 10000. Thus,
+        /// doing thousands of string comparisons for each word to hyphenate would be
+        /// really slow without the tree. The tradeoff is memory, but using a ternary
+        /// tree instead of a trie, almost halves the the memory used by Lout or TeX.
+        /// It's also faster than using a hash table
+        /// </para>
+        /// </summary>
+        /// <param name="word"> null terminated word to match </param>
+        /// <param name="index"> start index from word </param>
+        /// <param name="il"> interletter values array to update </param>
+        protected internal virtual void SearchPatterns(char[] word, int index, sbyte[] il)
+        {
+            sbyte[] values;
+            int i = index;
+            char p, q;
+            char sp = word[i];
+            p = root;
+
+            while (p > 0 && p < sc.Length)
+            {
+                if (sc[p] == 0xFFFF)
+                {
+                    if (HStrCmp(word, i, kv.Array, lo[p]) == 0)
+                    {
+                        values = GetValues(eq[p]); // data pointer is in eq[]
+                        int j = index;
+                        for (int k = 0; k < values.Length; k++)
+                        {
+                            if (j < il.Length && values[k] > il[j])
+                            {
+                                il[j] = values[k];
+                            }
+                            j++;
+                        }
+                    }
+                    return;
+                }
+                int d = sp - sc[p];
+                if (d == 0)
+                {
+                    if (sp == 0)
+                    {
+                        break;
+                    }
+                    sp = word[++i];
+                    p = eq[p];
+                    q = p;
+
+                    // look for a pattern ending at this position by searching for
+                    // the null char ( splitchar == 0 )
+                    while (q > 0 && q < sc.Length)
+                    {
+                        if (sc[q] == 0xFFFF) // stop at compressed branch
+                        {
+                            break;
+                        }
+                        if (sc[q] == 0)
+                        {
+                            values = GetValues(eq[q]);
+                            int j = index;
+                            for (int k = 0; k < values.Length; k++)
+                            {
+                                if (j < il.Length && values[k] > il[j])
+                                {
+                                    il[j] = values[k];
+                                }
+                                j++;
+                            }
+                            break;
+                        }
+                        else
+                        {
+                            q = lo[q];
+
+                            /// <summary>
+                            /// actually the code should be: q = sc[q] < 0 ? hi[q] : lo[q]; but
+                            /// java chars are unsigned
+                            /// </summary>
+                        }
+                    }
+                }
+                else
+                {
+                    p = d < 0 ? lo[p] : hi[p];
+                }
+            }
+        }
+
+        /// <summary>
+        /// Hyphenate word and return a Hyphenation object.
+        /// </summary>
+        /// <param name="word"> the word to be hyphenated </param>
+        /// <param name="remainCharCount"> Minimum number of characters allowed before the
+        ///        hyphenation point. </param>
+        /// <param name="pushCharCount"> Minimum number of characters allowed after the
+        ///        hyphenation point. </param>
+        /// <returns> a <seealso cref="Hyphenation Hyphenation"/> object representing the
+        ///         hyphenated word or null if word is not hyphenated. </returns>
+        public virtual Hyphenation Hyphenate(string word, int remainCharCount, int pushCharCount)
+        {
+            char[] w = word.ToCharArray();
+            return Hyphenate(w, 0, w.Length, remainCharCount, pushCharCount);
+        }
+
+        /// <summary>
+        /// w = "****nnllllllnnn*****", where n is a non-letter, l is a letter, all n
+        /// may be absent, the first n is at offset, the first l is at offset +
+        /// iIgnoreAtBeginning; word = ".llllll.'\0'***", where all l in w are copied
+        /// into word. In the first part of the routine len = w.length, in the second
+        /// part of the routine len = word.length. Three indices are used: index(w),
+        /// the index in w, index(word), the index in word, letterindex(word), the
+        /// index in the letter part of word. The following relations exist: index(w) =
+        /// offset + i - 1 index(word) = i - iIgnoreAtBeginning letterindex(word) =
+        /// index(word) - 1 (see first loop). It follows that: index(w) - index(word) =
+        /// offset - 1 + iIgnoreAtBeginning index(w) = letterindex(word) + offset +
+        /// iIgnoreAtBeginning
+        /// </summary>
+
+        /// <summary>
+        /// Hyphenate word and return an array of hyphenation points.
+        /// </summary>
+        /// <param name="w"> char array that contains the word </param>
+        /// <param name="offset"> Offset to first character in word </param>
+        /// <param name="len"> Length of word </param>
+        /// <param name="remainCharCount"> Minimum number of characters allowed before the
+        ///        hyphenation point. </param>
+        /// <param name="pushCharCount"> Minimum number of characters allowed after the
+        ///        hyphenation point. </param>
+        /// <returns> a <seealso cref="Hyphenation Hyphenation"/> object representing the
+        ///         hyphenated word or null if word is not hyphenated. </returns>
+        public virtual Hyphenation Hyphenate(char[] w, int offset, int len, int remainCharCount, int pushCharCount)
+        {
+            int i;
+            char[] word = new char[len + 3];
+
+            // normalize word
+            char[] c = new char[2];
+            int iIgnoreAtBeginning = 0;
+            int iLength = len;
+            bool bEndOfLetters = false;
+            for (i = 1; i <= len; i++)
+            {
+                c[0] = w[offset + i - 1];
+                int nc = classmap.Find(c, 0);
+                if (nc < 0) // found a non-letter character ...
+                {
+                    if (i == (1 + iIgnoreAtBeginning))
+                    {
+                        // ... before any letter character
+                        iIgnoreAtBeginning++;
+                    }
+                    else
+                    {
+                        // ... after a letter character
+                        bEndOfLetters = true;
+                    }
+                    iLength--;
+                }
+                else
+                {
+                    if (!bEndOfLetters)
+                    {
+                        word[i - iIgnoreAtBeginning] = (char)nc;
+                    }
+                    else
+                    {
+                        return null;
+                    }
+                }
+            }
+            len = iLength;
+            if (len < (remainCharCount + pushCharCount))
+            {
+                // word is too short to be hyphenated
+                return null;
+            }
+            int[] result = new int[len + 1];
+            int k = 0;
+
+            // check exception list first
+            string sw = new string(word, 1, len);
+            if (stoplist.ContainsKey(sw))
+            {
+                // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no =
+                // null)
+                List<object> hw = stoplist[sw];
+                int j = 0;
+                for (i = 0; i < hw.Count; i++)
+                {
+                    object o = hw[i];
+                    // j = index(sw) = letterindex(word)?
+                    // result[k] = corresponding index(w)
+                    if (o is string)
+                    {
+                        j += ((string)o).Length;
+                        if (j >= remainCharCount && j < (len - pushCharCount))
+                        {
+                            result[k++] = j + iIgnoreAtBeginning;
+                        }
+                    }
+                }
+            }
+            else
+            {
+                // use algorithm to get hyphenation points
+                word[0] = '.'; // word start marker
+                word[len + 1] = '.'; // word end marker
+                word[len + 2] = (char)0; // null terminated
+                sbyte[] il = new sbyte[len + 3]; // initialized to zero
+                for (i = 0; i < len + 1; i++)
+                {
+                    SearchPatterns(word, i, il);
+                }
+
+                // hyphenation points are located where interletter value is odd
+                // i is letterindex(word),
+                // i + 1 is index(word),
+                // result[k] = corresponding index(w)
+                for (i = 0; i < len; i++)
+                {
+                    if (((il[i + 1] & 1) == 1) && i >= remainCharCount && i <= (len - pushCharCount))
+                    {
+                        result[k++] = i + iIgnoreAtBeginning;
+                    }
+                }
+            }
+
+            if (k > 0)
+            {
+                // trim result array
+                int[] res = new int[k + 2];
+                Array.Copy(result, 0, res, 1, k);
+                // We add the synthetical hyphenation points
+                // at the beginning and end of the word
+                res[0] = 0;
+                res[k + 1] = len;
+                return new Hyphenation(res);
+            }
+            else
+            {
+                return null;
+            }
+        }
+
+        /// <summary>
+        /// Add a character class to the tree. It is used by
+        /// <seealso cref="PatternParser PatternParser"/> as callback to add character classes.
+        /// Character classes define the valid word characters for hyphenation. If a
+        /// word contains a character not defined in any of the classes, it is not
+        /// hyphenated. It also defines a way to normalize the characters in order to
+        /// compare them with the stored patterns. Usually pattern files use only lower
+        /// case characters, in this case a class for letter 'a', for example, should
+        /// be defined as "aA", the first character being the normalization char.
+        /// </summary>
+        public virtual void AddClass(string chargroup)
+        {
+            if (chargroup.Length > 0)
+            {
+                char equivChar = chargroup[0];
+                char[] key = new char[2];
+                key[1] = (char)0;
+                for (int i = 0; i < chargroup.Length; i++)
+                {
+                    key[0] = chargroup[i];
+                    classmap.Insert(key, 0, equivChar);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Add an exception to the tree. It is used by
+        /// <seealso cref="PatternParser PatternParser"/> class as callback to store the
+        /// hyphenation exceptions.
+        /// </summary>
+        /// <param name="word"> normalized word </param>
+        /// <param name="hyphenatedword"> a vector of alternating strings and
+        ///        <seealso cref="Hyphen hyphen"/> objects. </param>
+        public virtual void AddException(string word, List<object> hyphenatedword)
+        {
+            stoplist[word] = hyphenatedword;
+        }
+
+        /// <summary>
+        /// Add a pattern to the tree. Mainly, to be used by
+        /// <seealso cref="PatternParser PatternParser"/> class as callback to add a pattern to
+        /// the tree.
+        /// </summary>
+        /// <param name="pattern"> the hyphenation pattern </param>
+        /// <param name="ivalue"> interletter weight values indicating the desirability and
+        ///        priority of hyphenating at a given point within the pattern. It
+        ///        should contain only digit characters. (i.e. '0' to '9'). </param>
+        public virtual void AddPattern(string pattern, string ivalue)
+        {
+            int k = ivalues.Find(ivalue);
+            if (k <= 0)
+            {
+                k = PackValues(ivalue);
+                ivalues.Insert(ivalue, (char)k);
+            }
+            Insert(pattern, (char)k);
+        }
+
+        // public override void printStats(PrintStream @out)
+        // {
+        //@out.println("Value space size = " + Convert.ToString(vspace.length()));
+        //base.printStats(@out);
+
+        // }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs
index 762b832..069badd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternConsumer.cs
@@ -1,31 +1,31 @@
-\ufeff/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-using System.Collections.Generic;
+\ufeffusing System.Collections.Generic;
 
 namespace Lucene.Net.Analysis.Compound.Hyphenation
 {
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
 
-	/// <summary>
-	/// This interface is used to connect the XML pattern file parser to the
-	/// hyphenation tree.
-	/// 
-	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
-	/// </summary>
-	public interface PatternConsumer
+    /// <summary>
+    /// This interface is used to connect the XML pattern file parser to the
+    /// hyphenation tree.
+    /// 
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
+    /// </summary>
+    public interface IPatternConsumer
 	{
 
 	  /// <summary>
@@ -34,7 +34,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
 	  /// usually means to ignore case.
 	  /// </summary>
 	  /// <param name="chargroup"> character group </param>
-	  void addClass(string chargroup);
+	  void AddClass(string chargroup);
 
 	  /// <summary>
 	  /// Add a hyphenation exception. An exception replaces the result obtained by
@@ -42,15 +42,13 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
 	  /// his own hyphenation. A hyphenatedword is a vector of alternating String's
 	  /// and <seealso cref="Hyphen"/> instances
 	  /// </summary>
-	  void addException(string word, List<object> hyphenatedword);
+	  void AddException(string word, List<object> hyphenatedword);
 
 	  /// <summary>
 	  /// Add hyphenation patterns.
 	  /// </summary>
 	  /// <param name="pattern"> the pattern </param>
 	  /// <param name="values"> interletter values expressed as a string of digit characters. </param>
-	  void addPattern(string pattern, string values);
-
+	  void AddPattern(string pattern, string values);
 	}
-
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
index 1d012c4..e94e8cf 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/PatternParser.cs
@@ -1,457 +1,484 @@
-\ufeff/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-using System;
-using System.Collections;
+\ufeffusing System;
 using System.Collections.Generic;
 using System.IO;
+using System.Linq;
 using System.Text;
+using System.Xml;
 
 namespace Lucene.Net.Analysis.Compound.Hyphenation
 {
-
-	// SAX
-    
-    // Java
-
-	/// <summary>
-	/// A SAX document handler to read and parse hyphenation patterns from a XML
-	/// file.
-	/// 
-	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
-	/// </summary>
-	public class PatternParser : DefaultHandler
-	{
-
-	  internal XMLReader parser;
-
-	  internal int currElement;
-
-	  internal PatternConsumer consumer;
-
-	  internal StringBuilder token;
-
-	  internal List<object> exception;
-
-	  internal char hyphenChar;
-
-	  internal string errMsg;
-
-	  internal const int ELEM_CLASSES = 1;
-
-	  internal const int ELEM_EXCEPTIONS = 2;
-
-	  internal const int ELEM_PATTERNS = 3;
-
-	  internal const int ELEM_HYPHEN = 4;
-
-	  public PatternParser()
-	  {
-		token = new StringBuilder();
-		parser = createParser();
-		parser.ContentHandler = this;
-		parser.ErrorHandler = this;
-		parser.EntityResolver = this;
-		hyphenChar = '-'; // default
-
-	  }
-
-	  public PatternParser(PatternConsumer consumer) : this()
-	  {
-		this.consumer = consumer;
-	  }
-
-	  public virtual PatternConsumer Consumer
-	  {
-		  set
-		  {
-			this.consumer = value;
-		  }
-	  }
-
-	  /// <summary>
-	  /// Parses a hyphenation pattern file.
-	  /// </summary>
-	  /// <param name="filename"> the filename </param>
-	  /// <exception cref="IOException"> In case of an exception while parsing </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void parse(String filename) throws java.io.IOException
-	  public virtual void parse(string filename)
-	  {
-		parse(new InputSource(filename));
-	  }
-
-	  /// <summary>
-	  /// Parses a hyphenation pattern file.
-	  /// </summary>
-	  /// <param name="file"> the pattern file </param>
-	  /// <exception cref="IOException"> In case of an exception while parsing </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void parse(java.io.File file) throws java.io.IOException
-	  public virtual void parse(File file)
-	  {
-		InputSource src = new InputSource(file.toURI().toASCIIString());
-		parse(src);
-	  }
-
-	  /// <summary>
-	  /// Parses a hyphenation pattern file.
-	  /// </summary>
-	  /// <param name="source"> the InputSource for the file </param>
-	  /// <exception cref="IOException"> In case of an exception while parsing </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void parse(org.xml.sax.InputSource source) throws java.io.IOException
-	  public virtual void parse(InputSource source)
-	  {
-		try
-		{
-		  parser.parse(source);
-		}
-		catch (SAXException e)
-		{
-		  throw new IOException(e);
-		}
-	  }
-
-	  /// <summary>
-	  /// Creates a SAX parser using JAXP
-	  /// </summary>
-	  /// <returns> the created SAX parser </returns>
-	  internal static XMLReader createParser()
-	  {
-		try
-		{
-		  SAXParserFactory factory = SAXParserFactory.newInstance();
-		  factory.NamespaceAware = true;
-		  return factory.newSAXParser().XMLReader;
-		}
-		catch (Exception e)
-		{
-		  throw new Exception("Couldn't create XMLReader: " + e.Message);
-		}
-	  }
-
-	  protected internal virtual string readToken(StringBuilder chars)
-	  {
-		string word;
-		bool space = false;
-		int i;
-		for (i = 0; i < chars.Length; i++)
-		{
-		  if (char.IsWhiteSpace(chars[i]))
-		  {
-			space = true;
-		  }
-		  else
-		  {
-			break;
-		  }
-		}
-		if (space)
-		{
-		  // chars.delete(0,i);
-		  for (int countr = i; countr < chars.Length; countr++)
-		  {
-			chars[countr - i] = chars[countr];
-		  }
-		  chars.Length = chars.Length - i;
-		  if (token.Length > 0)
-		  {
-			word = token.ToString();
-			token.Length = 0;
-			return word;
-		  }
-		}
-		space = false;
-		for (i = 0; i < chars.Length; i++)
-		{
-		  if (char.IsWhiteSpace(chars[i]))
-		  {
-			space = true;
-			break;
-		  }
-		}
-		token.Append(chars.ToString().Substring(0, i));
-		// chars.delete(0,i);
-		for (int countr = i; countr < chars.Length; countr++)
-		{
-		  chars[countr - i] = chars[countr];
-		}
-		chars.Length = chars.Length - i;
-		if (space)
-		{
-		  word = token.ToString();
-		  token.Length = 0;
-		  return word;
-		}
-		token.Append(chars);
-		return null;
-	  }
-
-	  protected internal static string getPattern(string word)
-	  {
-		StringBuilder pat = new StringBuilder();
-		int len = word.Length;
-		for (int i = 0; i < len; i++)
-		{
-		  if (!char.IsDigit(word[i]))
-		  {
-			pat.Append(word[i]);
-		  }
-		}
-		return pat.ToString();
-	  }
-
-	  protected internal virtual List<object> normalizeException(List<T1> ex)
-	  {
-		List<object> res = new List<object>();
-		for (int i = 0; i < ex.Count; i++)
-		{
-		  object item = ex[i];
-		  if (item is string)
-		  {
-			string str = (string) item;
-			StringBuilder buf = new StringBuilder();
-			for (int j = 0; j < str.Length; j++)
-			{
-			  char c = str[j];
-			  if (c != hyphenChar)
-			  {
-				buf.Append(c);
-			  }
-			  else
-			  {
-				res.Add(buf.ToString());
-				buf.Length = 0;
-				char[] h = new char[1];
-				h[0] = hyphenChar;
-				// we use here hyphenChar which is not necessarily
-				// the one to be printed
-				res.Add(new Hyphen(new string(h), null, null));
-			  }
-			}
-			if (buf.Length > 0)
-			{
-			  res.Add(buf.ToString());
-			}
-		  }
-		  else
-		  {
-			res.Add(item);
-		  }
-		}
-		return res;
-	  }
-
-	  protected internal virtual string getExceptionWord<T1>(List<T1> ex)
-	  {
-		StringBuilder res = new StringBuilder();
-		for (int i = 0; i < ex.Count; i++)
-		{
-		  object item = ex[i];
-		  if (item is string)
-		  {
-			res.Append((string) item);
-		  }
-		  else
-		  {
-			if (((Hyphen) item).noBreak != null)
-			{
-			  res.Append(((Hyphen) item).noBreak);
-			}
-		  }
-		}
-		return res.ToString();
-	  }
-
-	  protected internal static string getInterletterValues(string pat)
-	  {
-		StringBuilder il = new StringBuilder();
-		string word = pat + "a"; // add dummy letter to serve as sentinel
-		int len = word.Length;
-		for (int i = 0; i < len; i++)
-		{
-		  char c = word[i];
-		  if (char.IsDigit(c))
-		  {
-			il.Append(c);
-			i++;
-		  }
-		  else
-		  {
-			il.Append('0');
-		  }
-		}
-		return il.ToString();
-	  }
-
-	  //
-	  // EntityResolver methods
-	  //
-	  public override InputSource resolveEntity(string publicId, string systemId)
-	  {
-		// supply the internal hyphenation.dtd if possible
-		if ((systemId != null && systemId.matches("(?i).*\\bhyphenation.dtd\\b.*")) || ("hyphenation-info".Equals(publicId)))
-		{
-		  // System.out.println(this.getClass().getResource("hyphenation.dtd").toExternalForm());
-		  return new InputSource(this.GetType().getResource("hyphenation.dtd").toExternalForm());
-		}
-		return null;
-	  }
-
-	  //
-	  // ContentHandler methods
-	  //
-
-	  /// <seealso cref= org.xml.sax.ContentHandler#startElement(java.lang.String,
-	  ///      java.lang.String, java.lang.String, org.xml.sax.Attributes) </seealso>
-	  public override void startElement(string uri, string local, string raw, Attributes attrs)
-	  {
-		if (local.Equals("hyphen-char"))
-		{
-		  string h = attrs.getValue("value");
-		  if (h != null && h.Length == 1)
-		  {
-			hyphenChar = h[0];
-		  }
-		}
-		else if (local.Equals("classes"))
-		{
-		  currElement = ELEM_CLASSES;
-		}
-		else if (local.Equals("patterns"))
-		{
-		  currElement = ELEM_PATTERNS;
-		}
-		else if (local.Equals("exceptions"))
-		{
-		  currElement = ELEM_EXCEPTIONS;
-		  exception = new List<>();
-		}
-		else if (local.Equals("hyphen"))
-		{
-		  if (token.Length > 0)
-		  {
-			exception.Add(token.ToString());
-		  }
-		  exception.Add(new Hyphen(attrs.getValue("pre"), attrs.getValue("no"), attrs.getValue("post")));
-		  currElement = ELEM_HYPHEN;
-		}
-		token.Length = 0;
-	  }
-
-	  /// <seealso cref= org.xml.sax.ContentHandler#endElement(java.lang.String,
-	  ///      java.lang.String, java.lang.String) </seealso>
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @Override @SuppressWarnings("unchecked") public void endElement(String uri, String local, String raw)
-	  public override void endElement(string uri, string local, string raw)
-	  {
-
-		if (token.Length > 0)
-		{
-		  string word = token.ToString();
-		  switch (currElement)
-		  {
-			case ELEM_CLASSES:
-			  consumer.addClass(word);
-			  break;
-			case ELEM_EXCEPTIONS:
-			  exception.Add(word);
-			  exception = normalizeException(exception);
-			  consumer.addException(getExceptionWord(exception), (ArrayList) exception.clone());
-			  break;
-			case ELEM_PATTERNS:
-			  consumer.addPattern(getPattern(word), getInterletterValues(word));
-			  break;
-			case ELEM_HYPHEN:
-			  // nothing to do
-			  break;
-		  }
-		  if (currElement != ELEM_HYPHEN)
-		  {
-			token.Length = 0;
-		  }
-		}
-		if (currElement == ELEM_HYPHEN)
-		{
-		  currElement = ELEM_EXCEPTIONS;
-		}
-		else
-		{
-		  currElement = 0;
-		}
-
-	  }
-
-	  /// <seealso cref= org.xml.sax.ContentHandler#characters(char[], int, int) </seealso>
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @SuppressWarnings("unchecked") @Override public void characters(char ch[] , int start, int length)
-	  public override void characters(char[] ch, int start, int length)
-	  {
-		StringBuilder chars = new StringBuilder(length);
-		chars.Append(ch, start, length);
-		string word = readToken(chars);
-		while (word != null)
-		{
-		  // System.out.println("\"" + word + "\"");
-		  switch (currElement)
-		  {
-			case ELEM_CLASSES:
-			  consumer.addClass(word);
-			  break;
-			case ELEM_EXCEPTIONS:
-			  exception.Add(word);
-			  exception = normalizeException(exception);
-			  consumer.addException(getExceptionWord(exception), (ArrayList) exception.clone());
-			  exception.Clear();
-			  break;
-			case ELEM_PATTERNS:
-			  consumer.addPattern(getPattern(word), getInterletterValues(word));
-			  break;
-		  }
-		  word = readToken(chars);
-		}
-
-	  }
-
-	  /// <summary>
-	  /// Returns a string of the location.
-	  /// </summary>
-	  private string getLocationString(SAXParseException ex)
-	  {
-		StringBuilder str = new StringBuilder();
-
-		string systemId = ex.SystemId;
-		if (systemId != null)
-		{
-		  int index = systemId.LastIndexOf('/');
-		  if (index != -1)
-		  {
-			systemId = systemId.Substring(index + 1);
-		  }
-		  str.Append(systemId);
-		}
-		str.Append(':');
-		str.Append(ex.LineNumber);
-		str.Append(':');
-		str.Append(ex.ColumnNumber);
-
-		return str.ToString();
-
-	  } // getLocationString(SAXParseException):String
-	}
-
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// A XMLReader document handler to read and parse hyphenation patterns from a XML
+    /// file.
+    /// 
+    /// LUCENENET: This class has been refactored from its Java counterpart to use XmlReader rather
+    /// than a SAX parser.
+    /// </summary>
+    public class PatternParser
+    {
+        internal int currElement;
+
+        internal IPatternConsumer consumer;
+
+        internal StringBuilder token;
+
+        internal List<object> exception;
+
+        internal char hyphenChar;
+
+        internal string errMsg;
+
+        internal const int ELEM_CLASSES = 1;
+
+        internal const int ELEM_EXCEPTIONS = 2;
+
+        internal const int ELEM_PATTERNS = 3;
+
+        internal const int ELEM_HYPHEN = 4;
+
+        public PatternParser()
+        {
+            token = new StringBuilder();
+            hyphenChar = '-'; // default
+        }
+
+        public PatternParser(IPatternConsumer consumer) : this()
+        {
+            this.consumer = consumer;
+        }
+
+        public virtual IPatternConsumer Consumer
+        {
+            set
+            {
+                this.consumer = value;
+            }
+        }
+
+        /// <summary>
+        /// Parses a hyphenation pattern file.
+        /// </summary>
+        /// <param name="filename"> the filename </param>
+        /// <exception cref="IOException"> In case of an exception while parsing </exception>
+        public virtual void Parse(string filename)
+        {
+            // LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
+            using (var src = XmlReader.Create(filename, new XmlReaderSettings
+            {
+                DtdProcessing = DtdProcessing.Parse,
+                XmlResolver = new DtdResolver()
+            }))
+            {
+                Parse(src);
+            }
+        }
+
+        /// <summary>
+        /// Parses a hyphenation pattern file.
+        /// </summary>
+        /// <param name="file"> the pattern file </param>
+        public virtual void Parse(FileInfo file)
+        {
+            Parse(file, Encoding.UTF8);
+        }
+
+        /// <summary>
+        /// Parses a hyphenation pattern file.
+        /// </summary>
+        /// <param name="file"> the pattern file </param>
+        public virtual void Parse(FileInfo file, Encoding encoding)
+        {
+            using (var src = XmlReader.Create(new StreamReader(file.FullName, encoding), new XmlReaderSettings
+            {
+                DtdProcessing = DtdProcessing.Parse,
+                XmlResolver = new DtdResolver()
+            }))
+            {
+
+                Parse(src);
+            }
+        }
+
+        /// <summary>
+        /// Parses a hyphenation pattern file.
+        /// </summary>
+        /// <param name="file"> the pattern file </param>
+        public virtual void Parse(Stream xmlStream)
+        {
+            using (var src = XmlReader.Create(xmlStream, new XmlReaderSettings
+            {
+                DtdProcessing = DtdProcessing.Parse,
+                XmlResolver = new DtdResolver()
+            }))
+            {
+                Parse(src);
+            }
+        }
+
+        /// <summary>
+        /// Parses a hyphenation pattern file.
+        /// </summary>
+        /// <param name="source"> the InputSource for the file </param>
+        /// <exception cref="IOException"> In case of an exception while parsing </exception>
+        public virtual void Parse(XmlReader source)
+        {
+            source.MoveToContent();
+            while (source.Read())
+            {
+                ParseNode(source);
+            }
+        }
+
+        private void ParseNode(XmlReader node)
+        {
+            string uri, name, raw;
+            switch (node.NodeType)
+            {
+                case XmlNodeType.Element:
+
+                    // Element start
+                    uri = node.NamespaceURI;
+                    name = node.Name;
+                    var attributes = GetAttributes(node);
+                    raw = string.Empty; // node.ReadOuterXml(); - not used, but was messing with the node pointer
+
+                    this.StartElement(uri, name, raw, attributes);
+                    if (node.IsEmptyElement)
+                    {
+                        this.EndElement(uri, name, raw);
+                    }
+                    break;
+
+                case XmlNodeType.Text:
+
+                    this.Characters(node.Value.ToCharArray(), 0, node.Value.Length);
+                    break;
+
+                case XmlNodeType.EndElement:
+                    uri = node.NamespaceURI;
+                    name = node.Name;
+                    raw = string.Empty; // node.ReadOuterXml(); - not used, but was messing with the node pointer
+
+                    // Element end
+                    this.EndElement(uri, name, raw);
+                    break;
+            }
+        }
+
+        private IDictionary<string, string> GetAttributes(XmlReader node)
+        {
+            var result = new Dictionary<string, string>();
+            if (node.HasAttributes)
+            {
+                for (int i = 0; i < node.AttributeCount; i++)
+                {
+                    node.MoveToAttribute(i);
+                    result.Add(node.Name, node.Value);
+                }
+            }
+
+            return result;
+        }
+
+        protected internal virtual string ReadToken(StringBuilder chars)
+        {
+            string word;
+            bool space = false;
+            int i;
+            for (i = 0; i < chars.Length; i++)
+            {
+                if (char.IsWhiteSpace(chars[i]))
+                {
+                    space = true;
+                }
+                else
+                {
+                    break;
+                }
+            }
+            if (space)
+            {
+                // chars.delete(0,i);
+                for (int countr = i; countr < chars.Length; countr++)
+                {
+                    chars[countr - i] = chars[countr];
+                }
+                chars.Length = chars.Length - i;
+                if (token.Length > 0)
+                {
+                    word = token.ToString();
+                    token.Length = 0;
+                    return word;
+                }
+            }
+            space = false;
+            for (i = 0; i < chars.Length; i++)
+            {
+                if (char.IsWhiteSpace(chars[i]))
+                {
+                    space = true;
+                    break;
+                }
+            }
+            token.Append(chars.ToString(0, i));
+            // chars.delete(0,i);
+            for (int countr = i; countr < chars.Length; countr++)
+            {
+                chars[countr - i] = chars[countr];
+            }
+            chars.Length = chars.Length - i;
+            if (space)
+            {
+                word = token.ToString();
+                token.Length = 0;
+                return word;
+            }
+            token.Append(chars);
+            return null;
+        }
+
+        protected internal static string GetPattern(string word)
+        {
+            StringBuilder pat = new StringBuilder();
+            int len = word.Length;
+            for (int i = 0; i < len; i++)
+            {
+                if (!char.IsDigit(word[i]))
+                {
+                    pat.Append(word[i]);
+                }
+            }
+            return pat.ToString();
+        }
+
+        protected internal virtual List<object> NormalizeException<T1>(List<T1> ex)
+        {
+            List<object> res = new List<object>();
+            for (int i = 0; i < ex.Count; i++)
+            {
+                object item = ex[i];
+                if (item is string)
+                {
+                    string str = (string)item;
+                    StringBuilder buf = new StringBuilder();
+                    for (int j = 0; j < str.Length; j++)
+                    {
+                        char c = str[j];
+                        if (c != hyphenChar)
+                        {
+                            buf.Append(c);
+                        }
+                        else
+                        {
+                            res.Add(buf.ToString());
+                            buf.Length = 0;
+                            char[] h = new char[1];
+                            h[0] = hyphenChar;
+                            // we use here hyphenChar which is not necessarily
+                            // the one to be printed
+                            res.Add(new Hyphen(new string(h), null, null));
+                        }
+                    }
+                    if (buf.Length > 0)
+                    {
+                        res.Add(buf.ToString());
+                    }
+                }
+                else
+                {
+                    res.Add(item);
+                }
+            }
+            return res;
+        }
+
+        protected internal virtual string GetExceptionWord<T1>(List<T1> ex)
+        {
+            StringBuilder res = new StringBuilder();
+            for (int i = 0; i < ex.Count; i++)
+            {
+                object item = ex[i];
+                if (item is string)
+                {
+                    res.Append((string)item);
+                }
+                else
+                {
+                    if (((Hyphen)item).noBreak != null)
+                    {
+                        res.Append(((Hyphen)item).noBreak);
+                    }
+                }
+            }
+            return res.ToString();
+        }
+
+        protected internal static string GetInterletterValues(string pat)
+        {
+            StringBuilder il = new StringBuilder();
+            string word = pat + "a"; // add dummy letter to serve as sentinel
+            int len = word.Length;
+            for (int i = 0; i < len; i++)
+            {
+                char c = word[i];
+                if (char.IsDigit(c))
+                {
+                    il.Append(c);
+                    i++;
+                }
+                else
+                {
+                    il.Append('0');
+                }
+            }
+            return il.ToString();
+        }
+
+        /// <summary>
+        /// LUCENENET specific helper class to force the DTD file to be read from the embedded resource
+        /// rather than from the file system.
+        /// </summary>
+        internal class DtdResolver : XmlUrlResolver
+        {
+            public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToReturn)
+            {
+                string dtdFilename = "hyphenation.dtd";
+                if (dtdFilename.Equals(absoluteUri.Segments.LastOrDefault()))
+                {
+                    var qualifedDtdFilename = string.Concat(GetType().Namespace, ".", dtdFilename);
+                    return GetType().Assembly.GetManifestResourceStream(qualifedDtdFilename);
+                }
+
+                return base.GetEntity(absoluteUri, role, ofObjectToReturn);
+            }
+        }
+
+        //
+        // ContentHandler methods
+        //
+
+        /// <seealso cref= org.xml.sax.ContentHandler#startElement(java.lang.String,
+        ///      java.lang.String, java.lang.String, org.xml.sax.Attributes) </seealso>
+        public void StartElement(string uri, string local, string raw, IDictionary<string, string> attrs)
+        {
+            if (local.Equals("hyphen-char"))
+            {
+                string h = attrs.ContainsKey("value") ? attrs["value"] : null;
+                if (h != null && h.Length == 1)
+                {
+                    hyphenChar = h[0];
+                }
+            }
+            else if (local.Equals("classes"))
+            {
+                currElement = ELEM_CLASSES;
+            }
+            else if (local.Equals("patterns"))
+            {
+                currElement = ELEM_PATTERNS;
+            }
+            else if (local.Equals("exceptions"))
+            {
+                currElement = ELEM_EXCEPTIONS;
+                exception = new List<object>();
+            }
+            else if (local.Equals("hyphen"))
+            {
+                if (token.Length > 0)
+                {
+                    exception.Add(token.ToString());
+                }
+                exception.Add(new Hyphen(attrs["pre"], attrs["no"], attrs["post"]));
+                currElement = ELEM_HYPHEN;
+            }
+            token.Length = 0;
+        }
+
+        /// <seealso cref= org.xml.sax.ContentHandler#endElement(java.lang.String,
+        ///      java.lang.String, java.lang.String) </seealso>
+        public void EndElement(string uri, string local, string raw)
+        {
+
+            if (token.Length > 0)
+            {
+                string word = token.ToString();
+                switch (currElement)
+                {
+                    case ELEM_CLASSES:
+                        consumer.AddClass(word);
+                        break;
+                    case ELEM_EXCEPTIONS:
+                        exception.Add(word);
+                        exception = NormalizeException(exception);
+                        consumer.AddException(GetExceptionWord(exception), new List<object>(exception));
+                        break;
+                    case ELEM_PATTERNS:
+                        consumer.AddPattern(GetPattern(word), GetInterletterValues(word));
+                        break;
+                    case ELEM_HYPHEN:
+                        // nothing to do
+                        break;
+                }
+                if (currElement != ELEM_HYPHEN)
+                {
+                    token.Length = 0;
+                }
+            }
+            if (currElement == ELEM_HYPHEN)
+            {
+                currElement = ELEM_EXCEPTIONS;
+            }
+            else
+            {
+                currElement = 0;
+            }
+
+        }
+
+        /// <seealso cref= org.xml.sax.ContentHandler#characters(char[], int, int) </seealso>
+        public void Characters(char[] ch, int start, int length)
+        {
+            StringBuilder chars = new StringBuilder(length);
+            chars.Append(ch, start, length);
+            string word = ReadToken(chars);
+            while (!string.IsNullOrEmpty(word))
+            {
+                // System.out.println("\"" + word + "\"");
+                switch (currElement)
+                {
+                    case ELEM_CLASSES:
+                        consumer.AddClass(word);
+                        break;
+                    case ELEM_EXCEPTIONS:
+                        exception.Add(word);
+                        exception = NormalizeException(exception);
+                        consumer.AddException(GetExceptionWord(exception), new List<object>(exception));
+                        exception.Clear();
+                        break;
+                    case ELEM_PATTERNS:
+                        consumer.AddPattern(GetPattern(word), GetInterletterValues(word));
+                        break;
+                }
+                word = ReadToken(chars);
+            }
+
+        }
+    }
 }
\ No newline at end of file


[44/50] [abbrv] lucenenet git commit: Merge branch 'master' into analysis-work-2

Posted by sy...@apache.org.
Merge branch 'master' into analysis-work-2


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/4d7b23c4
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/4d7b23c4
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/4d7b23c4

Branch: refs/heads/analysis-work
Commit: 4d7b23c4269f0348a37fd470a3339befc64332ec
Parents: f5d76c0 bb59767
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Tue Aug 23 03:06:13 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Tue Aug 23 03:06:13 2016 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Core/Search/Collector.cs         |   2 +-
 .../Taxonomy/WriterCache/NameIntCacheLRU.cs     |   3 +-
 src/Lucene.Net.Join/TermsCollector.cs           |   2 +-
 src/Lucene.Net.Join/TermsIncludingScoreQuery.cs |   6 +-
 src/Lucene.Net.Join/TermsWithScoreCollector.cs  |   9 --
 .../Function/ValueSources/TFValueSource.cs      |   8 +-
 .../ValueSources/TermFreqValueSource.cs         |   6 +-
 .../Codecs/Lucene41Ords/Lucene41WithOrds.cs     | 126 +++++++++++++++++++
 .../Lucene.Net.TestFramework.csproj             |   1 +
 .../Directory/TestDirectoryTaxonomyReader.cs    |  32 ++---
 .../Directory/TestDirectoryTaxonomyWriter.cs    |   1 -
 .../Taxonomy/TestTaxonomyFacetCounts.cs         |   4 +-
 .../Taxonomy/TestTaxonomyFacetSumValueSource.cs |   6 +-
 src/Lucene.Net.Tests.Join/TestJoinUtil.cs       |  16 +--
 .../BooleanFilterTest.cs                        |   1 -
 .../Codecs/Lucene41/TestBlockPostingsFormat3.cs |   2 +-
 .../core/Index/TestBinaryDocValuesUpdates.cs    |   2 +-
 .../core/Index/TestTermdocPerf.cs               |   2 +-
 .../core/Search/TestAutomatonQuery.cs           |   3 +-
 .../core/Util/Packed/TestEliasFanoSequence.cs   |   4 +-
 .../core/Util/TestPagedBytes.cs                 |   2 +-
 21 files changed, 172 insertions(+), 66 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4d7b23c4/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
----------------------------------------------------------------------


[11/50] [abbrv] lucenenet git commit: Fixed bugs in KStemmer that were causing tests TestKStemmer.TestVocabulary and TestKStemmer.TestRandomStrings to fail.

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c185dc/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData6.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData6.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData6.cs
index a057b58..b4a10dd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData6.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData6.cs
@@ -47,6 +47,707 @@ namespace Lucene.Net.Analysis.En
         private KStemData6()
         {
         }
-        internal static string[] data = new string[] { "pedant", "pedantic", "pedantry", "peddle", "peddler", "pederast", "pederasty", "pedestal", "pedestrian", "pediatrician", "pediatrics", "pedicab", "pedicel", "pedicure", "pedigree", "pediment", "pedlar", "pedometer", "pee", "peek", "peekaboo", "peel", "peeler", "peelings", "peep", "peeper", "peephole", "peepul", "peer", "peerage", "peeress", "peerless", "peeve", "peevish", "peewit", "peg", "pejorative", "pekinese", "pekingese", "pekoe", "pelagic", "pelf", "pelican", "pellagra", "pellet", "pellucid", "pelmet", "pelota", "pelt", "pelvic", "pelvis", "pemican", "pemmican", "pen", "penal", "penalise", "penalize", "penalty", "penance", "pence", "penchant", "pencil", "pendant", "pendent", "pending", "pendulous", "pendulum", "penetrate", "penetrating", "penetration", "penetrative", "penguin", "penicillin", "peninsula", "penis", "penitent", "penitential", "penitentiary", "penknife", "penmanship", "pennant", "penniless", "pennon", "penny"
 , "pennyweight", "pennywort", "penology", "pension", "pensionable", "pensioner", "pensive", "pentagon", "pentagram", "pentameter", "pentateuch", "pentathlon", "pentecost", "penthouse", "penultimate", "penumbra", "penurious", "penury", "peon", "peony", "people", "pep", "pepper", "peppercorn", "peppermint", "peppery", "pepsin", "peptic", "per", "peradventure", "perambulate", "perambulator", "perceive", "percentage", "percentile", "perceptible", "perception", "perceptive", "perch", "perchance", "percipient", "percolate", "percolator", "percussion", "percussionist", "perdition", "peregrination", "peremptory", "perennial", "perfect", "perfectible", "perfection", "perfectionist", "perfectly", "perfidious", "perfidy", "perforate", "perforation", "perforce", "perform", "performance", "performer", "perfume", "perfumier", "perfunctory", "pergola", "perhaps", "perigee", "perihelion", "peril", "perilous", "perimeter", "period", "periodic", "periodical", "periods", "peripatetic", "peripheral", "
 periphery", "periphrasis", "periphrastic", "periscope", "perish", "perishable", "perisher", "perishing", "peristyle", "peritonitis", "periwig", "periwinkle", "perjure", "perjurer", "perjury", "perk", "perky", "perm", "permafrost", "permanence", "permanency", "permanent", "permanganate", "permeable", "permeate", "permissible", "permission", "permissive", "permit", "permutation", "permute", "pernicious", "pernickety", "pernod", "peroration", "peroxide", "perpendicular", "perpetrate", "perpetual", "perpetuate", "perpetuity", "perplex", "perplexed", "perplexity", "perquisite", "perry", "persecute", "persecution", "perseverance", "persevere", "persevering", "persian", "persiflage", "persimmon", "persist", "persistence", "persistent", "persnickety", "person", "persona", "personable", "personage", "personal", "personalise", "personalities", "personality", "personalize", "personally", "personification", "personify", "personnel", "perspective", "perspex", "perspicacious", "perspiration", "pe
 rspire", "persuade", "persuasion", "persuasive", "pert", "pertain", "pertinacious", "pertinent", "perturb", "perturbation", "peruke", "peruse", "pervade", "pervasive", "perverse", "perversion", "perversity", "pervert", "peseta", "pesky", "peso", "pessary", "pessimism", "pessimist", "pest", "pester", "pesticide", "pestiferous", "pestilence", "pestilent", "pestle", "pet", "petal", "petaled", "petalled", "petard", "peterman", "petite", "petition", "petitioner", "petrel", "petrifaction", "petrify", "petrochemical", "petrol", "petroleum", "petrology", "petticoat", "pettifogging", "pettish", "petty", "petulant", "petunia", "pew", "pewit", "pewter", "peyote", "pfennig", "phaeton", "phagocyte", "phalanx", "phalarope", "phallic", "phallus", "phantasmagoria", "phantasmal", "phantasy", "phantom", "pharaoh", "pharisaic", "pharisee", "pharmaceutical", "pharmacist", "pharmacology", "pharmacopoeia", "pharmacy", "pharyngitis", "pharynx", "phase", "phd", "pheasant", "phenobarbitone", "phenol", "phen
 omenal", "phenomenally", "phenomenon", "phew", "phi", "phial", "philander", "philanthropic", "philanthropist", "philanthropy", "philatelist", "philately", "philharmonic", "philhellene", "philippic", "philistine", "philological", "philologist", "philology", "philosopher", "philosophical", "philosophise", "philosophize", "philosophy", "philter", "philtre", "phizog", "phlebitis", "phlebotomy", "phlegm", "phlegmatic", "phlox", "phobia", "phoenician", "phoenix", "phone", "phoneme", "phonemic", "phonemics", "phonetic", "phonetician", "phonetics", "phoney", "phonic", "phonics", "phonograph", "phonology", "phony", "phooey", "phosphate", "phosphorescence", "phosphorescent", "phosphoric", "phosphorus", "photo", "photocopier", "photocopy", "photoelectric", "photogenic", "photograph", "photographer", "photographic", "photography", "photosensitive", "photosensitize", "photostat", "photosynthesis", "phototsensitise", "phrasal", "phrase", "phrasebook", "phraseology", "phrenetic", "phrenology", "ph
 thisis", "phut", "phylloxera", "phylum", "physic", "physical", "physically", "physician", "physicist", "physics", "physio", "physiognomy", "physiology", "physiotherapy", "physique", "pianissimo", "pianist", "piano", "pianola", "piaster", "piastre", "piazza", "pibroch", "picador", "picaresque", "piccalilli", "piccaninny", "piccolo", "pick", "pickaback", "pickaninny", "pickax", "pickaxe", "picked", "picker", "pickerel", "picket", "pickings", "pickle", "pickled", "pickpocket", "picky", "picnic", "picnicker", "pictorial", "picture", "pictures", "picturesque", "piddle", "piddling", "pidgin", "pie", "piebald", "piece", "piecemeal", "pieces", "piecework", "piecrust", "pied", "pier", "pierce", "piercing", "pierrot", "piety", "piezoelectric", "piffle", "piffling", "pig", "pigeon", "pigeonhole", "piggery", "piggish", "piggy", "piggyback", "piggybank", "pigheaded", "piglet", "pigment", "pigmentation", "pigmy", "pignut", "pigskin", "pigsticking", "pigsty", "pigswill", "pigtail", "pike", "pikest
 aff", "pilaster", "pilau", "pilchard", "pile", "piles", "pileup", "pilfer", "pilferage", "pilgrim", "pilgrimage", "pill", "pillage", "pillar", "pillbox", "pillion", "pillock", "pillory", "pillow", "pillowcase", "pilot", "pimento", "pimp", "pimpernel", "pimple", "pin", "pinafore", "pincer", "pincers", "pinch", "pinchbeck", "pinched", "pinchpenny", "pincushion", "pine", "pineal", "pineapple", "pinecone", "pinewood", "piney", "ping", "pinhead", "pinion", "pink", "pinkeye", "pinkie", "pinkish", "pinko", "pinky", "pinnace", "pinnacle", "pinnate", "pinny", "pinpoint", "pinprick", "pinstripe", "pint", "pinta", "pintable", "pinup", "pinwheel", "piny", "pioneer", "pious", "piousness", "pip", "pipal", "pipe", "pipeline", "piper", "pipes", "pipette", "piping", "pipit", "pippin", "pipsqueak", "piquant", "pique", "piquet", "piracy", "piranha", "pirate", "pirouette", "piscatorial", "pish", "piss", "pissed", "pistachio", "pistil", "pistol", "piston", "pit", "pitch", "pitchblende", "pitcher", "pitc
 hfork", "piteous", "pitfall", "pith", "pithead", "pithy", "pitiable", "pitiful", "pitiless", "pitman", "piton", "pittance", "pituitary", "pity", "pivot", "pivotal", "pixie", "pixilated", "pixy", "pizza", "pizzicato", "placard", "placate", "place", "placebo", "placed", "placekick", "placement", "placenta", "placid", "placket", "plagarise", "plagarize", "plagiarism", "plague", "plaguey", "plaice", "plaid", "plain", "plainly", "plainsman", "plainsong", "plainspoken", "plaint", "plaintiff", "plaintive", "plait", "plan", "planchette", "planet", "planetarium", "planetary", "plangent", "plank", "planking", "plankton", "planner", "plant", "plantain", "plantation", "planter", "plaque", "plash", "plasma", "plaster", "plasterboard", "plastered", "plasterer", "plastering", "plastic", "plasticine", "plasticity", "plastics", "plastron", "plate", "plateau", "platelayer", "platform", "plating", "platinum", "platitude", "platonic", "platoon", "platter", "platypus", "plaudit", "plausible", "play", "p
 layable", "playback", "playbill", "playboy", "player", "playful", "playgoer", "playground", "playgroup", "playhouse", "playmate", "playpen", "playroom", "playsuit", "plaything", "playtime", "playwright", "plaza", "plea", "pleach", "plead", "pleading", "pleadings", "pleasant", "pleasantry", "please", "pleased", "pleasing", "pleasurable", "pleasure", "pleat", "pleb", "plebeian", "plebiscite", "plectrum", "pled", "pledge", "pleistocene", "plenary", "plenipotentiary", "plenitude", "plenteous", "plentiful", "plenty", "pleonasm", "plethora", "pleurisy", "plexus", "pliable", "pliant", "pliers", "plight", "plimsoll", "plinth", "pliocene", "plod", "plodder", "plonk", "plop", "plosive", "plot", "plough", "ploughboy", "ploughman", "ploughshare", "plover", "plow", "plowboy", "plowman", "plowshare", "ploy", "pluck", "plucky", "plug", "plughole", "plum", "plumage", "plumb", "plumbago", "plumber", "plumbing", "plume", "plumed", "plummet", "plummy", "plump", "plunder", "plunge", "plunger", "plunk",
  "pluperfect", "plural", "pluralism", "plurality", "pluribus", "plus", "plush", "plushy", "pluto", "plutocracy", "plutocrat", "plutonium", "ply", "plywood", "pneumatic", "pneumoconiosis", "pneumonia", "poach", "poacher", "pock", "pocked", "pocket", "pocketbook", "pocketful", "pocketknife", "pockmark", "pockmarked", "pod", "podgy", "podiatry", "podium", "poem", "poesy", "poet", "poetaster", "poetess", "poetic", "poetical", "poetry", "pogrom", "poignancy", "poignant", "poinsettia", "point", "pointed", "pointer", "pointillism", "pointless", "points", "pointsman", "poise", "poised", "poison", "poisonous", "poke", "poker", "pokerwork", "poky", "polack", "polar", "polarisation", "polarise", "polarity", "polarization", "polarize", "polaroid", "polaroids", "polder", "pole", "poleax", "poleaxe", "polecat", "polemic", "polemical", "polemics", "police", "policeman", "policewoman", "policy", "polio", "polish", "polisher", "politburo", "polite", "politic", "politicalise", "politicalize", "politi
 cian", "politicise", "politicize", "politicking", "politico", "politics", "polity", "polka", "poll", "pollard", "pollen", "pollinate", "polling", "pollster", "pollutant", "pollute", "pollution", "polly", "pollyanna", "polo", "polonaise", "polony", "poltergeist", "poltroon", "poly", "polyandrous", "polyandry", "polyanthus", "polyester", "polyethylene", "polygamist", "polygamous", "polygamy", "polyglot", "polygon", "polymath", "polymer", "polymorphous", "polyp", "polyphony", "polypus", "polystyrene", "polysyllable", "polytechnic", "polytheism", "polythene", "polyurethane", "pomade", "pomander", "pomegranate", "pomeranian", "pommel", "pommy", "pomp", "pompom", "pomposity", "pompous", "ponce", "poncho", "poncy", "pond", "ponder", "ponderous", "pone", "pong", "poniard", "pontiff", "pontifical", "pontificals", "pontificate", "pontoon", "pony", "ponytail", "pooch", "poodle", "poof", "pooh", "pool", "poolroom", "pools", "poop", "pooped", "poor", "poorhouse", "poorly", "poorness", "poove", "
 pop", "popadam", "popadum", "popcorn", "popery", "popgun", "popinjay", "popish", "poplar", "poplin", "poppa", "popper", "poppet", "poppy", "poppycock", "popshop", "popsy", "populace", "popular", "popularise", "popularity", "popularize", "popularly", "populate", "population", "populism", "populist", "populous", "porcelain", "porch", "porcine", "porcupine", "pore", "pork", "porker", "porky", "porn", "pornography", "porosity", "porous", "porphyry", "porpoise", "porridge", "porringer", "port", "portable", "portage", "portal", "portals", "portcullis", "portend", "portent", "portentous", "porter", "porterage", "porterhouse", "portfolio", "porthole", "portico", "portion", "portly", "portmanteau", "portrait", "portraitist", "portraiture", "portray", "portrayal", "pose", "poser", "poseur", "posh", "posit", "position", "positional", "positive", "positively", "positiveness", "positivism", "positron", "posse", "possess", "possessed", "possession", "possessive", "possessor", "posset", "possibili
 ty", "possible", "possibly", "possum", "post", "postage", "postal", "postbag", "postbox", "postcard", "postcode", "postdate", "poster", "posterior", "posterity", "postern", "postgraduate", "posthaste", "posthumous", "postilion", "postillion", "posting", "postman", "postmark", "postmaster", "postmortem", "postpaid", "postpone", "postprandial", "postscript", "postulant", "postulate", "posture", "postwar", "posy", "pot", "potable", "potash", "potassium", "potation", "potato", "potbellied", "potbelly", "potboiler", "potbound", "poteen", "potency", "potent", "potentate", "potential", "potentiality", "pothead", "pother", "potherb", "pothole", "potholing", "pothouse", "pothunter", "potion", "potluck", "potpourri", "potsherd", "potshot", "pottage", "potted", "potter", "potteries", "pottery", "potty", "pouch", "pouf", "pouffe", "poulterer", "poultice", "poultry", "pounce", "pound", "poundage", "pounding", "pour", "pout", "poverty", "powder", "powdered", "powdery", "power", "powerboat", "powe
 rful", "powerhouse", "powerless", "powers", "powwow", "pox", "pps", "practicable", "practical", "practicality", "practically", "practice", "practiced", "practise", "practised", "practitioner", "praesidium", "praetor", "praetorian", "pragmatic", "pragmatism", "prairie", "praise", "praises", "praiseworthy", "praline", "pram", "prance", "prank", "prankster", "prat", "prate", "pratfall", "prattle", "prawn", "praxis", "pray", "prayer", "preach", "preachify", "preamble", "prearrange", "prebend", "prebendary", "precarious", "precast", "precaution", "precede", "precedence", "precedent", "preceding", "precentor", "precept", "preceptor", "precession", "precinct", "precincts", "preciosity", "precious", "precipice", "precipitate", "precipitation", "precipitous", "precise", "precisely", "precision", "preclude", "precocious", "precognition", "preconceived", "preconception", "precondition", "precook", "precursor", "predator", "predatory", "predecease", "predecessor", "predestinate", "predestinatio
 n", "predestine", "predetermine", "predeterminer", "predicament", "predicate", "predicative", "predict", "predictable", "prediction", "predigest", "predilection", "predispose", "predisposition", "predominance", "predominant", "predominantly", "predominate", "preeminent", "preeminently", "preempt", "preemption", "preemptive", "preen", "preexist", "preexistence", "prefab", "prefabricate", "prefabricated", "preface", "prefatory", "prefect", "prefecture", "prefer", "preferable", "preference", "preferential", "preferment", "prefigure", "prefix", "pregnancy", "pregnant", "preheat", "prehensile", "prehistoric", "prehistory", "prejudge", "prejudice", "prejudiced", "prejudicial", "prelacy", "prelate", "prelim", "preliminary", "prelims", "preliterate", "prelude", "premarital", "premature", "premeditate", "premeditated", "premier", "premise", "premises", "premiss", "premium", "premonition", "premonitory", "prenatal", "prentice", "preoccupation", "preoccupied", "preoccupy", "preordain", "prep",
  "prepack", "preparation", "preparatory", "prepare", "prepared", "preparedness", "prepay", "preponderance", "preponderant", "preponderate", "preposition", "prepositional", "prepossessed", "prepossessing", "prepossession", "preposterous", "prepuce", "prerecord", "prerequisite", "prerogative", "presage", "presbyter", "presbyterian", "presbytery", "preschool", "prescient", "prescribe", "prescribed", "prescript", "prescription", "prescriptive", "presence", "present", "presentable", "presentation", "presenter", "presentiment", "presently", "presents", "preservable", "preservation", "preservative", "preserve", "preserver", "preset", "preshrunk", "preside", "presidency", "president", "presidential", "presidium", "press", "pressed", "pressgang", "pressing", "pressman", "pressmark", "pressure", "pressurise", "pressurize", "prestidigitation", "prestige", "prestigious", "prestissimo", "presto", "prestressed", "presumable", "presume", "presumption", "presumptive", "presumptuous", "presuppose", 
 "presupposition", "pretence", "pretend", "pretended", "pretender", "pretense", "pretension", "pretentious", "pretentiousness", "preterit", "preterite", "preternatural", "pretext", "pretor", "pretorian", "prettify", "prettily", "pretty", "pretzel", "prevail", "prevailing", "prevalent", "prevaricate", "prevent", "prevention", "preventive", "preview", "previous", "prevision", "prewar", "prey", "price", "priceless", "pricey", "prick", "prickle", "prickly", "pricy", "pride", "priest", "priesthood", "priestly", "prig", "priggish", "prim", "primacy", "primaeval", "primal", "primarily", "primary", "primate", "prime", "primer", "primeval", "priming", "primitive", "primogeniture", "primordial", "primp", "primrose", "primula", "primus", "prince", "princedom", "princely", "princess", "principal", "principality", "principally", "principle", "principled", "principles", "prink", "print", "printable", "printer", "printing", "printout", "prior", "priority", "priory", "prise", "prism", "prismatic", "
 prison", "prisoner", "prissy", "pristine", "prithee", "privacy", "private", "privateer", "privation", "privet", "privilege", "privileged", "privily", "privy", "prize", "prizefight", "prizeman", "pro", "probability", "probable", "probably", "probate", "probation", "probationer", "probe", "probity", "problem", "problematic", "proboscis", "procedural", "procedure", "proceed", "proceeding", "proceedings", "proceeds", "process", "procession", "processional", "proclaim", "proclamation", "proclivity", "proconsul", "proconsulate", "procrastinate", "procreate", "proctor", "procure", "procurer", "prod", "prodigal", "prodigious", "prodigy", "produce", "producer", "product", "production", "productive", "productivity", "proem", "prof", "profanation", "profane", "profanity", "profess", "professed", "professedly", "profession", "professional", "professionalism", "professor", "professorial", "professorship", "proffer", "proficient", "profile", "profit", "profitable", "profiteer", "profligacy", "pro
 fligate", "profound", "profundity", "profuse", "profusion", "progenitor", "progeny", "progesterone", "prognathous", "prognosis", "prognostic", "prognosticate", "prognostication", "program", "programer", "programmer", "progress", "progression", "progressive", "prohibit", "prohibition", "prohibitionist", "prohibitive", "prohibitory", "project", "projectile", "projection", "projectionist", "projector", "prolapse", "prole", "prolegomena", "proletarian", "proletariat", "proliferate", "proliferation", "prolific", "prolix", "prolog", "prologue", "prolong", "prolongation", "prolonged", "prom", "promenade", "promenader", "prominence", "prominent", "promiscuity", "promiscuous", "promise", "promising", "promontory", "promote", "promoter", "promotion", "prompt", "prompter", "promptness", "promulgate", "pron", "prone", "prong", "pronominal", "pronoun", "pronounce", "pronounceable", "pronounced", "pronouncement", "pronto", "pronunciamento", "pronunciation", "proof", "proofread", "prop", "propagan
 da", "propagandise", "propagandist", "propagandize", "propagate", "propagation", "propane", "propel", "propellant", "propellent", "propeller", "propensity", "proper", "properly", "propertied", "property", "prophecy", "prophesy", "prophet", "prophetess", "prophetic", "prophets", "prophylactic", "prophylaxis", "propinquity", "propitiate", "propitiatory", "propitious", "propjet", "proponent", "proportion", "proportional", "proportionate", "proportions", "proposal", "propose", "proposition", "propound", "proprietary", "proprieties", "proprietor", "proprietress", "propriety", "propulsion", "propulsive", "propylene", "prorogation", "prorogue", "prosaic", "proscenium", "proscribe", "proscription", "prose", "prosecute", "prosecution", "prosecutor", "proselyte", "proselytise", "proselytize", "prosody", "prospect", "prospective", "prospector", "prospects", "prospectus", "prosper", "prosperity", "prosperous", "prostate", "prosthesis", "prostitute", "prostitution", "prostrate", "prostration", "
 prosy", "protagonist", "protean", "protect", "protection", "protectionism", "protective", "protector", "protectorate", "protein", "protest", "protestant", "protestation", "protocol", "proton", "protoplasm", "prototype", "protozoa", "protozoan", "protozoon", "protract", "protraction", "protractor", "protrude", "protrusion", "protrusive", "protuberance", "protuberant", "proud", "provable", "prove", "proven", "provenance", "provender", "proverb", "proverbial", "proverbially", "proverbs", "provide", "provided", "providence", "provident", "providential", "provider", "providing", "province", "provinces", "provincial", "provision", "provisional", "provisions", "proviso", "provocation", "provocative", "provoke", "provoking", "provost", "prow", "prowess", "prowl", "prowler", "prox", "proximal", "proximate", "proximity", "proximo", "proxy", "prude", "prudence", "prudent", "prudential", "prudery", "prudish", "prune", "pruning", "prurience", "prurient", "pruritus", "prussian", "pry", "psalm", "
 psalmist", "psalmody", "psalms", "psalter", "psaltery", "psephology", "pseud", "pseudonym", "pseudonymous", "pshaw", "psittacosis", "psoriasis", "psst", "psyche", "psychedelic", "psychiatric", "psychiatrist", "psychiatry", "psychic", "psycho", "psychoanalyse", "psychoanalysis", "psychoanalyst", "psychoanalytic", "psychoanalyze", "psychokinesis", "psychological", "psychologist", "psychology", "psychopath", "psychosis", "psychosomatic", "psychotherapy", "psychotic", "pta", "ptarmigan", "pterodactyl", "pto", "ptomaine", "pub", "puberty", "pubic", "public", "publican", "publication", "publicise", "publicist", "publicity", "publicize", "publish", "publisher", "publishing", "puce", "puck", "pucker", "puckish", "pud", "pudding", "puddle", "pudendum", "pudgy", "pueblo", "puerile", "puerility", "puerperal", "puff", "puffball", "puffed", "puffer", "puffin", "puffy", "pug", "pugilism", "pugilist", "pugnacious", "pugnacity", "puissance", "puissant", "puke", "pukka", "pulchritude", "pulchritudin
 ous", "pule", "pull", "pullback", "pullet", "pulley", "pullman", "pullout", "pullover", "pullthrough", "pullulate", "pulmonary", "pulp", "pulpit", "pulsar", "pulsate", "pulsation", "pulse", "pulverise", "pulverize", "puma", "pumice", "pummel", "pump", "pumpernickel", "pumpkin", "pun", "punch", "punchy", "punctilio", "punctilious", "punctual", "punctuate", "punctuation", "puncture", "pundit", "pungent", "punic", "punish", "punishable", "punishing", "punishment", "punitive", "punjabi", "punk", "punkah", "punnet", "punster", "punt", "puny", "pup", "pupa", "pupate", "pupil", "puppet", "puppeteer", "puppy", "purblind", "purchase", "purchaser", "purdah", "pure", "pureblooded", "purebred", "puree", "purely", "pureness", "purgation", "purgative", "purgatory", "purge", "purification", "purify", "purist", "puritan", "puritanical", "purity", "purl", "purler", "purlieus", "purloin", "purple", "purplish", "purport", "purpose", "purposeful", "purposeless", "purposely", "purposive", "purr", "purse
 ", "purser", "pursuance", "pursue", "pursuer", "pursuit", "purulent", "purvey", "purveyance", "purveyor", "purview", "pus", "push", "pushbike", "pushcart", "pushchair", "pushed", "pusher", "pushover", "pushy", "pusillanimous", "puss", "pussy", "pussycat", "pussyfoot", "pustule", "put", "putative", "putrefaction", "putrefactive", "putrefy", "putrescent", "putrid", "putsch", "putt", "puttee", "putter", "putto", "putty", "puzzle", "puzzlement", "puzzler", "pvc", "pygmy", "pyjama", "pyjamas", "pylon", "pyorrhea", "pyorrhoea", "pyramid", "pyre", "pyrex", "pyrexia", "pyrites", "pyromania", "pyromaniac", "pyrotechnic", "pyrotechnics", "python", "pyx", "qed", "qty", "qua", "quack", "quackery", "quad", "quadragesima", "quadrangle", "quadrangular", "quadrant", "quadrilateral", "quadrille", "quadrillion", "quadroon", "quadruped", "quadruple", "quadruplet", "quadruplicate", "quaff", "quagga", "quagmire", "quail", "quaint", "quake", "quaker", "qualification", "qualifications", "qualified", "qual
 ifier", "qualify", "qualitative", "quality", "qualm", "quandary", "quantify", "quantitative", "quantity", "quantum", "quarantine", "quark", "quarrel", "quarrelsome", "quarry", "quart", "quarter", "quarterdeck", "quarterfinal", "quartering", "quarterly", "quartermaster", "quarters", "quarterstaff", "quartet", "quartette", "quarto", "quartz", "quasar", "quash", "quatercentenary", "quatrain", "quaver", "quay", "quean", "queasy", "queen", "queenly", "queer", "quell", "quench", "quenchless", "querulous", "query", "quest", "question", "questionable", "questioner", "questioning", "questionnaire", "quetzal", "queue", "quibble", "quick", "quicken", "quickie", "quicklime", "quicksand", "quicksilver", "quickstep", "quid", "quiescent", "quiet", "quieten", "quietism", "quietude", "quietus", "quiff", "quill", "quilt", "quilted", "quin", "quince", "quinine", "quinquagesima", "quinsy", "quintal", "quintessence", "quintet", "quintette", "quintuplet", "quip", "quire", "quirk", "quisling", "quit", "qu
 its", "quittance", "quitter", "quiver", "quixotic", "quiz", "quizmaster", "quizzical", "quod", "quoit", "quoits", "quondam", "quorum", "quota", "quotable", "quotation", "quote", "quoth", "quotidian", "quotient", "rabbi", "rabbinical", "rabbit", "rabble", "rabelaisian", "rabid", "rabies", "rac", "raccoon", "race", "racecourse", "racehorse", "raceme", "racer", "races", "racetrack", "racial", "racialism", "racially", "racing", "rack", "racket", "racketeer", "racketeering", "rackets", "raconteur", "racoon", "racquet", "racquets", "racy", "radar", "radial", "radiance", "radiant", "radiate", "radiation", "radiator", "radical", "radicalise", "radicalism", "radicalize", "radicle", "radii", "radio", "radioactive", "radioactivity", "radiogram", "radiograph", "radiographer", "radiography", "radioisotope", "radiolocation", "radiology", "radiotherapist", "radiotherapy", "radish", "radium", "radius", "raffia", "raffish", "raffle", "raft", "rafter", "raftered", "raftsman", "rag", "raga", "ragamuff
 in", "ragbag", "rage", "ragged", "raglan", "ragout", "ragtag", "ragtime", "raid", "raider", "rail", "railhead", "railing", "raillery", "railroad", "rails", "railway", "raiment", "rain", "rainbow", "raincoat", "raindrop", "rainfall", "rainproof", "rains", "rainstorm", "rainwater", "rainy", "raise", "raisin", "raj", "raja", "rajah", "rake", "rakish", "rallentando", "rally", "ram", "ramadan", "ramble", "rambler", "rambling", "rambunctious", "ramekin", "ramification", "ramify", "ramjet", "ramp", "rampage", "rampant", "rampart", "ramrod", "ramshackle", "ran", "ranch", "rancher", "rancid", "rancor", "rancorous", "rancour", "rand", "random", "randy", "ranee", "rang", "range", "ranger", "rani", "rank", "ranker", "ranking", "rankle", "ranks", "ransack", "ransom", "rant", "rap", "rapacious", "rapacity", "rape", "rapid", "rapids", "rapier", "rapine", "rapist", "rapport", "rapprochement", "rapscallion", "rapt", "rapture", "rapturous", "rare", "rarebit", "rarefied", "rarefy", "rarely", "raring",
  "rarity", "rascal", "rascally", "rash", "rasher", "rasp", "raspberry", "rat", "ratable", "ratchet", "rate", "rateable", "ratepayer", "rather", "ratify", "rating", "ratio", "ratiocination", "ration", "rational", "rationale", "rationalise", "rationalism", "rationalist", "rationalize", "rations", "ratlin", "ratline", "rats", "rattan", "ratter", "rattle", "rattlebrained", "rattlesnake", "rattletrap", "rattling", "ratty", "raucous", "raunchy", "ravage", "ravages", "rave", "ravel", "raven", "ravening", "ravenous", "raver", "ravine", "raving", "ravings", "ravioli", "ravish", "ravishing", "ravishment", "raw", "rawhide", "ray", "rayon", "raze", "razor", "razorback", "razzle", "reach", "react", "reaction", "reactionary", "reactivate", "reactive", "reactor", "read", "readable", "readdress", "reader", "readership", "readily", "readiness", "reading", "readjust", "readout", "ready", "reafforest", "reagent", "real", "realign", "realisable", "realisation", "realise", "realism", "realist", "realist
 ic", "reality", "realizable", "realization", "realize", "really", "realm", "realpolitik", "realtor", "realty", "ream", "reanimate", "reap", "reaper", "reappear", "reappraisal", "rear", "rearguard", "rearm", "rearmament", "rearmost", "rearrange", "rearward", "rearwards", "reason", "reasonable", "reasonably", "reasoned", "reasoning", "reassure", "rebarbative", "rebate", "rebel", "rebellion", "rebellious", "rebind", "rebirth", "reborn", "rebound", "rebuff", "rebuild", "rebuke", "rebus", "rebut", "rebuttal", "recalcitrance", "recalcitrant", "recall", "recant", "recap", "recapitulate", "recapitulation", "recapture", "recast", "recce", "recd", "recede", "receipt", "receipts", "receivable", "receive", "received", "receiver", "receivership", "receiving", "recent", "recently", "receptacle", "reception", "receptionist", "receptive", "recess", "recession", "recessional", "recessive", "recharge", "recidivist", "recipe", "recipient", "reciprocal", "reciprocate", "reciprocity", "recital", "recita
 tion", "recitative", "recite", "reck", "reckless", "reckon", "reckoner", "reckoning", "reclaim", "reclamation", "recline", "recluse", "recognise", "recognition", "recognizance", "recognize", "recoil", "recollect", "recollection", "recommend", "recommendation", "recompense", "reconcile", "reconciliation", "recondite", "recondition", "reconnaissance", "reconnoiter", "reconnoitre", "reconsider", "reconstitute", "reconstruct", "reconstruction", "record", "recorder", "recording", "recordkeeping", "recount", "recoup", "recourse", "recover", "recovery", "recreant", "recreate", "recreation", "recreational", "recriminate", "recrimination", "recrudescence", "recruit", "rectal", "rectangle", "rectangular", "rectification", "rectifier", "rectify", "rectilinear", "rectitude", "recto", "rector", "rectory", "rectum", "recumbent", "recuperate", "recuperative", "recur", "recurrence", "recurrent", "recurved", "recusant", "recycle", "red", "redbreast", "redbrick", "redcap", "redcoat", "redcurrant", "r
 edden", "reddish", "redecorate", "redeem", "redeemer", "redemption", "redemptive", "redeploy", "redhead", "rediffusion", "redirect", "redistribute", "redo", "redolence", "redolent", "redouble", "redoubt", "redoubtable", "redound", "redress", "redskin", "reduce", "reduction", "redundancy", "redundant", "reduplicate", "redwing", "redwood", "reecho", "reed", "reeds", "reeducate", "reedy", "reef", "reefer", "reek", "reel", "reentry", "reeve", "ref", "reface", "refashion", "refectory", "refer", "referee", "reference", "referendum", "refill", "refine", "refined", "refinement", "refiner", "refinery", "refit", "reflate", "reflation", "reflect", "reflection", "reflective", "reflector", "reflex", "reflexes", "reflexive", "refloat", "refoot", "reforest", "reform", "reformation", "reformatory", "refract", "refractory", "refrain", "refresh", "refresher", "refreshing", "refreshment", "refreshments", "refrigerant", "refrigerate", "refrigeration", "refrigerator", "reft", "refuel", "refuge", "refuge
 e", "refulgence", "refulgent", "refund", "refurbish", "refusal", "refuse", "refutable", "refutation", "refute", "regain", "regal", "regale", "regalia", "regard", "regardful", "regarding", "regardless", "regards", "regatta", "regency", "regenerate", "regent", "reggae", "regicide", "regime", "regimen", "regiment", "regimental", "regimentals", "regina", "region", "regional", "regions", "register", "registrar", "registration", "registry", "regnant", "regress", "regressive", "regret", "regrets", "regrettable", "regrettably", "regroup", "regular", "regularise", "regularity", "regularize", "regularly", "regulate", "regulation", "regulator", "regulo", "regurgitate", "rehabilitate", "rehash", "rehear", "rehearsal", "rehearse", "rehouse", "reich", "reification", "reify", "reign", "reimburse", "reimbursement", "rein", "reincarnate", "reincarnation", "reindeer", "reinforce", "reinforcement", "reinforcements", "reins", "reinstate", "reinsure", "reissue", "reiterate", "reject", "rejection", "rejo
 ice", "rejoicing", "rejoicings", "rejoin", "rejoinder", "rejuvenate", "rekindle", "relaid", "relapse", "relate", "related", "relation", "relational", "relations", "relationship", "relative", "relatively", "relativism", "relativistic", "relativity", "relax", "relaxation", "relaxing", "relay", "release", "relegate", "relent", "relentless", "relevance", "relevant", "reliability", "reliable", "reliance", "reliant", "relic", "relics", "relict", "relief", "relieve", "relieved", "religion", "religious", "religiously", "reline", "relinquish", "reliquary", "relish", "relive", "reload", "relocate", "reluctance", "reluctant", "reluctantly", "rely", "remain", "remainder", "remains", "remake", "remand", "remark", "remarkable", "remarkably", "remarry", "remediable", "remedial", "remedy", "remember", "remembrance", "remilitarise", "remilitarize", "remind", "reminder", "reminisce", "reminiscence", "reminiscences", "reminiscent", "remiss", "remission", "remit", "remittance", "remittent", "remnant", 
 "remodel", "remold", "remonstrance", "remonstrate", "remorse", "remorseful", "remote", "remotely", "remould", "remount", "removal", "remove", "remover", "remunerate", "remunerative", "renaissance", "renal", "rename", "renascent", "rend", "render", "rendering", "rendezvous", "rendition", "renegade", "renege", "renegue", "renew", "renewable", "renewal", "rennet", "renounce", "renovate", "renown", "renowned", "rent", "rental", "renter", "rentier", "renunciation", "reopen", "reorganise", "reorganize", "rep", "repaid", "repair", "reparable", "reparation", "reparations", "repartee", "repast", "repatriate", "repay", "repayable", "repayment", "repeal", "repeat", "repeated", "repeatedly", "repeater", "repeating", "repel", "repellent", "repent", "repentance", "repentant", "repercussion", "repertoire", "repertory", "repetition", "repetitious", "repine", "replace", "replacement", "replay", "replenish", "replete", "repletion", "replica", "replicate", "reply", "repoint", "report", "reportage", "r
 eportedly", "reporter", "repose", "repository", "repossess", "repot", "repp", "reprehend", "reprehensible", "represent", "representation", "representational", "representations", "representative", "repress", "repressed", "repression", "repressive", "reprieve", "reprimand", "reprint", "reprisal", "reprise", "reproach", "reprobate", "reproduce", "reproducer", "reproduction", "reproductive", "reproof", "reprove", "reproving", "reptile", "reptilian", "republic", "republican", "republicanism", "repudiate", "repugnance", "repugnant", "repulse", "repulsion", "repulsive", "reputable", "reputation", "repute", "reputed", "reputedly", "request", "requiem", "require", "requirement", "requisite", "requisition", "requital", "requite", "reredos", "rerun", "rescind", "rescript", "rescue", "research", "reseat", "resemblance", "resemble", "resent", "resentment", "reservation", "reserve", "reserved", "reservedly", "reservist", "reservoir", "reset", "resettle", "reshuffle", "reside", "residence", "resid
 ency", "resident", "residential", "residual", "residuary", "residue", "resign", "resignation", "resigned", "resilience", "resilient", "resin", "resinated", "resist", "resistance", "resistant", "resistor", "resole", "resolute", "resolution", "resolvable", "resolve", "resonance", "resonant", "resonate", "resonator", "resort", "resound", "resounding", "resource", "resourceful", "resources", "respect", "respectability", "respectable", "respecter", "respectful", "respecting", "respective", "respectively", "respects", "respiration", "respirator", "respiratory", "respire", "respite", "resplendence", "resplendent", "respond", "respondent", "response", "responsibility", "responsible", "responsibly", "responsive", "rest", "restage", "restate", "restaurant", "restaurateur", "restful", "restitution", "restive", "restless", "restock", "restoration", "restorative", "restore", "restorer", "restrain", "restrained", "restraint", "restrict", "restricted", "restriction", "restrictive", "restructure", 
 "result", "resultant", "resume", "resumption", "resurface", "resurgence", "resurgent", "resurrect", "resurrection", "resuscitate", "retail", "retailer", "retain", "retainer", "retake", "retaliate", "retaliation", "retaliatory", "retard", "retarded", "retch", "retd", "retell", "retention", "retentive", "rethink", "reticence", "reticent", "reticulated", "reticulation", "reticule", "retina", "retinue", "retire", "retired", "retirement", "retiring", "retort", "retouch", "retrace", "retract", "retractable", "retractile", "retraction", "retread", "retreat", "retrench", "retrial", "retraining", "retribution", "retributive", "retrieval", "retrieve", "retriever", "retroactive", "retroflex", "retrograde", "retrogress", "retrogressive", "retrospect", "retrospection", "retrospective", "retroversion", "retsina", "return", "returnable", "returns", "reunion", "reunite", "reuse", "rev", "revalue", "revamp", "reveal", "revealing", "reveille", "revel", "revelation", "revelry", "revenge", "revenue", "
 reverberant", "reverberate", "reverberation", "revere", "reverence", "reverend", "reverent", "reverential", "reverie", "revers", "reversal", "reverse", "reversion", "reversionary", "revert", "revetment", "review", "reviewer", "revile", "revise", "revision", "revisionism", "revitalise", "revitalize", "revival", "revivalist", "revive", "revivify", "revocable", "revocation", "revoke", "revolt", "revolting", "revolution", "revolutionary", "revolutionise", "revolutionize", "revolve", "revolver", "revolving", "revue", "revulsion", "reward", "rewarding", "rewards", "rewire", "reword", "rewrite", "rex", "rhapsodise", "rhapsodize", "rhapsody", "rhea", "rhenish", "rheostat", "rhetoric", "rhetorical", "rhetorically", "rhetorician", "rheum", "rheumatic", "rheumaticky", "rheumatics", "rheumatism", "rheumatoid", "rhinestone", "rhinoceros", "rhizome", "rhododendron", "rhomboid", "rhombus", "rhubarb", "rhyme", "rhymed", "rhymester", "rhythm", "rhythmic", "rib", "ribald", "ribaldry", "ribbed", "ribb
 ing", "ribbon", "riboflavin", "rice", "rich", "riches", "richly", "richness", "rick", "rickets", "rickety", "ricksha", "rickshaw", "ricochet", "rid", "riddance", "ridden", "riddle", "ride", "rider", "riderless", "ridge", "ridgepole", "ridicule", "ridiculous", "riding", "riesling", "rife", "riff", "riffle", "riffraff", "rifle", "rifleman", "rifles", "rifling", "rift", "rig", "rigging", "right", "righteous", "rightful", "rightist", "rightly", "rights", "rightward", "rightwards", "rigid", "rigidity", "rigmarole", "rigor", "rigorous", "rigour", "rile", "rill", "rim", "rime", "rind", "rinderpest", "ring", "ringer", "ringleader", "ringlet", "ringmaster", "ringside", "ringworm", "rink", "rinse", "riot", "riotous", "rip", "riparian", "ripcord", "ripen", "riposte", "ripple", "ripsaw", "riptide", "rise", "riser", "risibility", "risible", "rising", "risk", "risky", "risotto", "rissole", "rite", "ritual", "ritualism", "ritzy", "rival", "rivalry", "rive", "river", "riverbed", "riverside", "rivet
 ", "riveter", "riveting", "riviera", "rivulet", "rna", "roach", "road", "roadbed", "roadblock", "roadhouse", "roadman", "roadside", "roadstead", "roadster", "roadway", "roadworthy", "roam", "roan", "roar", "roaring", "roast", "roaster", "roasting", "rob", "robber", "robbery", "robe", "robin", "robot", "robust", "rock", "rockbound", "rocker", "rockery", "rocket", "rocketry", "rocks", "rocky", "rococo", "rod", "rode", "rodent", "rodeo", "rodomontade", "roe", "roebuck", "rogation", "roger", "rogue", "roguery", "roguish", "roisterer", "role", "roll", "roller", "rollicking", "rolling", "rolls", "romaic", "roman", "romance", "romanesque", "romantic", "romanticise", "romanticism", "romanticize", "romany", "romish", "romp", "romper", "rompers", "rondeau", "rondo", "roneo", "rood", "roodscreen", "roof", "roofing", "roofless", "rooftree", "rook", "rookery", "rookie", "room", "roomer", "roommate", "rooms", "roomy", "roost", "rooster", "root", "rooted", "rootless", "roots", "rope", "ropedancer"
 , "ropes", "ropewalk", "ropeway", "ropey", "ropy", "roquefort", "rosary", "rose", "roseate", "rosebud", "roseleaf", "rosemary", "rosette", "rosewater", "rosewood", "rosin", "roster", "rostrum", "rosy", "rot", "rota", "rotary", "rotate", "rotation", "rotatory", "rotgut", "rotisserie", "rotogravure", "rotor", "rotten", "rottenly", "rotter", "rotund", "rotunda", "rouble", "rouge", "rough", "roughage", "roughcast", "roughen", "roughhouse", "roughly", "roughneck", "roughness", "roughrider", "roughshod", "roulette", "round", "roundabout", "roundel", "roundelay", "rounders", "roundhead", "roundhouse", "roundish", "roundly", "rounds", "roundsman", "roundup", "roup", "rouse", "rousing", "roustabout", "rout", "route", "routine", "roux", "rove", "rover", "row", "rowan", "rowanberry", "rowdy", "rowdyism", "rowel", "rower", "rowing", "rowlock", "royal", "royalist", "royalty", "rpm", "rsm", "rsvp", "rub", "rubber", "rubberise", "rubberize", "rubberneck", "rubbery", "rubbing", "rubbish", "rubbishy
 ", "rubble", "rubdown", "rubella", "rubicon", "rubicund", "ruble", "rubric", "ruby", "ruck", "rucksack", "ruckus", "ruction", "ructions", "rudder", "ruddle", "ruddy", "rude", "rudely", "rudiment", "rudimentary", "rudiments", "rue", "rueful", "ruff", "ruffian", "ruffianly", "ruffle", "rug", "rugby", "rugged", "ruin", "ruination", "ruinous", "ruins", "rule", "rulebook", "ruler", "ruling", "rum", "rumba", "rumble", "rumbling", "rumbustious", "ruminant", "ruminate", "ruminative", "rummage", "rummy", "rumor", "rumored", "rumormonger", "rumour", "rumoured", "rumourmonger", "rump", "rumple", "rumpus", "run", "runaway", "rung", "runnel", "runner", "running", "runny", "runs", "runt", "runway" };
+        internal static string[] data = new string[] {
+            "pedant","pedantic","pedantry","peddle","peddler",
+            "pederast","pederasty","pedestal","pedestrian","pediatrician",
+            "pediatrics","pedicab","pedicel","pedicure","pedigree",
+            "pediment","pedlar","pedometer","pee","peek",
+            "peekaboo","peel","peeler","peelings","peep",
+            "peeper","peephole","peepul","peer","peerage",
+            "peeress","peerless","peeve","peevish","peewit",
+            "peg","pejorative","pekinese","pekingese","pekoe",
+            "pelagic","pelf","pelican","pellagra","pellet",
+            "pellucid","pelmet","pelota","pelt","pelvic",
+            "pelvis","pemican","pemmican","pen","penal",
+            "penalise","penalize","penalty","penance","pence",
+            "penchant","pencil","pendant","pendent","pending",
+            "pendulous","pendulum","penetrate","penetrating","penetration",
+            "penetrative","penguin","penicillin","peninsula","penis",
+            "penitent","penitential","penitentiary","penknife","penmanship",
+            "pennant","penniless","pennon","penny","pennyweight",
+            "pennywort","penology","pension","pensionable","pensioner",
+            "pensive","pentagon","pentagram","pentameter","pentateuch",
+            "pentathlon","pentecost","penthouse","penultimate","penumbra",
+            "penurious","penury","peon","peony","people",
+            "pep","pepper","peppercorn","peppermint","peppery",
+            "pepsin","peptic","per","peradventure","perambulate",
+            "perambulator","perceive","percentage","percentile","perceptible",
+            "perception","perceptive","perch","perchance","percipient",
+            "percolate","percolator","percussion","percussionist","perdition",
+            "peregrination","peremptory","perennial","perfect","perfectible",
+            "perfection","perfectionist","perfectly","perfidious","perfidy",
+            "perforate","perforation","perforce","perform","performance",
+            "performer","perfume","perfumier","perfunctory","pergola",
+            "perhaps","perigee","perihelion","peril","perilous",
+            "perimeter","period","periodic","periodical","periods",
+            "peripatetic","peripheral","periphery","periphrasis","periphrastic",
+            "periscope","perish","perishable","perisher","perishing",
+            "peristyle","peritonitis","periwig","periwinkle","perjure",
+            "perjurer","perjury","perk","perky","perm",
+            "permafrost","permanence","permanency","permanent","permanganate",
+            "permeable","permeate","permissible","permission","permissive",
+            "permit","permutation","permute","pernicious","pernickety",
+            "pernod","peroration","peroxide","perpendicular","perpetrate",
+            "perpetual","perpetuate","perpetuity","perplex","perplexed",
+            "perplexity","perquisite","perry","persecute","persecution",
+            "perseverance","persevere","persevering","persian","persiflage",
+            "persimmon","persist","persistence","persistent","persnickety",
+            "person","persona","personable","personage","personal",
+            "personalise","personalities","personality","personalize","personally",
+            "personification","personify","personnel","perspective","perspex",
+            "perspicacious","perspiration","perspire","persuade","persuasion",
+            "persuasive","pert","pertain","pertinacious","pertinent",
+            "perturb","perturbation","peruke","peruse","pervade",
+            "pervasive","perverse","perversion","perversity","pervert",
+            "peseta","pesky","peso","pessary","pessimism",
+            "pessimist","pest","pester","pesticide","pestiferous",
+            "pestilence","pestilent","pestle","pet","petal",
+            "petaled","petalled","petard","peterman","petite",
+            "petition","petitioner","petrel","petrifaction","petrify",
+            "petrochemical","petrol","petroleum","petrology","petticoat",
+            "pettifogging","pettish","petty","petulant","petunia",
+            "pew","pewit","pewter","peyote","pfennig",
+            "phaeton","phagocyte","phalanx","phalarope","phallic",
+            "phallus","phantasmagoria","phantasmal","phantasy","phantom",
+            "pharaoh","pharisaic","pharisee","pharmaceutical","pharmacist",
+            "pharmacology","pharmacopoeia","pharmacy","pharyngitis","pharynx",
+            "phase","phd","pheasant","phenobarbitone","phenol",
+            "phenomenal","phenomenally","phenomenon","phew","phi",
+            "phial","philander","philanthropic","philanthropist","philanthropy",
+            "philatelist","philately","philharmonic","philhellene","philippic",
+            "philistine","philological","philologist","philology","philosopher",
+            "philosophical","philosophise","philosophize","philosophy","philter",
+            "philtre","phizog","phlebitis","phlebotomy","phlegm",
+            "phlegmatic","phlox","phobia","phoenician","phoenix",
+            "phone","phoneme","phonemic","phonemics","phonetic",
+            "phonetician","phonetics","phoney","phonic","phonics",
+            "phonograph","phonology","phony","phooey","phosphate",
+            "phosphorescence","phosphorescent","phosphoric","phosphorus","photo",
+            "photocopier","photocopy","photoelectric","photogenic","photograph",
+            "photographer","photographic","photography","photosensitive","photosensitize",
+            "photostat","photosynthesis","phototsensitise","phrasal","phrase",
+            "phrasebook","phraseology","phrenetic","phrenology","phthisis",
+            "phut","phylloxera","phylum","physic","physical",
+            "physically","physician","physicist","physics","physio",
+            "physiognomy","physiology","physiotherapy","physique","pianissimo",
+            "pianist","piano","pianola","piaster","piastre",
+            "piazza","pibroch","picador","picaresque","piccalilli",
+            "piccaninny","piccolo","pick","pickaback","pickaninny",
+            "pickax","pickaxe","picked","picker","pickerel",
+            "picket","pickings","pickle","pickled","pickpocket",
+            "picky","picnic","picnicker","pictorial","picture",
+            "pictures","picturesque","piddle","piddling","pidgin",
+            "pie","piebald","piece","piecemeal","pieces",
+            "piecework","piecrust","pied","pier","pierce",
+            "piercing","pierrot","piety","piezoelectric","piffle",
+            "piffling","pig","pigeon","pigeonhole","piggery",
+            "piggish","piggy","piggyback","piggybank","pigheaded",
+            "piglet","pigment","pigmentation","pigmy","pignut",
+            "pigskin","pigsticking","pigsty","pigswill","pigtail",
+            "pike","pikestaff","pilaster","pilau","pilchard",
+            "pile","piles","pileup","pilfer","pilferage",
+            "pilgrim","pilgrimage","pill","pillage","pillar",
+            "pillbox","pillion","pillock","pillory","pillow",
+            "pillowcase","pilot","pimento","pimp","pimpernel",
+            "pimple","pin","pinafore","pincer","pincers",
+            "pinch","pinchbeck","pinched","pinchpenny","pincushion",
+            "pine","pineal","pineapple","pinecone","pinewood",
+            "piney","ping","pinhead","pinion","pink",
+            "pinkeye","pinkie","pinkish","pinko","pinky",
+            "pinnace","pinnacle","pinnate","pinny","pinpoint",
+            "pinprick","pinstripe","pint","pinta","pintable",
+            "pinup","pinwheel","piny","pioneer","pious",
+            "piousness","pip","pipal","pipe","pipeline",
+            "piper","pipes","pipette","piping","pipit",
+            "pippin","pipsqueak","piquant","pique","piquet",
+            "piracy","piranha","pirate","pirouette","piscatorial",
+            "pish","piss","pissed","pistachio","pistil",
+            "pistol","piston","pit","pitch","pitchblende",
+            "pitcher","pitchfork","piteous","pitfall","pith",
+            "pithead","pithy","pitiable","pitiful","pitiless",
+            "pitman","piton","pittance","pituitary","pity",
+            "pivot","pivotal","pixie","pixilated","pixy",
+            "pizza","pizzicato","placard","placate","place",
+            "placebo","placed","placekick","placement","placenta",
+            "placid","placket","plagarise","plagarize","plagiarism",
+            "plague","plaguey","plaice","plaid","plain",
+            "plainly","plainsman","plainsong","plainspoken","plaint",
+            "plaintiff","plaintive","plait","plan","planchette",
+            "planet","planetarium","planetary","plangent","plank",
+            "planking","plankton","planner","plant","plantain",
+            "plantation","planter","plaque","plash","plasma",
+            "plaster","plasterboard","plastered","plasterer","plastering",
+            "plastic","plasticine","plasticity","plastics","plastron",
+            "plate","plateau","platelayer","platform","plating",
+            "platinum","platitude","platonic","platoon","platter",
+            "platypus","plaudit","plausible","play","playable",
+            "playback","playbill","playboy","player","playful",
+            "playgoer","playground","playgroup","playhouse","playmate",
+            "playpen","playroom","playsuit","plaything","playtime",
+            "playwright","plaza","plea","pleach","plead",
+            "pleading","pleadings","pleasant","pleasantry","please",
+            "pleased","pleasing","pleasurable","pleasure","pleat",
+            "pleb","plebeian","plebiscite","plectrum","pled",
+            "pledge","pleistocene","plenary","plenipotentiary","plenitude",
+            "plenteous","plentiful","plenty","pleonasm","plethora",
+            "pleurisy","plexus","pliable","pliant","pliers",
+            "plight","plimsoll","plinth","pliocene","plod",
+            "plodder","plonk","plop","plosive","plot",
+            "plough","ploughboy","ploughman","ploughshare","plover",
+            "plow","plowboy","plowman","plowshare","ploy",
+            "pluck","plucky","plug","plughole","plum",
+            "plumage","plumb","plumbago","plumber","plumbing",
+            "plume","plumed","plummet","plummy","plump",
+            "plunder","plunge","plunger","plunk","pluperfect",
+            "plural","pluralism","plurality","pluribus","plus",
+            "plush","plushy","pluto","plutocracy","plutocrat",
+            "plutonium","ply","plywood","pneumatic","pneumoconiosis",
+            "pneumonia","poach","poacher","pock","pocked",
+            "pocket","pocketbook","pocketful","pocketknife","pockmark",
+            "pockmarked","pod","podgy","podiatry","podium",
+            "poem","poesy","poet","poetaster","poetess",
+            "poetic","poetical","poetry","pogrom","poignancy",
+            "poignant","poinsettia","point","pointed","pointer",
+            "pointillism","pointless","points","pointsman","poise",
+            "poised","poison","poisonous","poke","poker",
+            "pokerwork","poky","polack","polar","polarisation",
+            "polarise","polarity","polarization","polarize","polaroid",
+            "polaroids","polder","pole","poleax","poleaxe",
+            "polecat","polemic","polemical","polemics","police",
+            "policeman","policewoman","policy","polio","polish",
+            "polisher","politburo","polite","politic","politicalise",
+            "politicalize","politician","politicise","politicize","politicking",
+            "politico","politics","polity","polka","poll",
+            "pollard","pollen","pollinate","polling","pollster",
+            "pollutant","pollute","pollution","polly","pollyanna",
+            "polo","polonaise","polony","poltergeist","poltroon",
+            "poly","polyandrous","polyandry","polyanthus","polyester",
+            "polyethylene","polygamist","polygamous","polygamy","polyglot",
+            "polygon","polymath","polymer","polymorphous","polyp",
+            "polyphony","polypus","polystyrene","polysyllable","polytechnic",
+            "polytheism","polythene","polyurethane","pomade","pomander",
+            "pomegranate","pomeranian","pommel","pommy","pomp",
+            "pompom","pomposity","pompous","ponce","poncho",
+            "poncy","pond","ponder","ponderous","pone",
+            "pong","poniard","pontiff","pontifical","pontificals",
+            "pontificate","pontoon","pony","ponytail","pooch",
+            "poodle","poof","pooh","pool","poolroom",
+            "pools","poop","pooped","poor","poorhouse",
+            "poorly","poorness","poove","pop","popadam",
+            "popadum","popcorn","popery","popgun","popinjay",
+            "popish","poplar","poplin","poppa","popper",
+            "poppet","poppy","poppycock","popshop","popsy",
+            "populace","popular","popularise","popularity","popularize",
+            "popularly","populate","population","populism","populist",
+            "populous","porcelain","porch","porcine","porcupine",
+            "pore","pork","porker","porky","porn",
+            "pornography","porosity","porous","porphyry","porpoise",
+            "porridge","porringer","port","portable","portage",
+            "portal","portals","portcullis","portend","portent",
+            "portentous","porter","porterage","porterhouse","portfolio",
+            "porthole","portico","portion","portly","portmanteau",
+            "portrait","portraitist","portraiture","portray","portrayal",
+            "pose","poser","poseur","posh","posit",
+            "position","positional","positive","positively","positiveness",
+            "positivism","positron","posse","possess","possessed",
+            "possession","possessive","possessor","posset","possibility",
+            "possible","possibly","possum","post","postage",
+            "postal","postbag","postbox","postcard","postcode",
+            "postdate","poster","posterior","posterity","postern",
+            "postgraduate","posthaste","posthumous","postilion","postillion",
+            "posting","postman","postmark","postmaster","postmortem",
+            "postpaid","postpone","postprandial","postscript","postulant",
+            "postulate","posture","postwar","posy","pot",
+            "potable","potash","potassium","potation","potato",
+            "potbellied","potbelly","potboiler","potbound","poteen",
+            "potency","potent","potentate","potential","potentiality",
+            "pothead","pother","potherb","pothole","potholing",
+            "pothouse","pothunter","potion","potluck","potpourri",
+            "potsherd","potshot","pottage","potted","potter",
+            "potteries","pottery","potty","pouch","pouf",
+            "pouffe","poulterer","poultice","poultry","pounce",
+            "pound","poundage","pounding","pour","pout",
+            "poverty","powder","powdered","powdery","power",
+            "powerboat","powerful","powerhouse","powerless","powers",
+            "powwow","pox","pps","practicable","practical",
+            "practicality","practically","practice","practiced","practise",
+            "practised","practitioner","praesidium","praetor","praetorian",
+            "pragmatic","pragmatism","prairie","praise","praises",
+            "praiseworthy","praline","pram","prance","prank",
+            "prankster","prat","prate","pratfall","prattle",
+            "prawn","praxis","pray","prayer","preach",
+            "preachify","preamble","prearrange","prebend","prebendary",
+            "precarious","precast","precaution","precede","precedence",
+            "precedent","preceding","precentor","precept","preceptor",
+            "precession","precinct","precincts","preciosity","precious",
+            "precipice","precipitate","precipitation","precipitous","precise",
+            "precisely","precision","preclude","precocious","precognition",
+            "preconceived","preconception","precondition","precook","precursor",
+            "predator","predatory","predecease","predecessor","predestinate",
+            "predestination","predestine","predetermine","predeterminer","predicament",
+            "predicate","predicative","predict","predictable","prediction",
+            "predigest","predilection","predispose","predisposition","predominance",
+            "predominant","predominantly","predominate","preeminent","preeminently",
+            "preempt","preemption","preemptive","preen","preexist",
+            "preexistence","prefab","prefabricate","prefabricated","preface",
+            "prefatory","prefect","prefecture","prefer","preferable",
+            "preference","preferential","preferment","prefigure","prefix",
+            "pregnancy","pregnant","preheat","prehensile","prehistoric",
+            "prehistory","prejudge","prejudice","prejudiced","prejudicial",
+            "prelacy","prelate","prelim","preliminary","prelims",
+            "preliterate","prelude","premarital","premature","premeditate",
+            "premeditated","premier","premise","premises","premiss",
+            "premium","premonition","premonitory","prenatal","prentice",
+            "preoccupation","preoccupied","preoccupy","preordain","prep",
+            "prepack","preparation","preparatory","prepare","prepared",
+            "preparedness","prepay","preponderance","preponderant","preponderate",
+            "preposition","prepositional","prepossessed","prepossessing","prepossession",
+            "preposterous","prepuce","prerecord","prerequisite","prerogative",
+            "presage","presbyter","presbyterian","presbytery","preschool",
+            "prescient","prescribe","prescribed","prescript","prescription",
+            "prescriptive","presence","present","presentable","presentation",
+            "presenter","presentiment","presently","presents","preservable",
+            "preservation","preservative","preserve","preserver","preset",
+            "preshrunk","preside","presidency","president","presidential",
+            "presidium","press","pressed","pressgang","pressing",
+            "pressman","pressmark","pressure","pressurise","pressurize",
+            "prestidigitation","prestige","prestigious","prestissimo","presto",
+            "prestressed","presumable","presume","presumption","presumptive",
+            "presumptuous","presuppose","presupposition","pretence","pretend",
+            "pretended","pretender","pretense","pretension","pretentious",
+            "pretentiousness","preterit","preterite","preternatural","pretext",
+            "pretor","pretorian","prettify","prettily","pretty",
+            "pretzel","prevail","prevailing","prevalent","prevaricate",
+            "prevent","prevention","preventive","preview","previous",
+            "prevision","prewar","prey","price","priceless",
+            "pricey","prick","prickle","prickly","pricy",
+            "pride","priest","priesthood","priestly","prig",
+            "priggish","prim","primacy","primaeval","primal",
+            "primarily","primary","primate","prime","primer",
+            "primeval","priming","primitive","primogeniture","primordial",
+            "primp","primrose","primula","primus","prince",
+            "princedom","princely","princess","principal","principality",
+            "principally","principle","principled","principles","prink",
+            "print","printable","printer","printing","printout",
+            "prior","priority","priory","prise","prism",
+            "prismatic","prison","prisoner","prissy","pristine",
+            "prithee","privacy","private","privateer","privation",
+            "privet","privilege","privileged","privily","privy",
+            "prize","prizefight","prizeman","pro","probability",
+            "probable","probably","probate","probation","probationer",
+            "probe","probity","problem","problematic","proboscis",
+            "procedural","procedure","proceed","proceeding","proceedings",
+            "proceeds","process","procession","processional","proclaim",
+            "proclamation","proclivity","proconsul","proconsulate","procrastinate",
+            "procreate","proctor","procure","procurer","prod",
+            "prodigal","prodigious","prodigy","produce","producer",
+            "product","production","productive","productivity","proem",
+            "prof","profanation","profane","profanity","profess",
+            "professed","professedly","profession","professional","professionalism",
+            "professor","professorial","professorship","proffer","proficient",
+            "profile","profit","profitable","profiteer","profligacy",
+            "profligate","profound","profundity","profuse","profusion",
+            "progenitor","progeny","progesterone","prognathous","prognosis",
+            "prognostic","prognosticate","prognostication","program","programer",
+            "programmer","progress","progression","progressive","prohibit",
+            "prohibition","prohibitionist","prohibitive","prohibitory","project",
+            "projectile","projection","projectionist","projector","prolapse",
+            "prole","prolegomena","proletarian","proletariat","proliferate",
+            "proliferation","prolific","prolix","prolog","prologue",
+            "prolong","prolongation","prolonged","prom","promenade",
+            "promenader","prominence","prominent","promiscuity","promiscuous",
+            "promise","promising","promontory","promote","promoter",
+            "promotion","prompt","prompter","promptness","promulgate",
+            "pron","prone","prong","pronominal","pronoun",
+            "pronounce","pronounceable","pronounced","pronouncement","pronto",
+            "pronunciamento","pronunciation","proof","proofread","prop",
+            "propaganda","propagandise","propagandist","propagandize","propagate",
+            "propagation","propane","propel","propellant","propellent",
+            "propeller","propensity","proper","properly","propertied",
+            "property","prophecy","prophesy","prophet","prophetess",
+            "prophetic","prophets","prophylactic","prophylaxis","propinquity",
+            "propitiate","propitiatory","propitious","propjet","proponent",
+            "proportion","proportional","proportionate","proportions","proposal",
+            "propose","proposition","propound","proprietary","proprieties",
+            "proprietor","proprietress","propriety","propulsion","propulsive",
+            "propylene","prorogation","prorogue","prosaic","proscenium",
+            "proscribe","proscription","prose","prosecute","prosecution",
+            "prosecutor","proselyte","proselytise","proselytize","prosody",
+            "prospect","prospective","prospector","prospects","prospectus",
+            "prosper","prosperity","prosperous","prostate","prosthesis",
+            "prostitute","prostitution","prostrate","prostration","prosy",
+            "protagonist","protean","protect","protection","protectionism",
+            "protective","protector","protectorate","protein","protest",
+            "protestant","protestation","protocol","proton","protoplasm",
+            "prototype","protozoa","protozoan","protozoon","protract",
+            "protraction","protractor","protrude","protrusion","protrusive",
+            "protuberance","protuberant","proud","provable","prove",
+            "proven","provenance","provender","proverb","proverbial",
+            "proverbially","proverbs","provide","provided","providence",
+            "provident","providential","provider","providing","province",
+            "provinces","provincial","provision","provisional","provisions",
+            "proviso","provocation","provocative","provoke","provoking",
+            "provost","prow","prowess","prowl","prowler",
+            "prox","proximal","proximate","proximity","proximo",
+            "proxy","prude","prudence","prudent","prudential",
+            "prudery","prudish","prune","pruning","prurience",
+            "prurient","pruritus","prussian","pry","psalm",
+            "psalmist","psalmody","psalms","psalter","psaltery",
+            "psephology","pseud","pseudonym","pseudonymous","pshaw",
+            "psittacosis","psoriasis","psst","psyche","psychedelic",
+            "psychiatric","psychiatrist","psychiatry","psychic","psycho",
+            "psychoanalyse","psychoanalysis","psychoanalyst","psychoanalytic","psychoanalyze",
+            "psychokinesis","psychological","psychologist","psychology","psychopath",
+            "psychosis","psychosomatic","psychotherapy","psychotic","pta",
+            "ptarmigan","pterodactyl","pto","ptomaine","pub",
+            "puberty","pubic","public","publican","publication",
+            "publicise","publicist","publicity","publicize","publish",
+            "publisher","publishing","puce","puck","pucker",
+            "puckish","pud","pudding","puddle","pudendum",
+            "pudgy","pueblo","puerile","puerility","puerperal",
+            "puff","puffball","puffed","puffer","puffin",
+            "puffy","pug","pugilism","pugilist","pugnacious",
+            "pugnacity","puissance","puissant","puke","pukka",
+            "pulchritude","pulchritudinous","pule","pull","pullback",
+            "pullet","pulley","pullman","pullout","pullover",
+            "pullthrough","pullulate","pulmonary","pulp","pulpit",
+            "pulsar","pulsate","pulsation","pulse","pulverise",
+            "pulverize","puma","pumice","pummel","pump",
+            "pumpernickel","pumpkin","pun","punch","punchy",
+            "punctilio","punctilious","punctual","punctuate","punctuation",
+            "puncture","pundit","pungent","punic","punish",
+            "punishable","punishing","punishment","punitive","punjabi",
+            "punk","punkah","punnet","punster","punt",
+            "puny","pup","pupa","pupate","pupil",
+            "puppet","puppeteer","puppy","purblind","purchase",
+            "purchaser","purdah","pure","pureblooded","purebred",
+            "puree","purely","pureness","purgation","purgative",
+            "purgatory","purge","purification","purify","purist",
+            "puritan","puritanical","purity","purl","purler",
+            "purlieus","purloin","purple","purplish","purport",
+            "purpose","purposeful","purposeless","purposely","purposive",
+            "purr","purse","purser","pursuance","pursue",
+            "pursuer","pursuit","purulent","purvey","purveyance",
+            "purveyor","purview","pus","push","pushbike",
+            "pushcart","pushchair","pushed","pusher","pushover",
+            "pushy","pusillanimous","puss","pussy","pussycat",
+            "pussyfoot","pustule","put","putative","putrefaction",
+            "putrefactive","putrefy","putrescent","putrid","putsch",
+            "putt","puttee","putter","putto","putty",
+            "puzzle","puzzlement","puzzler","pvc","pygmy",
+            "pyjama","pyjamas","pylon","pyorrhea","pyorrhoea",
+            "pyramid","pyre","pyrex","pyrexia","pyrites",
+            "pyromania","pyromaniac","pyrotechnic","pyrotechnics","python",
+            "pyx","qed","qty","qua","quack",
+            "quackery","quad","quadragesima","quadrangle","quadrangular",
+            "quadrant","quadrilateral","quadrille","quadrillion","quadroon",
+            "quadruped","quadruple","quadruplet","quadruplicate","quaff",
+            "quagga","quagmire","quail","quaint","quake",
+            "quaker","qualification","qualifications","qualified","qualifier",
+            "qualify","qualitative","quality","qualm","quandary",
+            "quantify","quantitative","quantity","quantum","quarantine",
+            "quark","quarrel","quarrelsome","quarry","quart",
+            "quarter","quarterdeck","quarterfinal","quartering","quarterly",
+            "quartermaster","quarters","quarterstaff","quartet","quartette",
+            "quarto","quartz","quasar","quash","quatercentenary",
+            "quatrain","quaver","quay","quean","queasy",
+            "queen","queenly","queer","quell","quench",
+            "quenchless","querulous","query","quest","question",
+            "questionable","questioner","questioning","questionnaire","quetzal",
+            "queue","quibble","quick","quicken","quickie",
+            "quicklime","quicksand","quicksilver","quickstep","quid",
+            "quiescent","quiet","quieten","quietism","quietude",
+            "quietus","quiff","quill","quilt","quilted",
+            "quin","quince","quinine","quinquagesima","quinsy",
+            "quintal","quintessence","quintet","quintette","quintuplet",
+            "quip","quire","quirk","quisling","quit",
+            "quits","quittance","quitter","quiver","quixotic",
+            "quiz","quizmaster","quizzical","quod","quoit",
+            "quoits","quondam","quorum","quota","quotable",
+            "quotation","quote","quoth","quotidian","quotient",
+            "rabbi","rabbinical","rabbit","rabble","rabelaisian",
+            "rabid","rabies","rac","raccoon","race",
+            "racecourse","racehorse","raceme","racer","races",
+            "racetrack","racial","racialism","racially","racing",
+            "rack","racket","racketeer","racketeering","rackets",
+            "raconteur","racoon","racquet","racquets","racy",
+            "radar","radial","radiance","radiant","radiate",
+            "radiation","radiator","radical","radicalise","radicalism",
+            "radicalize","radicle","radii","radio","radioactive",
+            "radioactivity","radiogram","radiograph","radiographer","radiography",
+            "radioisotope","radiolocation","radiology","radiotherapist","radiotherapy",
+            "radish","radium","radius","raffia","raffish",
+            "raffle","raft","rafter","raftered","raftsman",
+            "rag","raga","ragamuffin","ragbag","rage",
+            "ragged","raglan","ragout","ragtag","ragtime",
+            "raid","raider","rail","railhead","railing",
+            "raillery","railroad","rails","railway","raiment",
+            "rain","rainbow","raincoat","raindrop","rainfall",
+            "rainproof","rains","rainstorm","rainwater","rainy",
+            "raise","raisin","raj","raja","rajah",
+            "rake","rakish","rallentando","rally","ram",
+            "ramadan","ramble","rambler","rambling","rambunctious",
+            "ramekin","ramification","ramify","ramjet","ramp",
+            "rampage","rampant","rampart","ramrod","ramshackle",
+            "ran","ranch","rancher","rancid","rancor",
+            "rancorous","rancour","rand","random","randy",
+            "ranee","rang","range","ranger","rani",
+            "rank","ranker","ranking","rankle","ranks",
+            "ransack","ransom","rant","rap","rapacious",
+            "rapacity","rape","rapid","rapids","rapier",
+            "rapine","rapist","rapport","rapprochement","rapscallion",
+            "rapt","rapture","rapturous","rare","rarebit",
+            "rarefied","rarefy","rarely","raring","rarity",
+            "rascal","rascally","rash","rasher","rasp",
+            "raspberry","rat","ratable","ratchet","rate",
+            "rateable","ratepayer","rather","ratify","rating",
+            "ratio","ratiocination","ration","rational","rationale",
+            "rationalise","rationalism","rationalist","rationalize","rations",
+            "ratlin","ratline","rats","rattan","ratter",
+            "rattle","rattlebrained","rattlesnake","rattletrap","rattling",
+            "ratty","raucous","raunchy","ravage","ravages",
+            "rave","ravel","raven","ravening","ravenous",
+            "raver","ravine","raving","ravings","ravioli",
+            "ravish","ravishing","ravishment","raw","rawhide",
+            "ray","rayon","raze","razor","razorback",
+            "razzle","reach","react","reaction","reactionary",
+            "reactivate","reactive","reactor","read","readable",
+            "readdress","reader","readership","readily","readiness",
+            "reading","readjust","readout","ready","reafforest",
+            "reagent","real","realign","realisable","realisation",
+            "realise","realism","realist","realistic","reality",
+            "realizable","realization","realize","really","realm",
+            "realpolitik","realtor","realty","ream","reanimate",
+            "reap","reaper","reappear","reappraisal","rear",
+            "rearguard","rearm","rearmament","rearmost","rearrange",
+            "rearward","rearwards","reason","reasonable","reasonably",
+            "reasoned","reasoning","reassure","rebarbative","rebate",
+            "rebel","rebellion","rebellious","rebind","rebirth",
+            "reborn","rebound","rebuff","rebuild","rebuke",
+            "rebus","rebut","rebuttal","recalcitrance","recalcitrant",
+            "recall","recant","recap","recapitulate","recapitulation",
+            "recapture","recast","recce","recd","recede",
+            "receipt","receipts","receivable","receive","received",
+            "receiver","receivership","receiving","recent","recently",
+            "receptacle","reception","receptionist","receptive","recess",
+            "recession","recessional","recessive","recharge","recidivist",
+            "recipe","recipient","reciprocal","reciprocate","reciprocity",
+            "recital","recitation","recitative","recite","reck",
+            "reckless","reckon","reckoner","reckoning","reclaim",
+            "reclamation","recline","recluse","recognise","recognition",
+            "recognizance","recognize","recoil","recollect","recollection",
+            "recommend","recommendation","recompense","reconcile","reconciliation",
+            "recondite","recondition","reconnaissance","reconnoiter","reconnoitre",
+            "reconsider","reconstitute","reconstruct","reconstruction","record",
+            "recorder","recording","recordkeeping","recount","recoup",
+            "recourse","recover","recovery","recreant","recreate",
+            "recreation","recreational","recriminate","recrimination","recrudescence",
+            "recruit","rectal","rectangle","rectangular","rectification",
+            "rectifier","rectify","rectilinear","rectitude","recto",
+            "rector","rectory","rectum","recumbent","recuperate",
+            "recuperative","recur","recurrence","recurrent","recurved",
+            "recusant","recycle","red","redbreast","redbrick",
+            "redcap","redcoat","redcurrant","redden","reddish",
+            "redecorate","redeem","redeemer","redemption","redemptive",
+            "redeploy","redhead","rediffusion","redirect","redistribute",
+            "redo","redolence","redolent","redouble","redoubt",
+            "redoubtable","redound","redress","redskin","reduce",
+            "reduction","redundancy","redundant","reduplicate","redwing",
+            "redwood","reecho","reed","reeds","reeducate",
+            "reedy","reef","reefer","reek","reel",
+            "reentry","reeve","ref","reface","refashion",
+            "refectory","refer","referee","reference","referendum",
+            "refill","refine","refined","refinement","refiner",
+            "refinery","refit","reflate","reflation","reflect",
+            "reflection","reflective","reflector","reflex","reflexes",
+            "reflexive","refloat","refoot","reforest","reform",
+            "reformation","reformatory","refract","refractory","refrain",
+            "refresh","refresher","refreshing","refreshment","refreshments",
+            "refrigerant","refrigerate","refrigeration","refrigerator","reft",
+            "refuel","refuge","refugee","refulgence","refulgent",
+            "refund","refurbish","refusal","refuse","refutable",
+            "refutation","refute","regain","regal","regale",
+            "regalia","regard","regardful","regarding","regardless",
+            "regards","regatta","regency","regenerate","regent",
+            "reggae","regicide","regime","regimen","regiment",
+            "regimental","regimentals","regina","region","regional",
+            "regions","register","registrar","registration","registry",
+            "regnant","regress","regressive","regret","regrets",
+            "regrettable","regrettably","regroup","regular","regularise",
+            "regularity","regularize","regularly","regulate","regulation",
+            "regulator","regulo","regurgitate","rehabilitate","rehash",
+            "rehear","rehearsal","rehearse","rehouse","reich",
+            "reification","reify","reign","reimburse","reimbursement",
+            "rein","reincarnate","reincarnation","reindeer","reinforce",
+            "reinforcement","reinforcements","reins","reinstate","reinsure",
+            "reissue","reiterate","reject","rejection","rejoice",
+            "rejoicing","rejoicings","rejoin","rejoinder","rejuvenate",
+            "rekindle","relaid","relapse","relate","related",
+            "relation","relational","relations","relationship","relative",
+            "relatively","relativism","relativistic","relativity","relax",
+            "relaxation","relaxing","relay","release","relegate",
+            "relent","relentless","relevance","relevant","reliability",
+            "reliable","reliance","reliant","relic","relics",
+            "relict","relief","relieve","relieved","religion",
+            "religious","religiously","reline","relinquish","reliquary",
+            "relish","relive","reload","relocate","reluctance",
+            "reluctant","reluctantly","rely","remain","remainder",
+            "remains","remake","remand","remark","remarkable",
+            "remarkably","remarry","remediable","remedial","remedy",
+            "remember","remembrance","remilitarise","remilitarize","remind",
+            "reminder","reminisce","reminiscence","reminiscences","reminiscent",
+            "remiss","remission","remit","remittance","remittent",
+            "remnant","remodel","remold","remonstrance","remonstrate",
+            "remorse","remorseful","remote","remotely","remould",
+            "remount","removal","remove","remover","remunerate",
+            "remunerative","renaissance","renal","rename","renascent",
+            "rend","render","rendering","rendezvous","rendition",
+            "renegade","renege","renegue","renew","renewable",
+            "renewal","rennet","renounce","renovate","renown",
+            "renowned","rent","rental","renter","rentier",
+            "renunciation","reopen","reorganise","reorganize","rep",
+            "repaid","repair","reparable","reparation","reparations",
+            "repartee","repast","repatriate","repay","repayable",
+            "repayment","repeal","repeat","repeated","repeatedly",
+            "repeater","repeating","repel","repellent","repent",
+            "repentance","repentant","repercussion","repertoire","repertory",
+            "repetition","repetitious","repine","replace","replacement",
+            "replay","replenish","replete","repletion","replica",
+            "replicate","reply","repoint","report","reportage",
+            "reportedly","reporter","repose","repository","repossess",
+            "repot","repp","reprehend","reprehensible","represent",
+            "representation","representational","representations","representative","repress",
+            "repressed","repression","repressive","reprieve","reprimand",
+            "reprint","reprisal","reprise","reproach","reprobate",
+            "reproduce","reproducer","reproduction","reproductive","reproof",
+            "reprove","reproving","reptile","reptilian","republic",
+            "republican","republicanism","repudiate","repugnance","repugnant",
+            "repulse","repulsion","repulsive","reputable","reputation",
+            "repute","reputed","reputedly","request","requiem",
+            "require","requirement","requisite","requisition","requital",
+            "requite","reredos","rerun","rescind","rescript",
+            "rescue","research","reseat","resemblance","resemble",
+            "resent","resentment","reservation","reserve","reserved",
+            "reservedly","reservist","reservoir","reset","resettle",
+            "reshuffle","reside","residence","residency","resident",
+            "residential","residual","residuary","residue","resign",
+            "resignation","resigned","resilience","resilient","resin",
+            "resinated","resist","resistance","resistant","resistor",
+            "resole","resolute","resolution","resolvable","resolve",
+            "resonance","resonant","resonate","resonator","resort",
+            "resound","resounding","resource","resourceful","resources",
+            "respect","respectability","respectable","respecter","respectful",
+            "respecting","respective","respectively","respects","respiration",
+            "respirator","respiratory","respire","respite","resplendence",
+            "resplendent","respond","respondent","response","responsibility",
+            "responsible","responsibly","responsive","rest","restage",
+            "restate","restaurant","restaurateur","restful","restitution",
+            "restive","restless","restock","restoration","restorative",
+            "restore","restorer","restrain","restrained","restraint",
+            "restrict","restricted","restriction","restrictive","restructure",
+            "result","resultant","resume","resumption","resurface",
+            "resurgence","resurgent","resurrect","resurrection","resuscitate",
+            "retail","retailer","retain","retainer","retake",
+            "retaliate","retaliation","retaliatory","retard","retarded",
+            "retch","retd","retell","retention","retentive",
+            "rethink","reticence","reticent","reticulated","reticulation",
+            "reticule","retina","retinue","retire","retired",
+            "retirement","retiring","retort","retouch","retrace",
+            "retract","retractable","retractile","retraction","retread",
+            "retreat","retrench","retrial","retraining","retribution",
+            "retributive","retrieval","retrieve","retriever","retroactive",
+            "retroflex","retrograde","retrogress","retrogressive","retrospect",
+            "retrospection","retrospective","retroversion","retsina","return",
+            "returnable","returns","reunion","reunite","reuse",
+            "rev","revalue","revamp","reveal","revealing",
+            "reveille","revel","revelation","revelry","revenge",
+            "revenue","reverberant","reverberate","reverberation","revere",
+            "reverence","reverend","reverent","reverential","reverie",
+            "revers","reversal","reverse","reversion","reversionary",
+            "revert","revetment","review","reviewer","revile",
+            "revise","revision","revisionism","revitalise","revitalize",
+            "revival","revivalist","revive","revivify","revocable",
+            "revocation","revoke","revolt","revolting","revolution",
+            "revolutionary","revolutionise","revolutionize","revolve","revolver",
+            "revolving","revue","revulsion","reward","rewarding",
+            "rewards","rewire","reword","rewrite","rex",
+            "rhapsodise","rhapsodize","rhapsody","rhea","rhenish",
+            "rheostat","rhetoric","rhetorical","rhetorically","rhetorician",
+            "rheum","rheumatic","rheumaticky","rheumatics","rheumatism",
+            "rheumatoid","rhinestone","rhinoceros","rhizome","rhododendron",
+            "rhomboid","rhombus","rhubarb","rhyme","rhymed",
+            "rhymester","rhythm","rhythmic","rib","ribald",
+            "ribaldry","ribbed","ribbing","ribbon","riboflavin",
+            "rice","rich","riches","richly","richness",
+            "rick","rickets","rickety","ricksha","rickshaw",
+            "ricochet","rid","riddance","ridden","riddle",
+            "ride","rider","riderless","ridge","ridgepole",
+            "ridicule","ridiculous","riding","riesling","rife",
+            "riff","riffle","riffraff","rifle","rifleman",
+            "rifles","rifling","rift","rig","rigging",
+            "right","righteous","rightful","rightist","rightly",
+            "rights","rightward","rightwards","rigid","rigidity",
+            "rigmarole","rigor","rigorous","rigour","rile",
+            "rill","rim","rime","rind","rinderpest",
+            "ring","ringer","ringleader","ringlet","ringmaster",
+            "ringside","ringworm","rink","rinse","riot",
+            "riotous","rip","riparian","ripcord","ripen",
+            "riposte","ripple","ripsaw","riptide","rise",
+            "riser","risibility","risible","rising","risk",
+            "risky","risotto","rissole","rite","ritual",
+            "ritualism","ritzy","rival","rivalry","rive",
+            "river","riverbed","riverside","rivet","riveter",
+            "riveting","riviera","rivulet","rna","roach",
+            "road","roadbed","roadblock","roadhouse","roadman",
+            "roadside","roadstead","roadster","roadway","roadworthy",
+            "roam","roan","roar","roaring","roast",
+            "roaster","roasting","rob","robber","robbery",
+            "robe","robin","robot","robust","rock",
+            "rockbound","rocker","rockery","rocket","rocketry",
+            "rocks","rocky","rococo","rod","rode",
+            "rodent","rodeo","rodomontade","roe","roebuck",
+            "rogation","roger","rogue","roguery","roguish",
+            "roisterer","role","roll","roller","rollicking",
+            "rolling","rolls","romaic","roman","romance",
+            "romanesque","romantic","romanticise","romanticism","romanticize",
+            "romany","romish","romp","romper","rompers",
+            "rondeau","rondo","roneo","rood","roodscreen",
+            "roof","roofing","roofless","rooftree","rook",
+            "rookery","rookie","room","roomer","roommate",
+            "rooms","roomy","roost","rooster","root",
+            "rooted","rootless","roots","rope","ropedancer",
+            "ropes","ropewalk","ropeway","ropey","ropy",
+            "roquefort","rosary","rose","roseate","rosebud",
+            "roseleaf","rosemary","rosette","rosewater","rosewood",
+            "rosin","roster","rostrum","rosy","rot",
+            "rota","rotary","rotate","rotation","rotatory",
+            "rotgut","rotisserie","rotogravure","rotor","rotten",
+            "rottenly","rotter","rotund","rotunda","rouble",
+            "rouge","rough","roughage","roughcast","roughen",
+            "roughhouse","roughly","roughneck","roughness","roughrider",
+            "roughshod","roulette","round","roundabout","roundel",
+            "roundelay","rounders","roundhead","roundhouse","roundish",
+            "roundly","rounds","roundsman","roundup","roup",
+            "rouse","rousing","roustabout","rout","route",
+            "routine","roux","rove","rover","row",
+            "rowan","rowanberry","rowdy","rowdyism","rowel",
+            "rower","rowing","rowlock","royal","royalist",
+            "royalty","rpm","rsm","rsvp","rub",
+            "rubber","rubberise","rubberize","rubberneck","rubbery",
+            "rubbing","rubbish","rubbishy","rubble","rubdown",
+            "rubella","rubicon","rubicund","ruble","rubric",
+            "ruby","ruck","rucksack","ruckus","ruction",
+            "ructions","rudder","ruddle","ruddy","rude",
+            "rudely","rudiment","rudimentary","rudiments","rue",
+   

<TRUNCATED>

[45/50] [abbrv] lucenenet git commit: Reset the debug flag in TestFramework

Posted by sy...@apache.org.
Reset the debug flag in TestFramework


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/b04aca63
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/b04aca63
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/b04aca63

Branch: refs/heads/analysis-work
Commit: b04aca631adbc18ee4460fcf0dd6a6cfb416ce1c
Parents: 4d7b23c
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Tue Aug 23 10:52:00 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Tue Aug 23 10:52:00 2016 +0700

----------------------------------------------------------------------
 src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b04aca63/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj b/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
index 7a6d6fb..8d3d4e5 100644
--- a/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
+++ b/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
@@ -21,7 +21,7 @@
     <DebugType>full</DebugType>
     <Optimize>false</Optimize>
     <OutputPath>bin\Debug\</OutputPath>
-    <DefineConstants>TRACE</DefineConstants>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
     <ErrorReport>prompt</ErrorReport>
     <WarningLevel>4</WarningLevel>
     <Prefer32Bit>false</Prefer32Bit>


[34/50] [abbrv] lucenenet git commit: Fixed "index out of range" and "key not found" bugs in Facet.Taxonomy.TestTaxonomyFacetCounts

Posted by sy...@apache.org.
Fixed "index out of range" and "key not found" bugs in Facet.Taxonomy.TestTaxonomyFacetCounts


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/132da593
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/132da593
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/132da593

Branch: refs/heads/analysis-work
Commit: 132da59331a3f9e9f1ed4a276cb79e7118617095
Parents: 9313ff9
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Aug 21 11:02:05 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 11:02:05 2016 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetCounts.cs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/132da593/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetCounts.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetCounts.cs b/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetCounts.cs
index a2b412e..72390e2 100644
--- a/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetCounts.cs
+++ b/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetCounts.cs
@@ -791,7 +791,7 @@ namespace Lucene.Net.Facet.Taxonomy
                 var expectedCounts = new List<Dictionary<string, int?>>();
                 for (int i = 0; i < numDims; i++)
                 {
-                    expectedCounts[i] = new Dictionary<string, int?>();
+                    expectedCounts.Add(new Dictionary<string, int?>());
                 }
 
                 foreach (TestDoc doc in testDocs)
@@ -802,7 +802,7 @@ namespace Lucene.Net.Facet.Taxonomy
                         {
                             if (doc.dims[j] != null)
                             {
-                                int? v = expectedCounts[j][doc.dims[j]];
+                                int? v = expectedCounts[j].ContainsKey(doc.dims[j]) ? expectedCounts[j][doc.dims[j]] : null;
                                 if (v == null)
                                 {
                                     expectedCounts[j][doc.dims[j]] = 1;


[38/50] [abbrv] lucenenet git commit: Ported Analysis.Compound namespace + tests

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/TernaryTree.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/TernaryTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/TernaryTree.cs
index f17d37c..98f1e47 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/TernaryTree.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/TernaryTree.cs
@@ -1,779 +1,811 @@
-\ufeff/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-using System;
+\ufeffusing System;
+using System.Collections;
 using System.Collections.Generic;
 using System.Text;
 
 namespace Lucene.Net.Analysis.Compound.Hyphenation
 {
-
-
-	/// <summary>
-	/// <h2>Ternary Search Tree.</h2>
-	/// 
-	/// <para>
-	/// A ternary search tree is a hybrid between a binary tree and a digital search
-	/// tree (trie). Keys are limited to strings. A data value of type char is stored
-	/// in each leaf node. It can be used as an index (or pointer) to the data.
-	/// Branches that only contain one key are compressed to one node by storing a
-	/// pointer to the trailer substring of the key. This class is intended to serve
-	/// as base class or helper class to implement Dictionary collections or the
-	/// like. Ternary trees have some nice properties as the following: the tree can
-	/// be traversed in sorted order, partial matches (wildcard) can be implemented,
-	/// retrieval of all keys within a given distance from the target, etc. The
-	/// storage requirements are higher than a binary tree but a lot less than a
-	/// trie. Performance is comparable with a hash table, sometimes it outperforms a
-	/// hash function (most of the time can determine a miss faster than a hash).
-	/// </para>
-	/// 
-	/// <para>
-	/// The main purpose of this java port is to serve as a base for implementing
-	/// TeX's hyphenation algorithm (see The TeXBook, appendix H). Each language
-	/// requires from 5000 to 15000 hyphenation patterns which will be keys in this
-	/// tree. The strings patterns are usually small (from 2 to 5 characters), but
-	/// each char in the tree is stored in a node. Thus memory usage is the main
-	/// concern. We will sacrifice 'elegance' to keep memory requirements to the
-	/// minimum. Using java's char type as pointer (yes, I know pointer it is a
-	/// forbidden word in java) we can keep the size of the node to be just 8 bytes
-	/// (3 pointers and the data char). This gives room for about 65000 nodes. In my
-	/// tests the english patterns took 7694 nodes and the german patterns 10055
-	/// nodes, so I think we are safe.
-	/// </para>
-	/// 
-	/// <para>
-	/// All said, this is a map with strings as keys and char as value. Pretty
-	/// limited!. It can be extended to a general map by using the string
-	/// representation of an object and using the char value as an index to an array
-	/// that contains the object values.
-	/// </para>
-	/// 
-	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
-	/// </summary>
-
-	public class TernaryTree : ICloneable
-	{
-
-	  /// <summary>
-	  /// We use 4 arrays to represent a node. I guess I should have created a proper
-	  /// node class, but somehow Knuth's pascal code made me forget we now have a
-	  /// portable language with virtual memory management and automatic garbage
-	  /// collection! And now is kind of late, furthermore, if it ain't broken, don't
-	  /// fix it.
-	  /// </summary>
-
-	  /// <summary>
-	  /// Pointer to low branch and to rest of the key when it is stored directly in
-	  /// this node, we don't have unions in java!
-	  /// </summary>
-	  protected internal char[] lo;
-
-	  /// <summary>
-	  /// Pointer to high branch.
-	  /// </summary>
-	  protected internal char[] hi;
-
-	  /// <summary>
-	  /// Pointer to equal branch and to data when this node is a string terminator.
-	  /// </summary>
-	  protected internal char[] eq;
-
-	  /// <summary>
-	  /// <P>
-	  /// The character stored in this node: splitchar. Two special values are
-	  /// reserved:
-	  /// </P>
-	  /// <ul>
-	  /// <li>0x0000 as string terminator</li>
-	  /// <li>0xFFFF to indicate that the branch starting at this node is compressed</li>
-	  /// </ul>
-	  /// <para>
-	  /// This shouldn't be a problem if we give the usual semantics to strings since
-	  /// 0xFFFF is guaranteed not to be an Unicode character.
-	  /// </para>
-	  /// </summary>
-	  protected internal char[] sc;
-
-	  /// <summary>
-	  /// This vector holds the trailing of the keys when the branch is compressed.
-	  /// </summary>
-	  protected internal CharVector kv;
-
-	  protected internal char root;
-
-	  protected internal char freenode;
-
-	  protected internal int length; // number of items in tree
-
-	  protected internal const int BLOCK_SIZE = 2048; // allocation size for arrays
-
-	  internal TernaryTree()
-	  {
-		init();
-	  }
-
-	  protected internal virtual void init()
-	  {
-		root = (char)0;
-		freenode = (char)1;
-		length = 0;
-		lo = new char[BLOCK_SIZE];
-		hi = new char[BLOCK_SIZE];
-		eq = new char[BLOCK_SIZE];
-		sc = new char[BLOCK_SIZE];
-		kv = new CharVector();
-	  }
-
-	  /// <summary>
-	  /// Branches are initially compressed, needing one node per key plus the size
-	  /// of the string key. They are decompressed as needed when another key with
-	  /// same prefix is inserted. This saves a lot of space, specially for long
-	  /// keys.
-	  /// </summary>
-	  public virtual void insert(string key, char val)
-	  {
-		// make sure we have enough room in the arrays
-		int len = key.Length + 1; // maximum number of nodes that may be generated
-		if (freenode + len > eq.Length)
-		{
-		  redimNodeArrays(eq.Length + BLOCK_SIZE);
-		}
-		char[] strkey = new char[len--];
-		key.CopyTo(0, strkey, 0, len - 0);
-		strkey[len] = (char)0;
-		root = insert(root, strkey, 0, val);
-	  }
-
-	  public virtual void insert(char[] key, int start, char val)
-	  {
-		int len = strlen(key) + 1;
-		if (freenode + len > eq.Length)
-		{
-		  redimNodeArrays(eq.Length + BLOCK_SIZE);
-		}
-		root = insert(root, key, start, val);
-	  }
-
-	  /// <summary>
-	  /// The actual insertion function, recursive version.
-	  /// </summary>
-	  private char insert(char p, char[] key, int start, char val)
-	  {
-		int len = strlen(key, start);
-		if (p == 0)
-		{
-		  // this means there is no branch, this node will start a new branch.
-		  // Instead of doing that, we store the key somewhere else and create
-		  // only one node with a pointer to the key
-		  p = freenode++;
-		  eq[p] = val; // holds data
-		  length++;
-		  hi[p] = (char)0;
-		  if (len > 0)
-		  {
-			sc[p] = (char)0xFFFF; // indicates branch is compressed
-			lo[p] = (char) kv.alloc(len + 1); // use 'lo' to hold pointer to key
-			strcpy(kv.Array, lo[p], key, start);
-		  }
-		  else
-		  {
-			sc[p] = (char)0;
-			lo[p] = (char)0;
-		  }
-		  return p;
-		}
-
-		if (sc[p] == 0xFFFF)
-		{
-		  // branch is compressed: need to decompress
-		  // this will generate garbage in the external key array
-		  // but we can do some garbage collection later
-		  char pp = freenode++;
-		  lo[pp] = lo[p]; // previous pointer to key
-		  eq[pp] = eq[p]; // previous pointer to data
-		  lo[p] = (char)0;
-		  if (len > 0)
-		  {
-			sc[p] = kv.get(lo[pp]);
-			eq[p] = pp;
-			lo[pp]++;
-			if (kv.get(lo[pp]) == 0)
-			{
-			  // key completly decompressed leaving garbage in key array
-			  lo[pp] = (char)0;
-			  sc[pp] = (char)0;
-			  hi[pp] = (char)0;
-			}
-			else
-			{
-			  // we only got first char of key, rest is still there
-			  sc[pp] = (char)0xFFFF;
-			}
-		  }
-		  else
-		  {
-			// In this case we can save a node by swapping the new node
-			// with the compressed node
-			sc[pp] = (char)0xFFFF;
-			hi[p] = pp;
-			sc[p] = (char)0;
-			eq[p] = val;
-			length++;
-			return p;
-		  }
-		}
-		char s = key[start];
-		if (s < sc[p])
-		{
-		  lo[p] = insert(lo[p], key, start, val);
-		}
-		else if (s == sc[p])
-		{
-		  if (s != 0)
-		  {
-			eq[p] = insert(eq[p], key, start + 1, val);
-		  }
-		  else
-		  {
-			// key already in tree, overwrite data
-			eq[p] = val;
-		  }
-		}
-		else
-		{
-		  hi[p] = insert(hi[p], key, start, val);
-		}
-		return p;
-	  }
-
-	  /// <summary>
-	  /// Compares 2 null terminated char arrays
-	  /// </summary>
-	  public static int strcmp(char[] a, int startA, char[] b, int startB)
-	  {
-		for (; a[startA] == b[startB]; startA++, startB++)
-		{
-		  if (a[startA] == 0)
-		  {
-			return 0;
-		  }
-		}
-		return a[startA] - b[startB];
-	  }
-
-	  /// <summary>
-	  /// Compares a string with null terminated char array
-	  /// </summary>
-	  public static int strcmp(string str, char[] a, int start)
-	  {
-		int i , d , len = str.Length;
-		for (i = 0; i < len; i++)
-		{
-		  d = (int) str[i] - a[start + i];
-		  if (d != 0)
-		  {
-			return d;
-		  }
-		  if (a[start + i] == 0)
-		  {
-			return d;
-		  }
-		}
-		if (a[start + i] != 0)
-		{
-		  return -a[start + i];
-		}
-		return 0;
-
-	  }
-
-	  public static void strcpy(char[] dst, int di, char[] src, int si)
-	  {
-		while (src[si] != 0)
-		{
-		  dst[di++] = src[si++];
-		}
-		dst[di] = (char)0;
-	  }
-
-	  public static int strlen(char[] a, int start)
-	  {
-		int len = 0;
-		for (int i = start; i < a.Length && a[i] != 0; i++)
-		{
-		  len++;
-		}
-		return len;
-	  }
-
-	  public static int strlen(char[] a)
-	  {
-		return strlen(a, 0);
-	  }
-
-	  public virtual int find(string key)
-	  {
-		int len = key.Length;
-		char[] strkey = new char[len + 1];
-		key.CopyTo(0, strkey, 0, len - 0);
-		strkey[len] = (char)0;
-
-		return find(strkey, 0);
-	  }
-
-	  public virtual int find(char[] key, int start)
-	  {
-		int d;
-		char p = root;
-		int i = start;
-		char c;
-
-		while (p != 0)
-		{
-		  if (sc[p] == 0xFFFF)
-		  {
-			if (strcmp(key, i, kv.Array, lo[p]) == 0)
-			{
-			  return eq[p];
-			}
-			else
-			{
-			  return -1;
-			}
-		  }
-		  c = key[i];
-		  d = c - sc[p];
-		  if (d == 0)
-		  {
-			if (c == 0)
-			{
-			  return eq[p];
-			}
-			i++;
-			p = eq[p];
-		  }
-		  else if (d < 0)
-		  {
-			p = lo[p];
-		  }
-		  else
-		  {
-			p = hi[p];
-		  }
-		}
-		return -1;
-	  }
-
-	  public virtual bool knows(string key)
-	  {
-		return (find(key) >= 0);
-	  }
-
-	  // redimension the arrays
-	  private void redimNodeArrays(int newsize)
-	  {
-		int len = newsize < lo.Length ? newsize : lo.Length;
-		char[] na = new char[newsize];
-		Array.Copy(lo, 0, na, 0, len);
-		lo = na;
-		na = new char[newsize];
-		Array.Copy(hi, 0, na, 0, len);
-		hi = na;
-		na = new char[newsize];
-		Array.Copy(eq, 0, na, 0, len);
-		eq = na;
-		na = new char[newsize];
-		Array.Copy(sc, 0, na, 0, len);
-		sc = na;
-	  }
-
-	  public virtual int size()
-	  {
-		return length;
-	  }
-
-	  public override TernaryTree clone()
-	  {
-		TernaryTree t = new TernaryTree();
-		t.lo = this.lo.Clone();
-		t.hi = this.hi.Clone();
-		t.eq = this.eq.Clone();
-		t.sc = this.sc.Clone();
-		t.kv = this.kv.clone();
-		t.root = this.root;
-		t.freenode = this.freenode;
-		t.length = this.length;
-
-		return t;
-	  }
-
-	  /// <summary>
-	  /// Recursively insert the median first and then the median of the lower and
-	  /// upper halves, and so on in order to get a balanced tree. The array of keys
-	  /// is assumed to be sorted in ascending order.
-	  /// </summary>
-	  protected internal virtual void insertBalanced(string[] k, char[] v, int offset, int n)
-	  {
-		int m;
-		if (n < 1)
-		{
-		  return;
-		}
-		m = n >> 1;
-
-		insert(k[m + offset], v[m + offset]);
-		insertBalanced(k, v, offset, m);
-
-		insertBalanced(k, v, offset + m + 1, n - m - 1);
-	  }
-
-	  /// <summary>
-	  /// Balance the tree for best search performance
-	  /// </summary>
-	  public virtual void balance()
-	  {
-		// System.out.print("Before root splitchar = ");
-		// System.out.println(sc[root]);
-
-		int i = 0, n = length;
-		string[] k = new string[n];
-		char[] v = new char[n];
-		Iterator iter = new Iterator(this);
-		while (iter.hasMoreElements())
-		{
-		  v[i] = iter.Value;
-		  k[i++] = iter.nextElement();
-		}
-		init();
-		insertBalanced(k, v, 0, n);
-
-		// With uniform letter distribution sc[root] should be around 'm'
-		// System.out.print("After root splitchar = ");
-		// System.out.println(sc[root]);
-	  }
-
-	  /// <summary>
-	  /// Each node stores a character (splitchar) which is part of some key(s). In a
-	  /// compressed branch (one that only contain a single string key) the trailer
-	  /// of the key which is not already in nodes is stored externally in the kv
-	  /// array. As items are inserted, key substrings decrease. Some substrings may
-	  /// completely disappear when the whole branch is totally decompressed. The
-	  /// tree is traversed to find the key substrings actually used. In addition,
-	  /// duplicate substrings are removed using a map (implemented with a
-	  /// TernaryTree!).
-	  /// 
-	  /// </summary>
-	  public virtual void trimToSize()
-	  {
-		// first balance the tree for best performance
-		balance();
-
-		// redimension the node arrays
-		redimNodeArrays(freenode);
-
-		// ok, compact kv array
-		CharVector kx = new CharVector();
-		kx.alloc(1);
-		TernaryTree map = new TernaryTree();
-		compact(kx, map, root);
-		kv = kx;
-		kv.trimToSize();
-	  }
-
-	  private void compact(CharVector kx, TernaryTree map, char p)
-	  {
-		int k;
-		if (p == 0)
-		{
-		  return;
-		}
-		if (sc[p] == 0xFFFF)
-		{
-		  k = map.find(kv.Array, lo[p]);
-		  if (k < 0)
-		  {
-			k = kx.alloc(strlen(kv.Array, lo[p]) + 1);
-			strcpy(kx.Array, k, kv.Array, lo[p]);
-			map.insert(kx.Array, k, (char) k);
-		  }
-		  lo[p] = (char) k;
-		}
-		else
-		{
-		  compact(kx, map, lo[p]);
-		  if (sc[p] != 0)
-		  {
-			compact(kx, map, eq[p]);
-		  }
-		  compact(kx, map, hi[p]);
-		}
-	  }
-
-	  public virtual IEnumerator<string> keys()
-	  {
-		return new Iterator(this);
-	  }
-
-	  public class Iterator : IEnumerator<string>
-	  {
-		  private readonly TernaryTree outerInstance;
-
-
-		/// <summary>
-		/// current node index
-		/// </summary>
-		internal int cur;
-
-		/// <summary>
-		/// current key
-		/// </summary>
-		internal string curkey;
-
-		private class Item : ICloneable
-		{
-			private readonly TernaryTree.Iterator outerInstance;
-
-		  internal char parent;
-
-		  internal char child;
-
-		  public Item(TernaryTree.Iterator outerInstance)
-		  {
-			  this.outerInstance = outerInstance;
-			parent = (char)0;
-			child = (char)0;
-		  }
-
-		  public Item(TernaryTree.Iterator outerInstance, char p, char c)
-		  {
-			  this.outerInstance = outerInstance;
-			parent = p;
-			child = c;
-		  }
-
-		  public override Item clone()
-		  {
-			return new Item(outerInstance, parent, child);
-		  }
-
-		}
-
-		/// <summary>
-		/// Node stack
-		/// </summary>
-		internal Stack<Item> ns;
-
-		/// <summary>
-		/// key stack implemented with a StringBuilder
-		/// </summary>
-		internal StringBuilder ks;
-
-		public Iterator(TernaryTree outerInstance)
-		{
-			this.outerInstance = outerInstance;
-		  cur = -1;
-		  ns = new Stack<>();
-		  ks = new StringBuilder();
-		  rewind();
-		}
-
-		public virtual void rewind()
-		{
-		  ns.removeAllElements();
-		  ks.Length = 0;
-		  cur = outerInstance.root;
-		  run();
-		}
-
-		public override string nextElement()
-		{
-		  string res = curkey;
-		  cur = up();
-		  run();
-		  return res;
-		}
-
-		public virtual char Value
-		{
-			get
-			{
-			  if (cur >= 0)
-			  {
-				return outerInstance.eq[cur];
-			  }
-			  return 0;
-			}
-		}
-
-		public override bool hasMoreElements()
-		{
-		  return (cur != -1);
-		}
-
-		/// <summary>
-		/// traverse upwards
-		/// </summary>
-		internal virtual int up()
-		{
-		  Item i = new Item(this);
-		  int res = 0;
-
-		  if (ns.Count == 0)
-		  {
-			return -1;
-		  }
-
-		  if (cur != 0 && outerInstance.sc[cur] == 0)
-		  {
-			return outerInstance.lo[cur];
-		  }
-
-		  bool climb = true;
-
-		  while (climb)
-		  {
-			i = ns.Pop();
-			i.child++;
-			switch (i.child)
-			{
-			  case 1:
-				if (outerInstance.sc[i.parent] != 0)
-				{
-				  res = outerInstance.eq[i.parent];
-				  ns.Push(i.clone());
-				  ks.Append(outerInstance.sc[i.parent]);
-				}
-				else
-				{
-				  i.child++;
-				  ns.Push(i.clone());
-				  res = outerInstance.hi[i.parent];
-				}
-				climb = false;
-				break;
-
-			  case 2:
-				res = outerInstance.hi[i.parent];
-				ns.Push(i.clone());
-				if (ks.Length > 0)
-				{
-				  ks.Length = ks.Length - 1; // pop
-				}
-				climb = false;
-				break;
-
-			  default:
-				if (ns.Count == 0)
-				{
-				  return -1;
-				}
-				climb = true;
-				break;
-			}
-		  }
-		  return res;
-		}
-
-		/// <summary>
-		/// traverse the tree to find next key
-		/// </summary>
-		internal virtual int run()
-		{
-		  if (cur == -1)
-		  {
-			return -1;
-		  }
-
-		  bool leaf = false;
-		  while (true)
-		  {
-			// first go down on low branch until leaf or compressed branch
-			while (cur != 0)
-			{
-			  if (outerInstance.sc[cur] == 0xFFFF)
-			  {
-				leaf = true;
-				break;
-			  }
-			  ns.Push(new Item(this, (char) cur, '\u0000'));
-			  if (outerInstance.sc[cur] == 0)
-			  {
-				leaf = true;
-				break;
-			  }
-			  cur = outerInstance.lo[cur];
-			}
-			if (leaf)
-			{
-			  break;
-			}
-			// nothing found, go up one node and try again
-			cur = up();
-			if (cur == -1)
-			{
-			  return -1;
-			}
-		  }
-		  // The current node should be a data node and
-		  // the key should be in the key stack (at least partially)
-		  StringBuilder buf = new StringBuilder(ks.ToString());
-		  if (outerInstance.sc[cur] == 0xFFFF)
-		  {
-			int p = outerInstance.lo[cur];
-			while (outerInstance.kv.get(p) != 0)
-			{
-			  buf.Append(outerInstance.kv.get(p++));
-			}
-		  }
-		  curkey = buf.ToString();
-		  return 0;
-		}
-
-	  }
-
-	  public virtual void printStats(PrintStream @out)
-	  {
-		@out.println("Number of keys = " + Convert.ToString(length));
-		@out.println("Node count = " + Convert.ToString(freenode));
-		// System.out.println("Array length = " + Integer.toString(eq.length));
-		@out.println("Key Array length = " + Convert.ToString(kv.length()));
-
-		/*
-		 * for(int i=0; i<kv.length(); i++) if ( kv.get(i) != 0 )
-		 * System.out.print(kv.get(i)); else System.out.println("");
-		 * System.out.println("Keys:"); for(Enumeration enum = keys();
-		 * enum.hasMoreElements(); ) System.out.println(enum.nextElement());
-		 */
-
-	  }
-	/*
-	  public static void main(String[] args) {
-	    TernaryTree tt = new TernaryTree();
-	    tt.insert("Carlos", 'C');
-	    tt.insert("Car", 'r');
-	    tt.insert("palos", 'l');
-	    tt.insert("pa", 'p');
-	    tt.trimToSize();
-	    System.out.println((char) tt.find("Car"));
-	    System.out.println((char) tt.find("Carlos"));
-	    System.out.println((char) tt.find("alto"));
-	    tt.printStats(System.out);
-	  }
-	  */
-
-	}
-
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// <h2>Ternary Search Tree.</h2>
+    /// 
+    /// <para>
+    /// A ternary search tree is a hybrid between a binary tree and a digital search
+    /// tree (trie). Keys are limited to strings. A data value of type char is stored
+    /// in each leaf node. It can be used as an index (or pointer) to the data.
+    /// Branches that only contain one key are compressed to one node by storing a
+    /// pointer to the trailer substring of the key. This class is intended to serve
+    /// as base class or helper class to implement Dictionary collections or the
+    /// like. Ternary trees have some nice properties as the following: the tree can
+    /// be traversed in sorted order, partial matches (wildcard) can be implemented,
+    /// retrieval of all keys within a given distance from the target, etc. The
+    /// storage requirements are higher than a binary tree but a lot less than a
+    /// trie. Performance is comparable with a hash table, sometimes it outperforms a
+    /// hash function (most of the time can determine a miss faster than a hash).
+    /// </para>
+    /// 
+    /// <para>
+    /// The main purpose of this java port is to serve as a base for implementing
+    /// TeX's hyphenation algorithm (see The TeXBook, appendix H). Each language
+    /// requires from 5000 to 15000 hyphenation patterns which will be keys in this
+    /// tree. The strings patterns are usually small (from 2 to 5 characters), but
+    /// each char in the tree is stored in a node. Thus memory usage is the main
+    /// concern. We will sacrifice 'elegance' to keep memory requirements to the
+    /// minimum. Using java's char type as pointer (yes, I know pointer it is a
+    /// forbidden word in java) we can keep the size of the node to be just 8 bytes
+    /// (3 pointers and the data char). This gives room for about 65000 nodes. In my
+    /// tests the english patterns took 7694 nodes and the german patterns 10055
+    /// nodes, so I think we are safe.
+    /// </para>
+    /// 
+    /// <para>
+    /// All said, this is a map with strings as keys and char as value. Pretty
+    /// limited!. It can be extended to a general map by using the string
+    /// representation of an object and using the char value as an index to an array
+    /// that contains the object values.
+    /// </para>
+    /// 
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
+    /// </summary>
+
+    public class TernaryTree : ICloneable
+    {
+        /// <summary>
+        /// We use 4 arrays to represent a node. I guess I should have created a proper
+        /// node class, but somehow Knuth's pascal code made me forget we now have a
+        /// portable language with virtual memory management and automatic garbage
+        /// collection! And now is kind of late, furthermore, if it ain't broken, don't
+        /// fix it.
+        /// </summary>
+
+        /// <summary>
+        /// Pointer to low branch and to rest of the key when it is stored directly in
+        /// this node, we don't have unions in java!
+        /// </summary>
+        protected internal char[] lo;
+
+        /// <summary>
+        /// Pointer to high branch.
+        /// </summary>
+        protected internal char[] hi;
+
+        /// <summary>
+        /// Pointer to equal branch and to data when this node is a string terminator.
+        /// </summary>
+        protected internal char[] eq;
+
+        /// <summary>
+        /// <P>
+        /// The character stored in this node: splitchar. Two special values are
+        /// reserved:
+        /// </P>
+        /// <ul>
+        /// <li>0x0000 as string terminator</li>
+        /// <li>0xFFFF to indicate that the branch starting at this node is compressed</li>
+        /// </ul>
+        /// <para>
+        /// This shouldn't be a problem if we give the usual semantics to strings since
+        /// 0xFFFF is guaranteed not to be an Unicode character.
+        /// </para>
+        /// </summary>
+        protected internal char[] sc;
+
+        /// <summary>
+        /// This vector holds the trailing of the keys when the branch is compressed.
+        /// </summary>
+        protected internal CharVector kv;
+
+        protected internal char root;
+
+        protected internal char freenode;
+
+        protected internal int length; // number of items in tree
+
+        protected internal const int BLOCK_SIZE = 2048; // allocation size for arrays
+
+        internal TernaryTree()
+        {
+            Init();
+        }
+
+        protected internal virtual void Init()
+        {
+            root = (char)0;
+            freenode = (char)1;
+            length = 0;
+            lo = new char[BLOCK_SIZE];
+            hi = new char[BLOCK_SIZE];
+            eq = new char[BLOCK_SIZE];
+            sc = new char[BLOCK_SIZE];
+            kv = new CharVector();
+        }
+
+        /// <summary>
+        /// Branches are initially compressed, needing one node per key plus the size
+        /// of the string key. They are decompressed as needed when another key with
+        /// same prefix is inserted. This saves a lot of space, specially for long
+        /// keys.
+        /// </summary>
+        public virtual void Insert(string key, char val)
+        {
+            // make sure we have enough room in the arrays
+            int len = key.Length + 1; // maximum number of nodes that may be generated
+            if (freenode + len > eq.Length)
+            {
+                RedimNodeArrays(eq.Length + BLOCK_SIZE);
+            }
+            char[] strkey = new char[len--];
+            key.CopyTo(0, strkey, 0, len - 0);
+            strkey[len] = (char)0;
+            root = Insert(root, strkey, 0, val);
+        }
+
+        public virtual void Insert(char[] key, int start, char val)
+        {
+            int len = StrLen(key) + 1;
+            if (freenode + len > eq.Length)
+            {
+                RedimNodeArrays(eq.Length + BLOCK_SIZE);
+            }
+            root = Insert(root, key, start, val);
+        }
+
+        /// <summary>
+        /// The actual insertion function, recursive version.
+        /// </summary>
+        private char Insert(char p, char[] key, int start, char val)
+        {
+            int len = StrLen(key, start);
+            if (p == 0)
+            {
+                // this means there is no branch, this node will start a new branch.
+                // Instead of doing that, we store the key somewhere else and create
+                // only one node with a pointer to the key
+                p = freenode++;
+                eq[p] = val; // holds data
+                length++;
+                hi[p] = (char)0;
+                if (len > 0)
+                {
+                    sc[p] = (char)0xFFFF; // indicates branch is compressed
+                    lo[p] = (char)kv.Alloc(len + 1); // use 'lo' to hold pointer to key
+                    StrCpy(kv.Array, lo[p], key, start);
+                }
+                else
+                {
+                    sc[p] = (char)0;
+                    lo[p] = (char)0;
+                }
+                return p;
+            }
+
+            if (sc[p] == 0xFFFF)
+            {
+                // branch is compressed: need to decompress
+                // this will generate garbage in the external key array
+                // but we can do some garbage collection later
+                char pp = freenode++;
+                lo[pp] = lo[p]; // previous pointer to key
+                eq[pp] = eq[p]; // previous pointer to data
+                lo[p] = (char)0;
+                if (len > 0)
+                {
+                    sc[p] = kv[lo[pp]];
+                    eq[p] = pp;
+                    lo[pp]++;
+                    if (kv[lo[pp]] == 0)
+                    {
+                        // key completly decompressed leaving garbage in key array
+                        lo[pp] = (char)0;
+                        sc[pp] = (char)0;
+                        hi[pp] = (char)0;
+                    }
+                    else
+                    {
+                        // we only got first char of key, rest is still there
+                        sc[pp] = (char)0xFFFF;
+                    }
+                }
+                else
+                {
+                    // In this case we can save a node by swapping the new node
+                    // with the compressed node
+                    sc[pp] = (char)0xFFFF;
+                    hi[p] = pp;
+                    sc[p] = (char)0;
+                    eq[p] = val;
+                    length++;
+                    return p;
+                }
+            }
+            char s = key[start];
+            if (s < sc[p])
+            {
+                lo[p] = Insert(lo[p], key, start, val);
+            }
+            else if (s == sc[p])
+            {
+                if (s != 0)
+                {
+                    eq[p] = Insert(eq[p], key, start + 1, val);
+                }
+                else
+                {
+                    // key already in tree, overwrite data
+                    eq[p] = val;
+                }
+            }
+            else
+            {
+                hi[p] = Insert(hi[p], key, start, val);
+            }
+            return p;
+        }
+
+        /// <summary>
+        /// Compares 2 null terminated char arrays
+        /// </summary>
+        public static int StrCmp(char[] a, int startA, char[] b, int startB)
+        {
+            for (; a[startA] == b[startB]; startA++, startB++)
+            {
+                if (a[startA] == 0)
+                {
+                    return 0;
+                }
+            }
+            return a[startA] - b[startB];
+        }
+
+        /// <summary>
+        /// Compares a string with null terminated char array
+        /// </summary>
+        public static int StrCmp(string str, char[] a, int start)
+        {
+            int i, d, len = str.Length;
+            for (i = 0; i < len; i++)
+            {
+                d = (int)str[i] - a[start + i];
+                if (d != 0)
+                {
+                    return d;
+                }
+                if (a[start + i] == 0)
+                {
+                    return d;
+                }
+            }
+            if (a[start + i] != 0)
+            {
+                return -a[start + i];
+            }
+            return 0;
+
+        }
+
+        public static void StrCpy(char[] dst, int di, char[] src, int si)
+        {
+            while (src[si] != 0)
+            {
+                dst[di++] = src[si++];
+            }
+            dst[di] = (char)0;
+        }
+
+        public static int StrLen(char[] a, int start)
+        {
+            int len = 0;
+            for (int i = start; i < a.Length && a[i] != 0; i++)
+            {
+                len++;
+            }
+            return len;
+        }
+
+        public static int StrLen(char[] a)
+        {
+            return StrLen(a, 0);
+        }
+
+        public virtual int Find(string key)
+        {
+            int len = key.Length;
+            char[] strkey = new char[len + 1];
+            key.CopyTo(0, strkey, 0, len - 0);
+            strkey[len] = (char)0;
+
+            return Find(strkey, 0);
+        }
+
+        public virtual int Find(char[] key, int start)
+        {
+            int d;
+            char p = root;
+            int i = start;
+            char c;
+
+            while (p != 0)
+            {
+                if (sc[p] == 0xFFFF)
+                {
+                    if (StrCmp(key, i, kv.Array, lo[p]) == 0)
+                    {
+                        return eq[p];
+                    }
+                    else
+                    {
+                        return -1;
+                    }
+                }
+                c = key[i];
+                d = c - sc[p];
+                if (d == 0)
+                {
+                    if (c == 0)
+                    {
+                        return eq[p];
+                    }
+                    i++;
+                    p = eq[p];
+                }
+                else if (d < 0)
+                {
+                    p = lo[p];
+                }
+                else
+                {
+                    p = hi[p];
+                }
+            }
+            return -1;
+        }
+
+        public virtual bool Knows(string key)
+        {
+            return (Find(key) >= 0);
+        }
+
+        // redimension the arrays
+        private void RedimNodeArrays(int newsize)
+        {
+            int len = newsize < lo.Length ? newsize : lo.Length;
+            char[] na = new char[newsize];
+            Array.Copy(lo, 0, na, 0, len);
+            lo = na;
+            na = new char[newsize];
+            Array.Copy(hi, 0, na, 0, len);
+            hi = na;
+            na = new char[newsize];
+            Array.Copy(eq, 0, na, 0, len);
+            eq = na;
+            na = new char[newsize];
+            Array.Copy(sc, 0, na, 0, len);
+            sc = na;
+        }
+
+        public virtual int Length
+        {
+            get { return length; }
+        }
+
+        public object Clone()
+        {
+            TernaryTree t = new TernaryTree();
+            t.lo = (char[])this.lo.Clone();
+            t.hi = (char[])this.hi.Clone();
+            t.eq = (char[])this.eq.Clone();
+            t.sc = (char[])this.sc.Clone();
+            t.kv = (CharVector)this.kv.Clone();
+            t.root = this.root;
+            t.freenode = this.freenode;
+            t.length = this.length;
+
+            return t;
+        }
+
+        /// <summary>
+        /// Recursively insert the median first and then the median of the lower and
+        /// upper halves, and so on in order to get a balanced tree. The array of keys
+        /// is assumed to be sorted in ascending order.
+        /// </summary>
+        protected internal virtual void InsertBalanced(string[] k, char[] v, int offset, int n)
+        {
+            int m;
+            if (n < 1)
+            {
+                return;
+            }
+            m = n >> 1;
+
+            Insert(k[m + offset], v[m + offset]);
+            InsertBalanced(k, v, offset, m);
+
+            InsertBalanced(k, v, offset + m + 1, n - m - 1);
+        }
+
+        /// <summary>
+        /// Balance the tree for best search performance
+        /// </summary>
+        public virtual void Balance()
+        {
+            // System.out.print("Before root splitchar = ");
+            // System.out.println(sc[root]);
+
+            int i = 0, n = length;
+            string[] k = new string[n];
+            char[] v = new char[n];
+            Iterator iter = new Iterator(this);
+            while (iter.HasMoreElements())
+            {
+                v[i] = iter.Value;
+                iter.MoveNext();
+                k[i++] = iter.Current;
+            }
+            Init();
+            InsertBalanced(k, v, 0, n);
+
+            // With uniform letter distribution sc[root] should be around 'm'
+            // System.out.print("After root splitchar = ");
+            // System.out.println(sc[root]);
+        }
+
+        /// <summary>
+        /// Each node stores a character (splitchar) which is part of some key(s). In a
+        /// compressed branch (one that only contain a single string key) the trailer
+        /// of the key which is not already in nodes is stored externally in the kv
+        /// array. As items are inserted, key substrings decrease. Some substrings may
+        /// completely disappear when the whole branch is totally decompressed. The
+        /// tree is traversed to find the key substrings actually used. In addition,
+        /// duplicate substrings are removed using a map (implemented with a
+        /// TernaryTree!).
+        /// 
+        /// </summary>
+        public virtual void TrimToSize()
+        {
+            // first balance the tree for best performance
+            Balance();
+
+            // redimension the node arrays
+            RedimNodeArrays(freenode);
+
+            // ok, compact kv array
+            CharVector kx = new CharVector();
+            kx.Alloc(1);
+            TernaryTree map = new TernaryTree();
+            Compact(kx, map, root);
+            kv = kx;
+            kv.TrimToSize();
+        }
+
+        private void Compact(CharVector kx, TernaryTree map, char p)
+        {
+            int k;
+            if (p == 0)
+            {
+                return;
+            }
+            if (sc[p] == 0xFFFF)
+            {
+                k = map.Find(kv.Array, lo[p]);
+                if (k < 0)
+                {
+                    k = kx.Alloc(StrLen(kv.Array, lo[p]) + 1);
+                    StrCpy(kx.Array, k, kv.Array, lo[p]);
+                    map.Insert(kx.Array, k, (char)k);
+                }
+                lo[p] = (char)k;
+            }
+            else
+            {
+                Compact(kx, map, lo[p]);
+                if (sc[p] != 0)
+                {
+                    Compact(kx, map, eq[p]);
+                }
+                Compact(kx, map, hi[p]);
+            }
+        }
+
+        public virtual IEnumerator<string> Keys()
+        {
+            return new Iterator(this);
+        }
+
+        public class Iterator : IEnumerator<string>
+        {
+            private readonly TernaryTree outerInstance;
+
+
+            /// <summary>
+            /// current node index
+            /// </summary>
+            private int cur;
+
+            /// <summary>
+            /// current key
+            /// </summary>
+            private string curkey;
+
+            internal class Item : ICloneable
+            {
+                internal char parent;
+                internal char child;
+
+                public Item()
+                {
+                    parent = (char)0;
+                    child = (char)0;
+                }
+
+                public Item(char p, char c)
+                {
+                    parent = p;
+                    child = c;
+                }
+
+                public object Clone()
+                {
+                    return new Item(parent, child);
+                }
+
+            }
+
+            /// <summary>
+            /// Node stack
+            /// </summary>
+            internal Stack<Item> ns;
+
+            /// <summary>
+            /// key stack implemented with a StringBuilder
+            /// </summary>
+            internal StringBuilder ks;
+
+            public Iterator(TernaryTree outerInstance)
+            {
+                this.outerInstance = outerInstance;
+                cur = -1;
+                ns = new Stack<Item>();
+                ks = new StringBuilder();
+                Rewind();
+            }
+
+            public virtual void Rewind()
+            {
+                ns.Clear();
+                ks.Length = 0;
+                cur = outerInstance.root;
+                Run();
+            }
+
+            //public override string NextElement()
+            //{
+            //  string res = curkey;
+            //  cur = up();
+            //  run();
+            //  return res;
+            //}
+
+            public virtual char Value
+            {
+                get
+                {
+                    if (cur >= 0)
+                    {
+                        return outerInstance.eq[cur];
+                    }
+                    return (char)0;
+                }
+            }
+
+
+
+            public bool HasMoreElements()
+            {
+                return (cur != -1);
+            }
+
+            /// <summary>
+            /// traverse upwards
+            /// </summary>
+            internal virtual int Up()
+            {
+                Item i = new Item();
+                int res = 0;
+
+                if (ns.Count == 0)
+                {
+                    return -1;
+                }
+
+                if (cur != 0 && outerInstance.sc[cur] == 0)
+                {
+                    return outerInstance.lo[cur];
+                }
+
+                bool climb = true;
+
+                while (climb)
+                {
+                    i = ns.Pop();
+                    i.child++;
+                    switch ((int)i.child)
+                    {
+                        case 1:
+                            if (outerInstance.sc[i.parent] != 0)
+                            {
+                                res = outerInstance.eq[i.parent];
+                                ns.Push((Item)i.Clone());
+                                ks.Append(outerInstance.sc[i.parent]);
+                            }
+                            else
+                            {
+                                i.child++;
+                                ns.Push((Item)i.Clone());
+                                res = outerInstance.hi[i.parent];
+                            }
+                            climb = false;
+                            break;
+
+                        case 2:
+                            res = outerInstance.hi[i.parent];
+                            ns.Push((Item)i.Clone());
+                            if (ks.Length > 0)
+                            {
+                                ks.Length = ks.Length - 1; // pop
+                            }
+                            climb = false;
+                            break;
+
+                        default:
+                            if (ns.Count == 0)
+                            {
+                                return -1;
+                            }
+                            climb = true;
+                            break;
+                    }
+                }
+                return res;
+            }
+
+            /// <summary>
+            /// traverse the tree to find next key
+            /// </summary>
+            internal virtual int Run()
+            {
+                if (cur == -1)
+                {
+                    return -1;
+                }
+
+                bool leaf = false;
+                while (true)
+                {
+                    // first go down on low branch until leaf or compressed branch
+                    while (cur != 0)
+                    {
+                        if (outerInstance.sc[cur] == 0xFFFF)
+                        {
+                            leaf = true;
+                            break;
+                        }
+                        ns.Push(new Item((char)cur, '\u0000'));
+                        if (outerInstance.sc[cur] == 0)
+                        {
+                            leaf = true;
+                            break;
+                        }
+                        cur = outerInstance.lo[cur];
+                    }
+                    if (leaf)
+                    {
+                        break;
+                    }
+                    // nothing found, go up one node and try again
+                    cur = Up();
+                    if (cur == -1)
+                    {
+                        return -1;
+                    }
+                }
+                // The current node should be a data node and
+                // the key should be in the key stack (at least partially)
+                StringBuilder buf = new StringBuilder(ks.ToString());
+                if (outerInstance.sc[cur] == 0xFFFF)
+                {
+                    int p = outerInstance.lo[cur];
+                    while (outerInstance.kv[p] != 0)
+                    {
+                        buf.Append(outerInstance.kv[p++]);
+                    }
+                }
+                curkey = buf.ToString();
+                return 0;
+            }
+
+            #region Added for better .NET support
+            public string Current
+            {
+                get
+                {
+                    return curkey;
+                }
+            }
+
+            object IEnumerator.Current
+            {
+                get
+                {
+                    return curkey;
+                }
+            }
+
+            public void Dispose()
+            {
+                // nothing to do
+            }
+
+            public bool MoveNext()
+            {
+                cur = Up();
+                Run();
+                return cur != -1;
+            }
+
+            public void Reset()
+            {
+                throw new NotImplementedException();
+            }
+
+            #endregion
+        }
+
+        // LUCENENET: Not sure we really need this
+        // public virtual void printStats(PrintStream @out)
+        // {
+        //@out.println("Number of keys = " + Convert.ToString(length));
+        //@out.println("Node count = " + Convert.ToString(freenode));
+        //// System.out.println("Array length = " + Integer.toString(eq.length));
+        //@out.println("Key Array length = " + Convert.ToString(kv.length()));
+
+        ///*
+        // * for(int i=0; i<kv.length(); i++) if ( kv.get(i) != 0 )
+        // * System.out.print(kv.get(i)); else System.out.println("");
+        // * System.out.println("Keys:"); for(Enumeration enum = keys();
+        // * enum.hasMoreElements(); ) System.out.println(enum.nextElement());
+        // */
+
+        // }
+        /*
+          public static void main(String[] args) {
+            TernaryTree tt = new TernaryTree();
+            tt.insert("Carlos", 'C');
+            tt.insert("Car", 'r');
+            tt.insert("palos", 'l');
+            tt.insert("pa", 'p');
+            tt.trimToSize();
+            System.out.println((char) tt.find("Car"));
+            System.out.println((char) tt.find("Carlos"));
+            System.out.println((char) tt.find("alto"));
+            tt.printStats(System.out);
+          }
+          */
+
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/hyphenation.dtd
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/hyphenation.dtd b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/hyphenation.dtd
new file mode 100644
index 0000000..083c2bd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/hyphenation.dtd
@@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+  Copyright 1999-2004 The Apache Software Foundation
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!-- $Id: hyphenation.dtd,v 1.3 2004/02/27 18:34:59 jeremias Exp $ -->
+
+<!ELEMENT hyphenation-info (hyphen-char?, hyphen-min?,
+                           classes, exceptions?, patterns)>
+
+<!-- Hyphen character to be used in the exception list as shortcut for
+     <hyphen pre-break="-"/>. Defaults to '-'
+-->
+<!ELEMENT hyphen-char EMPTY>
+<!ATTLIST hyphen-char value CDATA #REQUIRED>
+
+<!-- Default minimun length in characters of hyphenated word fragments
+     before and after the line break. For some languages this is not
+     only for aesthetic purposes, wrong hyphens may be generated if this
+     is not accounted for.
+-->
+<!ELEMENT hyphen-min EMPTY>
+<!ATTLIST hyphen-min before CDATA #REQUIRED>
+<!ATTLIST hyphen-min after CDATA #REQUIRED>
+
+<!-- Character equivalent classes: space separated list of character groups, all
+     characters in a group are to be treated equivalent as far as
+     the hyphenation algorithm is concerned. The first character in a group
+     is the group's equivalent character. Patterns should only contain
+     first characters. It also defines word characters, i.e. a word that
+     contains characters not present in any of the classes is not hyphenated.
+-->
+<!ELEMENT classes (#PCDATA)>
+
+<!-- Hyphenation exceptions: space separated list of hyphenated words.
+     A hyphen is indicated by the hyphen tag, but you can use the
+     hyphen-char defined previously as shortcut. This is in cases
+     when the algorithm procedure finds wrong hyphens or you want
+     to provide your own hyphenation for some words.
+-->
+<!ELEMENT exceptions (#PCDATA|hyphen)* >
+
+<!-- The hyphenation patterns, space separated. A pattern is made of 'equivalent'
+     characters as described before, between any two word characters a digit
+     in the range 0 to 9 may be specified. The absence of a digit is equivalent
+     to zero. The '.' character is reserved to indicate begining or ending
+     of words. -->
+<!ELEMENT patterns (#PCDATA)>
+
+<!-- A "full hyphen" equivalent to TeX's \discretionary
+     with pre-break, post-break and no-break attributes.
+     To be used in the exceptions list, the hyphen character is not
+     automatically added -->
+<!ELEMENT hyphen EMPTY>
+<!ATTLIST hyphen pre CDATA #IMPLIED>
+<!ATTLIST hyphen no CDATA #IMPLIED>
+<!ATTLIST hyphen post CDATA #IMPLIED>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
index a74ed0b..615d1a0 100644
--- a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
+++ b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
@@ -42,6 +42,8 @@
     <Reference Include="System" />
     <Reference Include="System.Core" />
     <Reference Include="Microsoft.CSharp" />
+    <Reference Include="System.Xml" />
+    <Reference Include="System.Xml.Linq" />
   </ItemGroup>
   <ItemGroup>
     <Compile Include="Analysis\Bg\BulgarianAnalyzer.cs" />
@@ -91,6 +93,19 @@
     <Compile Include="Analysis\CommonGrams\CommonGramsFilterFactory.cs" />
     <Compile Include="Analysis\CommonGrams\CommonGramsQueryFilter.cs" />
     <Compile Include="Analysis\CommonGrams\CommonGramsQueryFilterFactory.cs" />
+    <Compile Include="Analysis\Compound\CompoundWordTokenFilterBase.cs" />
+    <Compile Include="Analysis\Compound\DictionaryCompoundWordTokenFilter.cs" />
+    <Compile Include="Analysis\Compound\DictionaryCompoundWordTokenFilterFactory.cs" />
+    <Compile Include="Analysis\Compound\HyphenationCompoundWordTokenFilter.cs" />
+    <Compile Include="Analysis\Compound\HyphenationCompoundWordTokenFilterFactory.cs" />
+    <Compile Include="Analysis\Compound\Hyphenation\ByteVector.cs" />
+    <Compile Include="Analysis\Compound\Hyphenation\CharVector.cs" />
+    <Compile Include="Analysis\Compound\Hyphenation\Hyphen.cs" />
+    <Compile Include="Analysis\Compound\Hyphenation\Hyphenation.cs" />
+    <Compile Include="Analysis\Compound\Hyphenation\HyphenationTree.cs" />
+    <Compile Include="Analysis\Compound\Hyphenation\PatternConsumer.cs" />
+    <Compile Include="Analysis\Compound\Hyphenation\PatternParser.cs" />
+    <Compile Include="Analysis\Compound\Hyphenation\TernaryTree.cs" />
     <Compile Include="Analysis\Core\KeywordAnalyzer.cs" />
     <Compile Include="Analysis\Core\KeywordTokenizer.cs" />
     <Compile Include="Analysis\Core\KeywordTokenizerFactory.cs" />
@@ -481,6 +496,7 @@
   <ItemGroup>
     <EmbeddedResource Include="Analysis\Gl\galician.rslp" />
     <EmbeddedResource Include="Analysis\Pt\portuguese.rslp" />
+    <EmbeddedResource Include="Analysis\Compound\Hyphenation\hyphenation.dtd" />
     <None Include="packages.config" />
   </ItemGroup>
   <ItemGroup>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Core/Analysis/Tokenattributes/ICharTermAttribute.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/Tokenattributes/ICharTermAttribute.cs b/src/Lucene.Net.Core/Analysis/Tokenattributes/ICharTermAttribute.cs
index bca2d65..76e9090 100644
--- a/src/Lucene.Net.Core/Analysis/Tokenattributes/ICharTermAttribute.cs
+++ b/src/Lucene.Net.Core/Analysis/Tokenattributes/ICharTermAttribute.cs
@@ -75,6 +75,13 @@ namespace Lucene.Net.Analysis.Tokenattributes
         ICharTermAttribute SetEmpty();
 
         // the following methods are redefined to get rid of IOException declaration:
+		
+        /// <summary>
+        /// Appends the contents of the {@code ICharSequence} to this character sequence.
+        /// <p>The characters of the {@code ICharSequence} argument are appended, in order, increasing the length of
+        /// this sequence by the length of the argument. If argument is {@code null}, then the four
+        /// characters {@code "null"} are appended.
+        /// </summary>
         ICharTermAttribute Append(ICharSequence csq);
 
         ICharTermAttribute Append(ICharSequence csq, int start, int end);
@@ -106,5 +113,7 @@ namespace Lucene.Net.Analysis.Tokenattributes
         /// characters {@code "null"} are appended.
         /// </summary>
         ICharTermAttribute Append(ICharTermAttribute termAtt);
+
+        ICharSequence SubSequence(int start, int end);
     }
 }
\ No newline at end of file


[49/50] [abbrv] lucenenet git commit: Added a CompareToOrdinal method to StringSupport to fix string comparison bugs introduced by using string.CompareTo (which isn't similar to Java's String.compareTo method).

Posted by sy...@apache.org.
Added a CompareToOrdinal method to StringSupport to fix string comparison bugs introduced by using string.CompareTo (which isn't similar to Java's String.compareTo method).


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/1995da25
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/1995da25
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/1995da25

Branch: refs/heads/analysis-work
Commit: 1995da25378f745a7868a4d3924134bbe628b0a5
Parents: 053d3ef
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Tue Aug 23 18:24:39 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Tue Aug 23 18:24:39 2016 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Core/Support/StringSupport.cs | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1995da25/src/Lucene.Net.Core/Support/StringSupport.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/StringSupport.cs b/src/Lucene.Net.Core/Support/StringSupport.cs
index 94f9108..8683b96 100644
--- a/src/Lucene.Net.Core/Support/StringSupport.cs
+++ b/src/Lucene.Net.Core/Support/StringSupport.cs
@@ -14,5 +14,28 @@ namespace Lucene.Net.Support
         {
             return new BytesRef(str.GetBytes(enc));
         }
+
+        /// <summary>
+        /// This method mimics the Java String.compareTo(String) method in that it
+        /// <list type="number">
+        /// <item>Compares the strings using lexographic sorting rules</item>
+        /// <item>Performs a culture-insensitive comparison</item>
+        /// </list>
+        /// This method is a convenience to replace the .NET CompareTo method 
+        /// on all strings, provided the logic does not expect specific values
+        /// but is simply comparing them with <code>></code> or <code><</code>.
+        /// </summary>
+        /// <param name="str"></param>
+        /// <param name="value">The string to compare with.</param>
+        /// <returns>
+        /// An integer that indicates the lexical relationship between the two comparands.
+        /// Less than zero indicates the comparison value is greater than the current string.
+        /// Zero indicates the strings are equal.
+        /// Greater than zero indicates the comparison value is less than the current string.
+        /// </returns>
+        public static int CompareToOrdinal(this string str, string value)
+        {
+            return string.CompareOrdinal(str, value);
+        }
     }
 }
\ No newline at end of file


[24/50] [abbrv] lucenenet git commit: Implemented missing Join.TermsCollector.AcceptsDocsOutOfOrder() method.

Posted by sy...@apache.org.
Implemented missing Join.TermsCollector.AcceptsDocsOutOfOrder() method.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/3612a6e2
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/3612a6e2
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/3612a6e2

Branch: refs/heads/analysis-work
Commit: 3612a6e29333610c204d15f8461857a2e35cdf7f
Parents: 5b48b11
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Aug 21 08:03:58 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 08:03:58 2016 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Join/TermsCollector.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3612a6e2/src/Lucene.Net.Join/TermsCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Join/TermsCollector.cs b/src/Lucene.Net.Join/TermsCollector.cs
index 2ccf1ed..8aa396d 100644
--- a/src/Lucene.Net.Join/TermsCollector.cs
+++ b/src/Lucene.Net.Join/TermsCollector.cs
@@ -93,7 +93,7 @@ namespace Lucene.Net.Join
 
             public override bool AcceptsDocsOutOfOrder()
             {
-                throw new System.NotImplementedException();
+                return true;
             }
         }
 


[06/50] [abbrv] lucenenet git commit: Ported Analysis.Hunspell + tests

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
index ff6f4e2..05c2a26 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
@@ -1,12 +1,19 @@
-\ufeffusing System;
-using System.Diagnostics;
+\ufeffusing Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Automaton;
+using Lucene.Net.Util.Fst;
+using System;
 using System.Collections.Generic;
+using System.Diagnostics;
+using System.Globalization;
+using System.IO;
 using System.Text;
+using System.Text.RegularExpressions;
 
-namespace org.apache.lucene.analysis.hunspell
+namespace Lucene.Net.Analysis.Hunspell
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -23,1213 +30,1155 @@ namespace org.apache.lucene.analysis.hunspell
 	 * limitations under the License.
 	 */
 
-	using ByteArrayDataOutput = org.apache.lucene.store.ByteArrayDataOutput;
-	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
-	using BytesRef = org.apache.lucene.util.BytesRef;
-	using BytesRefHash = org.apache.lucene.util.BytesRefHash;
-	using CharsRef = org.apache.lucene.util.CharsRef;
-	using IOUtils = org.apache.lucene.util.IOUtils;
-	using IntsRef = org.apache.lucene.util.IntsRef;
-	using OfflineSorter = org.apache.lucene.util.OfflineSorter;
-	using ByteSequencesReader = org.apache.lucene.util.OfflineSorter.ByteSequencesReader;
-	using ByteSequencesWriter = org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
-	using CharacterRunAutomaton = org.apache.lucene.util.automaton.CharacterRunAutomaton;
-	using RegExp = org.apache.lucene.util.automaton.RegExp;
-	using Builder = org.apache.lucene.util.fst.Builder;
-	using CharSequenceOutputs = org.apache.lucene.util.fst.CharSequenceOutputs;
-	using FST = org.apache.lucene.util.fst.FST;
-	using IntSequenceOutputs = org.apache.lucene.util.fst.IntSequenceOutputs;
-	using Outputs = org.apache.lucene.util.fst.Outputs;
-	using Util = org.apache.lucene.util.fst.Util;
-
-
-	/// <summary>
-	/// In-memory structure for the dictionary (.dic) and affix (.aff)
-	/// data of a hunspell dictionary.
-	/// </summary>
-	public class Dictionary
-	{
-
-	  internal static readonly char[] NOFLAGS = new char[0];
-
-	  private const string ALIAS_KEY = "AF";
-	  private const string PREFIX_KEY = "PFX";
-	  private const string SUFFIX_KEY = "SFX";
-	  private const string FLAG_KEY = "FLAG";
-	  private const string COMPLEXPREFIXES_KEY = "COMPLEXPREFIXES";
-	  private const string CIRCUMFIX_KEY = "CIRCUMFIX";
-	  private const string IGNORE_KEY = "IGNORE";
-	  private const string ICONV_KEY = "ICONV";
-	  private const string OCONV_KEY = "OCONV";
-
-	  private const string NUM_FLAG_TYPE = "num";
-	  private const string UTF8_FLAG_TYPE = "UTF-8";
-	  private const string LONG_FLAG_TYPE = "long";
-
-	  // TODO: really for suffixes we should reverse the automaton and run them backwards
-	  private const string PREFIX_CONDITION_REGEX_PATTERN = "%s.*";
-	  private const string SUFFIX_CONDITION_REGEX_PATTERN = ".*%s";
-
-	  internal FST<IntsRef> prefixes;
-	  internal FST<IntsRef> suffixes;
-
-	  // all condition checks used by prefixes and suffixes. these are typically re-used across
-	  // many affix stripping rules. so these are deduplicated, to save RAM.
-	  internal List<CharacterRunAutomaton> patterns = new List<CharacterRunAutomaton>();
-
-	  // the entries in the .dic file, mapping to their set of flags.
-	  // the fst output is the ordinal list for flagLookup
-	  internal FST<IntsRef> words;
-	  // the list of unique flagsets (wordforms). theoretically huge, but practically
-	  // small (e.g. for polish this is 756), otherwise humans wouldn't be able to deal with it either.
-	  internal BytesRefHash flagLookup = new BytesRefHash();
-
-	  // the list of unique strip affixes.
-	  internal char[] stripData;
-	  internal int[] stripOffsets;
-
-	  // 8 bytes per affix
-	  internal sbyte[] affixData = new sbyte[64];
-	  private int currentAffix = 0;
-
-	  private FlagParsingStrategy flagParsingStrategy = new SimpleFlagParsingStrategy(); // Default flag parsing strategy
-
-	  private string[] aliases;
-	  private int aliasCount = 0;
-
-	  private readonly File tempDir = OfflineSorter.defaultTempDir(); // TODO: make this configurable?
-
-	  internal bool ignoreCase;
-	  internal bool complexPrefixes;
-	  internal bool twoStageAffix; // if no affixes have continuation classes, no need to do 2-level affix stripping
-
-	  internal int circumfix = -1; // circumfix flag, or -1 if one is not defined
-
-	  // ignored characters (dictionary, affix, inputs)
-	  private char[] ignore;
-
-	  // FSTs used for ICONV/OCONV, output ord pointing to replacement text
-	  internal FST<CharsRef> iconv;
-	  internal FST<CharsRef> oconv;
-
-	  internal bool needsInputCleaning;
-	  internal bool needsOutputCleaning;
-
-	  /// <summary>
-	  /// Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
-	  /// and dictionary files.
-	  /// You have to close the provided InputStreams yourself.
-	  /// </summary>
-	  /// <param name="affix"> InputStream for reading the hunspell affix file (won't be closed). </param>
-	  /// <param name="dictionary"> InputStream for reading the hunspell dictionary file (won't be closed). </param>
-	  /// <exception cref="IOException"> Can be thrown while reading from the InputStreams </exception>
-	  /// <exception cref="ParseException"> Can be thrown if the content of the files does not meet expected formats </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public Dictionary(java.io.InputStream affix, java.io.InputStream dictionary) throws java.io.IOException, java.text.ParseException
-	  public Dictionary(InputStream affix, InputStream dictionary) : this(affix, Collections.singletonList(dictionary), false)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
-	  /// and dictionary files.
-	  /// You have to close the provided InputStreams yourself.
-	  /// </summary>
-	  /// <param name="affix"> InputStream for reading the hunspell affix file (won't be closed). </param>
-	  /// <param name="dictionaries"> InputStream for reading the hunspell dictionary files (won't be closed). </param>
-	  /// <exception cref="IOException"> Can be thrown while reading from the InputStreams </exception>
-	  /// <exception cref="ParseException"> Can be thrown if the content of the files does not meet expected formats </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public Dictionary(java.io.InputStream affix, java.util.List<java.io.InputStream> dictionaries, boolean ignoreCase) throws java.io.IOException, java.text.ParseException
-	  public Dictionary(InputStream affix, IList<InputStream> dictionaries, bool ignoreCase)
-	  {
-		this.ignoreCase = ignoreCase;
-		this.needsInputCleaning = ignoreCase;
-		this.needsOutputCleaning = false; // set if we have an OCONV
-		flagLookup.add(new BytesRef()); // no flags -> ord 0
-
-		File aff = File.createTempFile("affix", "aff", tempDir);
-		OutputStream @out = new BufferedOutputStream(new FileOutputStream(aff));
-		InputStream aff1 = null;
-		InputStream aff2 = null;
-		try
-		{
-		  // copy contents of affix stream to temp file
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final byte [] buffer = new byte [1024 * 8];
-		  sbyte[] buffer = new sbyte [1024 * 8];
-		  int len;
-		  while ((len = affix.read(buffer)) > 0)
-		  {
-			@out.write(buffer, 0, len);
-		  }
-		  @out.close();
-
-		  // pass 1: get encoding
-		  aff1 = new BufferedInputStream(new FileInputStream(aff));
-		  string encoding = getDictionaryEncoding(aff1);
-
-		  // pass 2: parse affixes
-		  CharsetDecoder decoder = getJavaEncoding(encoding);
-		  aff2 = new BufferedInputStream(new FileInputStream(aff));
-		  readAffixFile(aff2, decoder);
-
-		  // read dictionary entries
-		  IntSequenceOutputs o = IntSequenceOutputs.Singleton;
-		  Builder<IntsRef> b = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, o);
-		  readDictionaryFiles(dictionaries, decoder, b);
-		  words = b.finish();
-		  aliases = null; // no longer needed
-		}
-		finally
-		{
-		  IOUtils.closeWhileHandlingException(@out, aff1, aff2);
-		  aff.delete();
-		}
-	  }
-
-	  /// <summary>
-	  /// Looks up Hunspell word forms from the dictionary
-	  /// </summary>
-	  internal virtual IntsRef lookupWord(char[] word, int offset, int length)
-	  {
-		return lookup(words, word, offset, length);
-	  }
-
-	  /// <summary>
-	  /// Looks up HunspellAffix prefixes that have an append that matches the String created from the given char array, offset and length
-	  /// </summary>
-	  /// <param name="word"> Char array to generate the String from </param>
-	  /// <param name="offset"> Offset in the char array that the String starts at </param>
-	  /// <param name="length"> Length from the offset that the String is </param>
-	  /// <returns> List of HunspellAffix prefixes with an append that matches the String, or {@code null} if none are found </returns>
-	  internal virtual IntsRef lookupPrefix(char[] word, int offset, int length)
-	  {
-		return lookup(prefixes, word, offset, length);
-	  }
-
-	  /// <summary>
-	  /// Looks up HunspellAffix suffixes that have an append that matches the String created from the given char array, offset and length
-	  /// </summary>
-	  /// <param name="word"> Char array to generate the String from </param>
-	  /// <param name="offset"> Offset in the char array that the String starts at </param>
-	  /// <param name="length"> Length from the offset that the String is </param>
-	  /// <returns> List of HunspellAffix suffixes with an append that matches the String, or {@code null} if none are found </returns>
-	  internal virtual IntsRef lookupSuffix(char[] word, int offset, int length)
-	  {
-		return lookup(suffixes, word, offset, length);
-	  }
-
-	  // TODO: this is pretty stupid, considering how the stemming algorithm works
-	  // we can speed it up to be significantly faster!
-	  internal virtual IntsRef lookup(FST<IntsRef> fst, char[] word, int offset, int length)
-	  {
-		if (fst == null)
-		{
-		  return null;
-		}
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.util.fst.FST.BytesReader bytesReader = fst.getBytesReader();
-		FST.BytesReader bytesReader = fst.BytesReader;
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.util.fst.FST.Arc<org.apache.lucene.util.IntsRef> arc = fst.getFirstArc(new org.apache.lucene.util.fst.FST.Arc<org.apache.lucene.util.IntsRef>());
-		FST.Arc<IntsRef> arc = fst.getFirstArc(new FST.Arc<IntsRef>());
-		// Accumulate output as we go
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.util.IntsRef NO_OUTPUT = fst.outputs.getNoOutput();
-		IntsRef NO_OUTPUT = fst.outputs.NoOutput;
-		IntsRef output = NO_OUTPUT;
-
-		int l = offset + length;
-		try
-		{
-		  for (int i = offset, cp = 0; i < l; i += char.charCount(cp))
-		  {
-			cp = char.codePointAt(word, i, l);
-			if (fst.findTargetArc(cp, arc, arc, bytesReader) == null)
-			{
-			  return null;
-			}
-			else if (arc.output != NO_OUTPUT)
-			{
-			  output = fst.outputs.add(output, arc.output);
-			}
-		  }
-		  if (fst.findTargetArc(FST.END_LABEL, arc, arc, bytesReader) == null)
-		  {
-			return null;
-		  }
-		  else if (arc.output != NO_OUTPUT)
-		  {
-			return fst.outputs.add(output, arc.output);
-		  }
-		  else
-		  {
-			return output;
-		  }
-		}
-		catch (IOException bogus)
-		{
-		  throw new Exception(bogus);
-		}
-	  }
-
-	  /// <summary>
-	  /// Reads the affix file through the provided InputStream, building up the prefix and suffix maps
-	  /// </summary>
-	  /// <param name="affixStream"> InputStream to read the content of the affix file from </param>
-	  /// <param name="decoder"> CharsetDecoder to decode the content of the file </param>
-	  /// <exception cref="IOException"> Can be thrown while reading from the InputStream </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private void readAffixFile(java.io.InputStream affixStream, java.nio.charset.CharsetDecoder decoder) throws java.io.IOException, java.text.ParseException
-	  private void readAffixFile(InputStream affixStream, CharsetDecoder decoder)
-	  {
-		SortedDictionary<string, IList<char?>> prefixes = new SortedDictionary<string, IList<char?>>();
-		SortedDictionary<string, IList<char?>> suffixes = new SortedDictionary<string, IList<char?>>();
-		IDictionary<string, int?> seenPatterns = new Dictionary<string, int?>();
-
-		// zero condition -> 0 ord
-		seenPatterns[".*"] = 0;
-		patterns.Add(null);
-
-		// zero strip -> 0 ord
-		IDictionary<string, int?> seenStrips = new LinkedHashMap<string, int?>();
-		seenStrips[""] = 0;
-
-		LineNumberReader reader = new LineNumberReader(new InputStreamReader(affixStream, decoder));
-		string line = null;
-		while ((line = reader.readLine()) != null)
-		{
-		  // ignore any BOM marker on first line
-		  if (reader.LineNumber == 1 && line.StartsWith("\uFEFF", StringComparison.Ordinal))
-		  {
-			line = line.Substring(1);
-		  }
-		  if (line.StartsWith(ALIAS_KEY, StringComparison.Ordinal))
-		  {
-			parseAlias(line);
-		  }
-		  else if (line.StartsWith(PREFIX_KEY, StringComparison.Ordinal))
-		  {
-			parseAffix(prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
-		  }
-		  else if (line.StartsWith(SUFFIX_KEY, StringComparison.Ordinal))
-		  {
-			parseAffix(suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
-		  }
-		  else if (line.StartsWith(FLAG_KEY, StringComparison.Ordinal))
-		  {
-			// Assume that the FLAG line comes before any prefix or suffixes
-			// Store the strategy so it can be used when parsing the dic file
-			flagParsingStrategy = getFlagParsingStrategy(line);
-		  }
-		  else if (line.Equals(COMPLEXPREFIXES_KEY))
-		  {
-			complexPrefixes = true; // 2-stage prefix+1-stage suffix instead of 2-stage suffix+1-stage prefix
-		  }
-		  else if (line.StartsWith(CIRCUMFIX_KEY, StringComparison.Ordinal))
-		  {
-			string[] parts = line.Split("\\s+", true);
-			if (parts.Length != 2)
-			{
-			  throw new ParseException("Illegal CIRCUMFIX declaration", reader.LineNumber);
-			}
-			circumfix = flagParsingStrategy.parseFlag(parts[1]);
-		  }
-		  else if (line.StartsWith(IGNORE_KEY, StringComparison.Ordinal))
-		  {
-			string[] parts = line.Split("\\s+", true);
-			if (parts.Length != 2)
-			{
-			  throw new ParseException("Illegal IGNORE declaration", reader.LineNumber);
-			}
-			ignore = parts[1].ToCharArray();
-			Arrays.sort(ignore);
-			needsInputCleaning = true;
-		  }
-		  else if (line.StartsWith(ICONV_KEY, StringComparison.Ordinal) || line.StartsWith(OCONV_KEY, StringComparison.Ordinal))
-		  {
-			string[] parts = line.Split("\\s+", true);
-			string type = parts[0];
-			if (parts.Length != 2)
-			{
-			  throw new ParseException("Illegal " + type + " declaration", reader.LineNumber);
-			}
-			int num = int.Parse(parts[1]);
-			FST<CharsRef> res = parseConversions(reader, num);
-			if (type.Equals("ICONV"))
-			{
-			  iconv = res;
-			  needsInputCleaning |= iconv != null;
-			}
-			else
-			{
-			  oconv = res;
-			  needsOutputCleaning |= oconv != null;
-			}
-		  }
-		}
-
-		this.prefixes = affixFST(prefixes);
-		this.suffixes = affixFST(suffixes);
-
-		int totalChars = 0;
-		foreach (string strip in seenStrips.Keys)
-		{
-		  totalChars += strip.Length;
-		}
-		stripData = new char[totalChars];
-		stripOffsets = new int[seenStrips.Count + 1];
-		int currentOffset = 0;
-		int currentIndex = 0;
-		foreach (string strip in seenStrips.Keys)
-		{
-		  stripOffsets[currentIndex++] = currentOffset;
-		  strip.CopyTo(0, stripData, currentOffset, strip.Length - 0);
-		  currentOffset += strip.Length;
-		}
-		Debug.Assert(currentIndex == seenStrips.Count);
-		stripOffsets[currentIndex] = currentOffset;
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private org.apache.lucene.util.fst.FST<org.apache.lucene.util.IntsRef> affixFST(java.util.TreeMap<String,java.util.List<Character>> affixes) throws java.io.IOException
-	  private FST<IntsRef> affixFST(SortedDictionary<string, IList<char?>> affixes)
-	  {
-		IntSequenceOutputs outputs = IntSequenceOutputs.Singleton;
-		Builder<IntsRef> builder = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, outputs);
-
-		IntsRef scratch = new IntsRef();
-		foreach (KeyValuePair<string, IList<char?>> entry in affixes.SetOfKeyValuePairs())
-		{
-		  Util.toUTF32(entry.Key, scratch);
-		  IList<char?> entries = entry.Value;
-		  IntsRef output = new IntsRef(entries.Count);
-		  foreach (char? c in entries)
-		  {
-			output.ints[output.length++] = c;
-		  }
-		  builder.add(scratch, output);
-		}
-		return builder.finish();
-	  }
-
-	  /// <summary>
-	  /// Parses a specific affix rule putting the result into the provided affix map
-	  /// </summary>
-	  /// <param name="affixes"> Map where the result of the parsing will be put </param>
-	  /// <param name="header"> Header line of the affix rule </param>
-	  /// <param name="reader"> BufferedReader to read the content of the rule from </param>
-	  /// <param name="conditionPattern"> <seealso cref="String#format(String, Object...)"/> pattern to be used to generate the condition regex
-	  ///                         pattern </param>
-	  /// <param name="seenPatterns"> map from condition -> index of patterns, for deduplication. </param>
-	  /// <exception cref="IOException"> Can be thrown while reading the rule </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private void parseAffix(java.util.TreeMap<String,java.util.List<Character>> affixes, String header, java.io.LineNumberReader reader, String conditionPattern, java.util.Map<String,Integer> seenPatterns, java.util.Map<String,Integer> seenStrips) throws java.io.IOException, java.text.ParseException
-	  private void parseAffix(SortedDictionary<string, IList<char?>> affixes, string header, LineNumberReader reader, string conditionPattern, IDictionary<string, int?> seenPatterns, IDictionary<string, int?> seenStrips)
-	  {
-
-		BytesRef scratch = new BytesRef();
-		StringBuilder sb = new StringBuilder();
-		string[] args = header.Split("\\s+", true);
-
-		bool crossProduct = args[2].Equals("Y");
-
-		int numLines = int.Parse(args[3]);
-		affixData = ArrayUtil.grow(affixData, (currentAffix << 3) + (numLines << 3));
-		ByteArrayDataOutput affixWriter = new ByteArrayDataOutput(affixData, currentAffix << 3, numLines << 3);
-
-		for (int i = 0; i < numLines; i++)
-		{
-		  Debug.Assert(affixWriter.Position == currentAffix << 3);
-		  string line = reader.readLine();
-		  string[] ruleArgs = line.Split("\\s+", true);
-
-		  // from the manpage: PFX flag stripping prefix [condition [morphological_fields...]]
-		  // condition is optional
-		  if (ruleArgs.Length < 4)
-		  {
-			  throw new ParseException("The affix file contains a rule with less than four elements: " + line, reader.LineNumber);
-		  }
-
-		  char flag = flagParsingStrategy.parseFlag(ruleArgs[1]);
-		  string strip = ruleArgs[2].Equals("0") ? "" : ruleArgs[2];
-		  string affixArg = ruleArgs[3];
-		  char[] appendFlags = null;
-
-		  int flagSep = affixArg.LastIndexOf('/');
-		  if (flagSep != -1)
-		  {
-			string flagPart = affixArg.Substring(flagSep + 1);
-			affixArg = affixArg.Substring(0, flagSep);
-
-			if (aliasCount > 0)
-			{
-			  flagPart = getAliasValue(int.Parse(flagPart));
-			}
-
-			appendFlags = flagParsingStrategy.parseFlags(flagPart);
-			Arrays.sort(appendFlags);
-			twoStageAffix = true;
-		  }
-
-		  // TODO: add test and fix zero-affix handling!
-
-		  string condition = ruleArgs.Length > 4 ? ruleArgs[4] : ".";
-		  // at least the gascon affix file has this issue
-		  if (condition.StartsWith("[", StringComparison.Ordinal) && !condition.EndsWith("]", StringComparison.Ordinal))
-		  {
-			condition = condition + "]";
-		  }
-		  // "dash hasn't got special meaning" (we must escape it)
-		  if (condition.IndexOf('-') >= 0)
-		  {
-			condition = condition.Replace("-", "\\-");
-		  }
-
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final String regex;
-		  string regex;
-		  if (".".Equals(condition))
-		  {
-			regex = ".*"; // Zero condition is indicated by dot
-		  }
-		  else if (condition.Equals(strip))
-		  {
-			regex = ".*"; // TODO: optimize this better:
-						  // if we remove 'strip' from condition, we don't have to append 'strip' to check it...!
-						  // but this is complicated...
-		  }
-		  else
-		  {
-			regex = string.format(Locale.ROOT, conditionPattern, condition);
-		  }
-
-		  // deduplicate patterns
-		  int? patternIndex = seenPatterns[regex];
-		  if (patternIndex == null)
-		  {
-			patternIndex = patterns.Count;
-			if (patternIndex > short.MaxValue)
-			{
-			  throw new System.NotSupportedException("Too many patterns, please report this to dev@lucene.apache.org");
-			}
-			seenPatterns[regex] = patternIndex;
-			CharacterRunAutomaton pattern = new CharacterRunAutomaton((new RegExp(regex, RegExp.NONE)).toAutomaton());
-			patterns.Add(pattern);
-		  }
-
-		  int? stripOrd = seenStrips[strip];
-		  if (stripOrd == null)
-		  {
-			stripOrd = seenStrips.Count;
-			seenStrips[strip] = stripOrd;
-			if (stripOrd > Char.MaxValue)
-			{
-			  throw new System.NotSupportedException("Too many unique strips, please report this to dev@lucene.apache.org");
-			}
-		  }
-
-		  if (appendFlags == null)
-		  {
-			appendFlags = NOFLAGS;
-		  }
-
-		  encodeFlags(scratch, appendFlags);
-		  int appendFlagsOrd = flagLookup.add(scratch);
-		  if (appendFlagsOrd < 0)
-		  {
-			// already exists in our hash
-			appendFlagsOrd = (-appendFlagsOrd) - 1;
-		  }
-		  else if (appendFlagsOrd > short.MaxValue)
-		  {
-			// this limit is probably flexible, but its a good sanity check too
-			throw new System.NotSupportedException("Too many unique append flags, please report this to dev@lucene.apache.org");
-		  }
-
-		  affixWriter.writeShort((short)flag);
-		  affixWriter.writeShort((int)(short)stripOrd);
-		  // encode crossProduct into patternIndex
-		  int patternOrd = (int)patternIndex << 1 | (crossProduct ? 1 : 0);
-		  affixWriter.writeShort((short)patternOrd);
-		  affixWriter.writeShort((short)appendFlagsOrd);
-
-		  if (needsInputCleaning)
-		  {
-			CharSequence cleaned = cleanInput(affixArg, sb);
-			affixArg = cleaned.ToString();
-		  }
-
-		  IList<char?> list = affixes[affixArg];
-		  if (list == null)
-		  {
-			list = new List<>();
-			affixes[affixArg] = list;
-		  }
-
-		  list.Add((char)currentAffix);
-		  currentAffix++;
-		}
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private org.apache.lucene.util.fst.FST<org.apache.lucene.util.CharsRef> parseConversions(java.io.LineNumberReader reader, int num) throws java.io.IOException, java.text.ParseException
-	  private FST<CharsRef> parseConversions(LineNumberReader reader, int num)
-	  {
-		IDictionary<string, string> mappings = new SortedDictionary<string, string>();
-
-		for (int i = 0; i < num; i++)
-		{
-		  string line = reader.readLine();
-		  string[] parts = line.Split("\\s+", true);
-		  if (parts.Length != 3)
-		  {
-			throw new ParseException("invalid syntax: " + line, reader.LineNumber);
-		  }
-		  if (mappings.put(parts[1], parts[2]) != null)
-		  {
-			throw new System.InvalidOperationException("duplicate mapping specified for: " + parts[1]);
-		  }
-		}
-
-		Outputs<CharsRef> outputs = CharSequenceOutputs.Singleton;
-		Builder<CharsRef> builder = new Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs);
-		IntsRef scratchInts = new IntsRef();
-		foreach (KeyValuePair<string, string> entry in mappings.SetOfKeyValuePairs())
-		{
-		  Util.toUTF16(entry.Key, scratchInts);
-		  builder.add(scratchInts, new CharsRef(entry.Value));
-		}
-
-		return builder.finish();
-	  }
-
-	  /// <summary>
-	  /// pattern accepts optional BOM + SET + any whitespace </summary>
-	  internal static readonly Pattern ENCODING_PATTERN = Pattern.compile("^(\u00EF\u00BB\u00BF)?SET\\s+");
-
-	  /// <summary>
-	  /// Parses the encoding specified in the affix file readable through the provided InputStream
-	  /// </summary>
-	  /// <param name="affix"> InputStream for reading the affix file </param>
-	  /// <returns> Encoding specified in the affix file </returns>
-	  /// <exception cref="IOException"> Can be thrown while reading from the InputStream </exception>
-	  /// <exception cref="ParseException"> Thrown if the first non-empty non-comment line read from the file does not adhere to the format {@code SET <encoding>} </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: static String getDictionaryEncoding(java.io.InputStream affix) throws java.io.IOException, java.text.ParseException
-	  internal static string getDictionaryEncoding(InputStream affix)
-	  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final StringBuilder encoding = new StringBuilder();
-		StringBuilder encoding = new StringBuilder();
-		for (;;)
-		{
-		  encoding.Length = 0;
-		  int ch;
-		  while ((ch = affix.read()) >= 0)
-		  {
-			if (ch == '\n')
-			{
-			  break;
-			}
-			if (ch != '\r')
-			{
-			  encoding.Append((char)ch);
-			}
-		  }
-		  if (encoding.Length == 0 || encoding[0] == '#' || encoding.ToString().Trim().Length == 0)
-		  {
-			  // this test only at the end as ineffective but would allow lines only containing spaces:
-			if (ch < 0)
-			{
-			  throw new ParseException("Unexpected end of affix file.", 0);
-			}
-			continue;
-		  }
-		  Matcher matcher = ENCODING_PATTERN.matcher(encoding);
-		  if (matcher.find())
-		  {
-			int last = matcher.end();
-			return encoding.Substring(last).Trim();
-		  }
-		}
-	  }
-
-	  internal static readonly IDictionary<string, string> CHARSET_ALIASES;
-	  static Dictionary()
-	  {
-		IDictionary<string, string> m = new Dictionary<string, string>();
-		m["microsoft-cp1251"] = "windows-1251";
-		m["TIS620-2533"] = "TIS-620";
-		CHARSET_ALIASES = Collections.unmodifiableMap(m);
-	  }
-
-	  /// <summary>
-	  /// Retrieves the CharsetDecoder for the given encoding.  Note, This isn't perfect as I think ISCII-DEVANAGARI and
-	  /// MICROSOFT-CP1251 etc are allowed...
-	  /// </summary>
-	  /// <param name="encoding"> Encoding to retrieve the CharsetDecoder for </param>
-	  /// <returns> CharSetDecoder for the given encoding </returns>
-	  private CharsetDecoder getJavaEncoding(string encoding)
-	  {
-		if ("ISO8859-14".Equals(encoding))
-		{
-		  return new ISO8859_14Decoder();
-		}
-		string canon = CHARSET_ALIASES[encoding];
-		if (canon != null)
-		{
-		  encoding = canon;
-		}
-		Charset charset = Charset.forName(encoding);
-		return charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE);
-	  }
-
-	  /// <summary>
-	  /// Determines the appropriate <seealso cref="FlagParsingStrategy"/> based on the FLAG definition line taken from the affix file
-	  /// </summary>
-	  /// <param name="flagLine"> Line containing the flag information </param>
-	  /// <returns> FlagParsingStrategy that handles parsing flags in the way specified in the FLAG definition </returns>
-	  internal static FlagParsingStrategy getFlagParsingStrategy(string flagLine)
-	  {
-		string[] parts = flagLine.Split("\\s+", true);
-		if (parts.Length != 2)
-		{
-		  throw new System.ArgumentException("Illegal FLAG specification: " + flagLine);
-		}
-		string flagType = parts[1];
-
-		if (NUM_FLAG_TYPE.Equals(flagType))
-		{
-		  return new NumFlagParsingStrategy();
-		}
-		else if (UTF8_FLAG_TYPE.Equals(flagType))
-		{
-		  return new SimpleFlagParsingStrategy();
-		}
-		else if (LONG_FLAG_TYPE.Equals(flagType))
-		{
-		  return new DoubleASCIIFlagParsingStrategy();
-		}
-
-		throw new System.ArgumentException("Unknown flag type: " + flagType);
-	  }
-
-	  internal readonly char FLAG_SEPARATOR = (char)0x1f; // flag separator after escaping
-
-	  internal virtual string unescapeEntry(string entry)
-	  {
-		StringBuilder sb = new StringBuilder();
-		for (int i = 0; i < entry.Length; i++)
-		{
-		  char ch = entry[i];
-		  if (ch == '\\' && i + 1 < entry.Length)
-		  {
-			sb.Append(entry[i + 1]);
-			i++;
-		  }
-		  else if (ch == '/')
-		  {
-			sb.Append(FLAG_SEPARATOR);
-		  }
-		  else
-		  {
-			sb.Append(ch);
-		  }
-		}
-		return sb.ToString();
-	  }
-
-	  /// <summary>
-	  /// Reads the dictionary file through the provided InputStreams, building up the words map
-	  /// </summary>
-	  /// <param name="dictionaries"> InputStreams to read the dictionary file through </param>
-	  /// <param name="decoder"> CharsetDecoder used to decode the contents of the file </param>
-	  /// <exception cref="IOException"> Can be thrown while reading from the file </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private void readDictionaryFiles(java.util.List<java.io.InputStream> dictionaries, java.nio.charset.CharsetDecoder decoder, org.apache.lucene.util.fst.Builder<org.apache.lucene.util.IntsRef> words) throws java.io.IOException
-	  private void readDictionaryFiles(IList<InputStream> dictionaries, CharsetDecoder decoder, Builder<IntsRef> words)
-	  {
-		BytesRef flagsScratch = new BytesRef();
-		IntsRef scratchInts = new IntsRef();
-
-		StringBuilder sb = new StringBuilder();
-
-		File unsorted = File.createTempFile("unsorted", "dat", tempDir);
-		OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(unsorted);
-		bool success = false;
-		try
-		{
-		  foreach (InputStream dictionary in dictionaries)
-		  {
-			BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder));
-			string line = lines.readLine(); // first line is number of entries (approximately, sometimes)
-
-			while ((line = lines.readLine()) != null)
-			{
-			  line = unescapeEntry(line);
-			  if (needsInputCleaning)
-			  {
-				int flagSep = line.LastIndexOf(FLAG_SEPARATOR);
-				if (flagSep == -1)
-				{
-				  CharSequence cleansed = cleanInput(line, sb);
-				  writer.write(cleansed.ToString().GetBytes(StandardCharsets.UTF_8));
-				}
-				else
-				{
-				  string text = line.Substring(0, flagSep);
-				  CharSequence cleansed = cleanInput(text, sb);
-				  if (cleansed != sb)
-				  {
-					sb.Length = 0;
-					sb.Append(cleansed);
-				  }
-				  sb.Append(line.Substring(flagSep));
-				  writer.write(sb.ToString().GetBytes(StandardCharsets.UTF_8));
-				}
-			  }
-			  else
-			  {
-				writer.write(line.GetBytes(StandardCharsets.UTF_8));
-			  }
-			}
-		  }
-		  success = true;
-		}
-		finally
-		{
-		  if (success)
-		  {
-			IOUtils.close(writer);
-		  }
-		  else
-		  {
-			IOUtils.closeWhileHandlingException(writer);
-		  }
-		}
-		File sorted = File.createTempFile("sorted", "dat", tempDir);
-
-		OfflineSorter sorter = new OfflineSorter(new ComparatorAnonymousInnerClassHelper(this));
-		sorter.sort(unsorted, sorted);
-		unsorted.delete();
-
-		OfflineSorter.ByteSequencesReader reader = new OfflineSorter.ByteSequencesReader(sorted);
-		BytesRef scratchLine = new BytesRef();
-
-		// TODO: the flags themselves can be double-chars (long) or also numeric
-		// either way the trick is to encode them as char... but they must be parsed differently
-
-		string currentEntry = null;
-		IntsRef currentOrds = new IntsRef();
-
-		string line;
-		while (reader.read(scratchLine))
-		{
-		  line = scratchLine.utf8ToString();
-		  string entry;
-		  char[] wordForm;
-
-		  int flagSep = line.LastIndexOf(FLAG_SEPARATOR);
-		  if (flagSep == -1)
-		  {
-			wordForm = NOFLAGS;
-			entry = line;
-		  }
-		  else
-		  {
-			// note, there can be comments (morph description) after a flag.
-			// we should really look for any whitespace: currently just tab and space
-			int end = line.IndexOf('\t', flagSep);
-			if (end == -1)
-			{
-			  end = line.Length;
-			}
-			int end2 = line.IndexOf(' ', flagSep);
-			if (end2 == -1)
-			{
-			  end2 = line.Length;
-			}
-			end = Math.Min(end, end2);
-
-			string flagPart = StringHelperClass.SubstringSpecial(line, flagSep + 1, end);
-			if (aliasCount > 0)
-			{
-			  flagPart = getAliasValue(int.Parse(flagPart));
-			}
-
-			wordForm = flagParsingStrategy.parseFlags(flagPart);
-			Arrays.sort(wordForm);
-			entry = line.Substring(0, flagSep);
-		  }
-
-		  int cmp = currentEntry == null ? 1 : entry.CompareTo(currentEntry);
-		  if (cmp < 0)
-		  {
-			throw new System.ArgumentException("out of order: " + entry + " < " + currentEntry);
-		  }
-		  else
-		  {
-			encodeFlags(flagsScratch, wordForm);
-			int ord = flagLookup.add(flagsScratch);
-			if (ord < 0)
-			{
-			  // already exists in our hash
-			  ord = (-ord) - 1;
-			}
-			// finalize current entry, and switch "current" if necessary
-			if (cmp > 0 && currentEntry != null)
-			{
-			  Util.toUTF32(currentEntry, scratchInts);
-			  words.add(scratchInts, currentOrds);
-			}
-			// swap current
-			if (cmp > 0 || currentEntry == null)
-			{
-			  currentEntry = entry;
-			  currentOrds = new IntsRef(); // must be this way
-			}
-			currentOrds.grow(currentOrds.length + 1);
-			currentOrds.ints[currentOrds.length++] = ord;
-		  }
-		}
-
-		// finalize last entry
-		Util.toUTF32(currentEntry, scratchInts);
-		words.add(scratchInts, currentOrds);
-
-		reader.close();
-		sorted.delete();
-	  }
-
-	  private class ComparatorAnonymousInnerClassHelper : IComparer<BytesRef>
-	  {
-		  private readonly Dictionary outerInstance;
-
-		  public ComparatorAnonymousInnerClassHelper(Dictionary outerInstance)
-		  {
-			  this.outerInstance = outerInstance;
-			  scratch1 = new BytesRef();
-			  scratch2 = new BytesRef();
-		  }
-
-		  internal BytesRef scratch1;
-		  internal BytesRef scratch2;
-
-		  public virtual int Compare(BytesRef o1, BytesRef o2)
-		  {
-			scratch1.bytes = o1.bytes;
-			scratch1.offset = o1.offset;
-			scratch1.length = o1.length;
-
-			for (int i = scratch1.length - 1; i >= 0; i--)
-			{
-			  if (scratch1.bytes[scratch1.offset + i] == outerInstance.FLAG_SEPARATOR)
-			  {
-				scratch1.length = i;
-				break;
-			  }
-			}
-
-			scratch2.bytes = o2.bytes;
-			scratch2.offset = o2.offset;
-			scratch2.length = o2.length;
-
-			for (int i = scratch2.length - 1; i >= 0; i--)
-			{
-			  if (scratch2.bytes[scratch2.offset + i] == outerInstance.FLAG_SEPARATOR)
-			  {
-				scratch2.length = i;
-				break;
-			  }
-			}
-
-			int cmp = scratch1.compareTo(scratch2);
-			if (cmp == 0)
-			{
-			  // tie break on whole row
-			  return o1.compareTo(o2);
-			}
-			else
-			{
-			  return cmp;
-			}
-		  }
-	  }
-
-	  internal static char[] decodeFlags(BytesRef b)
-	  {
-		if (b.length == 0)
-		{
-		  return CharsRef.EMPTY_CHARS;
-		}
-		int len = (int)((uint)b.length >> 1);
-		char[] flags = new char[len];
-		int upto = 0;
-		int end = b.offset + b.length;
-		for (int i = b.offset; i < end; i += 2)
-		{
-		  flags[upto++] = (char)((b.bytes[i] << 8) | (b.bytes[i + 1] & 0xff));
-		}
-		return flags;
-	  }
-
-	  internal static void encodeFlags(BytesRef b, char[] flags)
-	  {
-		int len = flags.Length << 1;
-		b.grow(len);
-		b.length = len;
-		int upto = b.offset;
-		for (int i = 0; i < flags.Length; i++)
-		{
-		  int flag = flags[i];
-		  b.bytes[upto++] = unchecked((sbyte)((flag >> 8) & 0xff));
-		  b.bytes[upto++] = unchecked((sbyte)(flag & 0xff));
-		}
-	  }
-
-	  private void parseAlias(string line)
-	  {
-		string[] ruleArgs = line.Split("\\s+", true);
-		if (aliases == null)
-		{
-		  //first line should be the aliases count
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int count = Integer.parseInt(ruleArgs[1]);
-		  int count = int.Parse(ruleArgs[1]);
-		  aliases = new string[count];
-		}
-		else
-		{
-		  // an alias can map to no flags
-		  string aliasValue = ruleArgs.Length == 1 ? "" : ruleArgs[1];
-		  aliases[aliasCount++] = aliasValue;
-		}
-	  }
-
-	  private string getAliasValue(int id)
-	  {
-		try
-		{
-		  return aliases[id - 1];
-		}
-		catch (System.IndexOutOfRangeException ex)
-		{
-		  throw new System.ArgumentException("Bad flag alias number:" + id, ex);
-		}
-	  }
-
-	  /// <summary>
-	  /// Abstraction of the process of parsing flags taken from the affix and dic files
-	  /// </summary>
-	  internal abstract class FlagParsingStrategy
-	  {
-
-		/// <summary>
-		/// Parses the given String into a single flag
-		/// </summary>
-		/// <param name="rawFlag"> String to parse into a flag </param>
-		/// <returns> Parsed flag </returns>
-		internal virtual char parseFlag(string rawFlag)
-		{
-		  char[] flags = parseFlags(rawFlag);
-		  if (flags.Length != 1)
-		  {
-			throw new System.ArgumentException("expected only one flag, got: " + rawFlag);
-		  }
-		  return flags[0];
-		}
-
-		/// <summary>
-		/// Parses the given String into multiple flags
-		/// </summary>
-		/// <param name="rawFlags"> String to parse into flags </param>
-		/// <returns> Parsed flags </returns>
-		internal abstract char[] parseFlags(string rawFlags);
-	  }
-
-	  /// <summary>
-	  /// Simple implementation of <seealso cref="FlagParsingStrategy"/> that treats the chars in each String as a individual flags.
-	  /// Can be used with both the ASCII and UTF-8 flag types.
-	  /// </summary>
-	  private class SimpleFlagParsingStrategy : FlagParsingStrategy
-	  {
-		public override char[] parseFlags(string rawFlags)
-		{
-		  return rawFlags.ToCharArray();
-		}
-	  }
-
-	  /// <summary>
-	  /// Implementation of <seealso cref="FlagParsingStrategy"/> that assumes each flag is encoded in its numerical form.  In the case
-	  /// of multiple flags, each number is separated by a comma.
-	  /// </summary>
-	  private class NumFlagParsingStrategy : FlagParsingStrategy
-	  {
-		public override char[] parseFlags(string rawFlags)
-		{
-		  string[] rawFlagParts = rawFlags.Trim().Split(",", true);
-		  char[] flags = new char[rawFlagParts.Length];
-		  int upto = 0;
-
-		  for (int i = 0; i < rawFlagParts.Length; i++)
-		  {
-			// note, removing the trailing X/leading I for nepali... what is the rule here?! 
-			string replacement = rawFlagParts[i].replaceAll("[^0-9]", "");
-			// note, ignoring empty flags (this happens in danish, for example)
-			if (replacement.Length == 0)
-			{
-			  continue;
-			}
-			flags[upto++] = (char) int.Parse(replacement);
-		  }
-
-		  if (upto < flags.Length)
-		  {
-			flags = Arrays.copyOf(flags, upto);
-		  }
-		  return flags;
-		}
-	  }
-
-	  /// <summary>
-	  /// Implementation of <seealso cref="FlagParsingStrategy"/> that assumes each flag is encoded as two ASCII characters whose codes
-	  /// must be combined into a single character.
-	  /// 
-	  /// TODO (rmuir) test
-	  /// </summary>
-	  private class DoubleASCIIFlagParsingStrategy : FlagParsingStrategy
-	  {
-
-		public override char[] parseFlags(string rawFlags)
-		{
-		  if (rawFlags.Length == 0)
-		  {
-			return new char[0];
-		  }
-
-		  StringBuilder builder = new StringBuilder();
-		  if (rawFlags.Length % 2 == 1)
-		  {
-			throw new System.ArgumentException("Invalid flags (should be even number of characters): " + rawFlags);
-		  }
-		  for (int i = 0; i < rawFlags.Length; i += 2)
-		  {
-			char cookedFlag = (char)((int) rawFlags[i] + (int) rawFlags[i + 1]);
-			builder.Append(cookedFlag);
-		  }
-
-		  char[] flags = new char[builder.Length];
-		  builder.getChars(0, builder.Length, flags, 0);
-		  return flags;
-		}
-	  }
-
-	  internal static bool hasFlag(char[] flags, char flag)
-	  {
-		return Arrays.binarySearch(flags, flag) >= 0;
-	  }
-
-	  internal virtual CharSequence cleanInput(CharSequence input, StringBuilder reuse)
-	  {
-		reuse.Length = 0;
-
-		for (int i = 0; i < input.length(); i++)
-		{
-		  char ch = input.charAt(i);
-
-		  if (ignore != null && Arrays.binarySearch(ignore, ch) >= 0)
-		  {
-			continue;
-		  }
-
-		  if (ignoreCase && iconv == null)
-		  {
-			// if we have no input conversion mappings, do this on-the-fly
-			ch = char.ToLower(ch);
-		  }
-
-		  reuse.Append(ch);
-		}
-
-		if (iconv != null)
-		{
-		  try
-		  {
-			applyMappings(iconv, reuse);
-		  }
-		  catch (IOException bogus)
-		  {
-			throw new Exception(bogus);
-		  }
-		  if (ignoreCase)
-		  {
-			for (int i = 0; i < reuse.Length; i++)
-			{
-			  reuse[i] = char.ToLower(reuse[i]);
-			}
-		  }
-		}
-
-		return reuse;
-	  }
-
-	  // TODO: this could be more efficient!
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: static void applyMappings(org.apache.lucene.util.fst.FST<org.apache.lucene.util.CharsRef> fst, StringBuilder sb) throws java.io.IOException
-	  internal static void applyMappings(FST<CharsRef> fst, StringBuilder sb)
-	  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.util.fst.FST.BytesReader bytesReader = fst.getBytesReader();
-		FST.BytesReader bytesReader = fst.BytesReader;
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.util.fst.FST.Arc<org.apache.lucene.util.CharsRef> firstArc = fst.getFirstArc(new org.apache.lucene.util.fst.FST.Arc<org.apache.lucene.util.CharsRef>());
-		FST.Arc<CharsRef> firstArc = fst.getFirstArc(new FST.Arc<CharsRef>());
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.util.CharsRef NO_OUTPUT = fst.outputs.getNoOutput();
-		CharsRef NO_OUTPUT = fst.outputs.NoOutput;
-
-		// temporary stuff
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.util.fst.FST.Arc<org.apache.lucene.util.CharsRef> arc = new org.apache.lucene.util.fst.FST.Arc<>();
-		FST.Arc<CharsRef> arc = new FST.Arc<CharsRef>();
-		int longestMatch;
-		CharsRef longestOutput;
-
-		for (int i = 0; i < sb.Length; i++)
-		{
-		  arc.copyFrom(firstArc);
-		  CharsRef output = NO_OUTPUT;
-		  longestMatch = -1;
-		  longestOutput = null;
-
-		  for (int j = i; j < sb.Length; j++)
-		  {
-			char ch = sb[j];
-			if (fst.findTargetArc(ch, arc, arc, bytesReader) == null)
-			{
-			  break;
-			}
-			else
-			{
-			  output = fst.outputs.add(output, arc.output);
-			}
-			if (arc.Final)
-			{
-			  longestOutput = fst.outputs.add(output, arc.nextFinalOutput);
-			  longestMatch = j;
-			}
-		  }
-
-		  if (longestMatch >= 0)
-		  {
-			sb.Remove(i, longestMatch + 1 - i);
-			sb.Insert(i, longestOutput);
-			i += (longestOutput.length - 1);
-		  }
-		}
-	  }
-	}
-
+    /// <summary>
+    /// In-memory structure for the dictionary (.dic) and affix (.aff)
+    /// data of a hunspell dictionary.
+    /// </summary>
+    public class Dictionary
+    {
+        internal static readonly char[] NOFLAGS = new char[0];
+
+        private const string ALIAS_KEY = "AF";
+        private const string PREFIX_KEY = "PFX";
+        private const string SUFFIX_KEY = "SFX";
+        private const string FLAG_KEY = "FLAG";
+        private const string COMPLEXPREFIXES_KEY = "COMPLEXPREFIXES";
+        private const string CIRCUMFIX_KEY = "CIRCUMFIX";
+        private const string IGNORE_KEY = "IGNORE";
+        private const string ICONV_KEY = "ICONV";
+        private const string OCONV_KEY = "OCONV";
+
+        private const string NUM_FLAG_TYPE = "num";
+        private const string UTF8_FLAG_TYPE = "UTF-8";
+        private const string LONG_FLAG_TYPE = "long";
+
+        // TODO: really for suffixes we should reverse the automaton and run them backwards
+        private const string PREFIX_CONDITION_REGEX_PATTERN = "{0}.*";
+        private const string SUFFIX_CONDITION_REGEX_PATTERN = ".*{0}";
+
+        internal FST<IntsRef> prefixes;
+        internal FST<IntsRef> suffixes;
+
+        // all condition checks used by prefixes and suffixes. these are typically re-used across
+        // many affix stripping rules. so these are deduplicated, to save RAM.
+        internal List<CharacterRunAutomaton> patterns = new List<CharacterRunAutomaton>();
+
+        // the entries in the .dic file, mapping to their set of flags.
+        // the fst output is the ordinal list for flagLookup
+        internal FST<IntsRef> words;
+        // the list of unique flagsets (wordforms). theoretically huge, but practically
+        // small (e.g. for polish this is 756), otherwise humans wouldn't be able to deal with it either.
+        internal BytesRefHash flagLookup = new BytesRefHash();
+
+        // the list of unique strip affixes.
+        internal char[] stripData;
+        internal int[] stripOffsets;
+
+        // 8 bytes per affix
+        internal byte[] affixData = new byte[64];
+        private int currentAffix = 0;
+
+        private FlagParsingStrategy flagParsingStrategy = new SimpleFlagParsingStrategy(); // Default flag parsing strategy
+
+        private string[] aliases;
+        private int aliasCount = 0;
+
+        private readonly DirectoryInfo tempDir = OfflineSorter.DefaultTempDir(); // TODO: make this configurable?
+
+        internal bool ignoreCase;
+        internal bool complexPrefixes;
+        internal bool twoStageAffix; // if no affixes have continuation classes, no need to do 2-level affix stripping
+
+        internal int circumfix = -1; // circumfix flag, or -1 if one is not defined
+
+        // ignored characters (dictionary, affix, inputs)
+        private char[] ignore;
+
+        // FSTs used for ICONV/OCONV, output ord pointing to replacement text
+        internal FST<CharsRef> iconv;
+        internal FST<CharsRef> oconv;
+
+        internal bool needsInputCleaning;
+        internal bool needsOutputCleaning;
+
+        // LUCENENET: Added so we can get better performance than creating the regex in every tight loop.
+        private static Regex whitespacePattern = new Regex("\\s+", RegexOptions.Compiled);
+
+        /// <summary>
+        /// Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
+        /// and dictionary files.
+        /// You have to close the provided InputStreams yourself.
+        /// </summary>
+        /// <param name="affix"> InputStream for reading the hunspell affix file (won't be closed). </param>
+        /// <param name="dictionary"> InputStream for reading the hunspell dictionary file (won't be closed). </param>
+        /// <exception cref="IOException"> Can be thrown while reading from the InputStreams </exception>
+        /// <exception cref="ParseException"> Can be thrown if the content of the files does not meet expected formats </exception>
+        public Dictionary(Stream affix, Stream dictionary) 
+            : this(affix, new List<Stream>() { dictionary }, false)
+        {
+        }
+
+        /// <summary>
+        /// Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
+        /// and dictionary files.
+        /// You have to close the provided InputStreams yourself.
+        /// </summary>
+        /// <param name="affix"> InputStream for reading the hunspell affix file (won't be closed). </param>
+        /// <param name="dictionaries"> InputStream for reading the hunspell dictionary files (won't be closed). </param>
+        /// <exception cref="IOException"> Can be thrown while reading from the InputStreams </exception>
+        /// <exception cref="ParseException"> Can be thrown if the content of the files does not meet expected formats </exception>
+        public Dictionary(Stream affix, IList<Stream> dictionaries, bool ignoreCase)
+        {
+            this.ignoreCase = ignoreCase;
+            this.needsInputCleaning = ignoreCase;
+            this.needsOutputCleaning = false; // set if we have an OCONV
+            flagLookup.Add(new BytesRef()); // no flags -> ord 0
+
+            FileInfo aff = new FileInfo(System.IO.Path.Combine(tempDir.FullName, "affix.aff"));
+            using (Stream @out = aff.Create())
+            {
+                Stream aff1 = null;
+                Stream aff2 = null;
+                try
+                {
+                    // copy contents of affix stream to temp file
+                    byte[] buffer = new byte[1024 * 8];
+                    int len;
+                    while ((len = affix.Read(buffer, 0, buffer.Length)) > 0)
+                    {
+                        @out.Write(buffer, 0, len);
+                    }
+                    @out.Close(); // LUCENENET: Release the file handle - we dispose @out later
+
+                    // pass 1: get encoding
+                    aff1 = File.OpenRead(aff.FullName);
+                    string encoding = GetDictionaryEncoding(aff1);
+
+                    // pass 2: parse affixes
+                    Encoding decoder = GetSystemEncoding(encoding);
+                    aff2 = File.OpenRead(aff.FullName);
+                    ReadAffixFile(aff2, decoder);
+
+                    // read dictionary entries
+                    IntSequenceOutputs o = IntSequenceOutputs.Singleton;
+                    Builder<IntsRef> b = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, o);
+                    ReadDictionaryFiles(dictionaries, decoder, b);
+                    words = b.Finish();
+                    aliases = null; // no longer needed
+                }
+                finally
+                {
+                    IOUtils.CloseWhileHandlingException(aff1, aff2);
+                    aff.Delete();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Looks up Hunspell word forms from the dictionary
+        /// </summary>
+        internal virtual IntsRef LookupWord(char[] word, int offset, int length)
+        {
+            return Lookup(words, word, offset, length);
+        }
+
+        /// <summary>
+        /// Looks up HunspellAffix prefixes that have an append that matches the String created from the given char array, offset and length
+        /// </summary>
+        /// <param name="word"> Char array to generate the String from </param>
+        /// <param name="offset"> Offset in the char array that the String starts at </param>
+        /// <param name="length"> Length from the offset that the String is </param>
+        /// <returns> List of HunspellAffix prefixes with an append that matches the String, or {@code null} if none are found </returns>
+        internal virtual IntsRef LookupPrefix(char[] word, int offset, int length)
+        {
+            return Lookup(prefixes, word, offset, length);
+        }
+
+        /// <summary>
+        /// Looks up HunspellAffix suffixes that have an append that matches the String created from the given char array, offset and length
+        /// </summary>
+        /// <param name="word"> Char array to generate the String from </param>
+        /// <param name="offset"> Offset in the char array that the String starts at </param>
+        /// <param name="length"> Length from the offset that the String is </param>
+        /// <returns> List of HunspellAffix suffixes with an append that matches the String, or {@code null} if none are found </returns>
+        internal virtual IntsRef LookupSuffix(char[] word, int offset, int length)
+        {
+            return Lookup(suffixes, word, offset, length);
+        }
+
+        // TODO: this is pretty stupid, considering how the stemming algorithm works
+        // we can speed it up to be significantly faster!
+        internal virtual IntsRef Lookup(FST<IntsRef> fst, char[] word, int offset, int length)
+        {
+            if (fst == null)
+            {
+                return null;
+            }
+            FST.BytesReader bytesReader = fst.BytesReader;
+            FST.Arc<IntsRef> arc = fst.GetFirstArc(new FST.Arc<IntsRef>());
+            // Accumulate output as we go
+            IntsRef NO_OUTPUT = fst.Outputs.NoOutput;
+            IntsRef output = NO_OUTPUT;
+
+            int l = offset + length;
+            try
+            {
+                for (int i = offset, cp = 0; i < l; i += Character.CharCount(cp))
+                {
+                    cp = Character.CodePointAt(word, i, l);
+                    if (fst.FindTargetArc(cp, arc, arc, bytesReader) == null)
+                    {
+                        return null;
+                    }
+                    else if (arc.Output != NO_OUTPUT)
+                    {
+                        output = fst.Outputs.Add(output, arc.Output);
+                    }
+                }
+                if (fst.FindTargetArc(FST<IntsRef>.END_LABEL, arc, arc, bytesReader) == null)
+                {
+                    return null;
+                }
+                else if (arc.Output != NO_OUTPUT)
+                {
+                    return fst.Outputs.Add(output, arc.Output);
+                }
+                else
+                {
+                    return output;
+                }
+            }
+            catch (IOException bogus)
+            {
+                throw new Exception(bogus.Message, bogus);
+            }
+        }
+
+        /// <summary>
+        /// Reads the affix file through the provided InputStream, building up the prefix and suffix maps
+        /// </summary>
+        /// <param name="affixStream"> InputStream to read the content of the affix file from </param>
+        /// <param name="decoder"> CharsetDecoder to decode the content of the file </param>
+        /// <exception cref="IOException"> Can be thrown while reading from the InputStream </exception>
+        private void ReadAffixFile(Stream affixStream, Encoding decoder)
+        {
+            SortedDictionary<string, IList<char?>> prefixes = new SortedDictionary<string, IList<char?>>();
+            SortedDictionary<string, IList<char?>> suffixes = new SortedDictionary<string, IList<char?>>();
+            IDictionary<string, int?> seenPatterns = new Dictionary<string, int?>();
+
+            // zero condition -> 0 ord
+            seenPatterns[".*"] = 0;
+            patterns.Add(null);
+
+            // zero strip -> 0 ord
+            IDictionary<string, int?> seenStrips = new Dictionary<string, int?>();
+            seenStrips[""] = 0;
+
+            var reader = new StreamReader(affixStream, decoder);
+            string line = null;
+            int lineNumber = 0;
+            while ((line = reader.ReadLine()) != null)
+            {
+                lineNumber++;
+                // ignore any BOM marker on first line
+                if (lineNumber == 1 && line.StartsWith("\uFEFF", StringComparison.Ordinal))
+                {
+                    line = line.Substring(1);
+                }
+                if (line.StartsWith(ALIAS_KEY, StringComparison.Ordinal))
+                {
+                    ParseAlias(line);
+                }
+                else if (line.StartsWith(PREFIX_KEY, StringComparison.Ordinal))
+                {
+                    ParseAffix(prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
+                }
+                else if (line.StartsWith(SUFFIX_KEY, StringComparison.Ordinal))
+                {
+                    ParseAffix(suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
+                }
+                else if (line.StartsWith(FLAG_KEY, StringComparison.Ordinal))
+                {
+                    // Assume that the FLAG line comes before any prefix or suffixes
+                    // Store the strategy so it can be used when parsing the dic file
+                    flagParsingStrategy = GetFlagParsingStrategy(line);
+                }
+                else if (line.Equals(COMPLEXPREFIXES_KEY))
+                {
+                    complexPrefixes = true; // 2-stage prefix+1-stage suffix instead of 2-stage suffix+1-stage prefix
+                }
+                else if (line.StartsWith(CIRCUMFIX_KEY, StringComparison.Ordinal))
+                {
+                    string[] parts = whitespacePattern.Split(line);
+                    if (parts.Length != 2)
+                    {
+                        throw new Exception(string.Format("Illegal CIRCUMFIX declaration, line {0}", lineNumber));
+                    }
+                    circumfix = flagParsingStrategy.parseFlag(parts[1]);
+                }
+                else if (line.StartsWith(IGNORE_KEY, StringComparison.Ordinal))
+                {
+                    string[] parts = whitespacePattern.Split(line);
+                    if (parts.Length != 2)
+                    {
+                        throw new Exception(string.Format("Illegal IGNORE declaration, line {0}", lineNumber));
+                    }
+                    ignore = parts[1].ToCharArray();
+                    Array.Sort(ignore);
+                    needsInputCleaning = true;
+                }
+                else if (line.StartsWith(ICONV_KEY, StringComparison.Ordinal) || line.StartsWith(OCONV_KEY, StringComparison.Ordinal))
+                {
+                    string[] parts = whitespacePattern.Split(line);
+                    string type = parts[0];
+                    if (parts.Length != 2)
+                    {
+                        throw new Exception(string.Format("Illegal {0} declaration, line {1}", type, lineNumber));
+                    }
+                    int num = int.Parse(parts[1], CultureInfo.InvariantCulture);
+                    FST<CharsRef> res = ParseConversions(reader, num);
+                    if (type.Equals("ICONV"))
+                    {
+                        iconv = res;
+                        needsInputCleaning |= iconv != null;
+                    }
+                    else
+                    {
+                        oconv = res;
+                        needsOutputCleaning |= oconv != null;
+                    }
+                }
+            }
+
+            this.prefixes = AffixFST(prefixes);
+            this.suffixes = AffixFST(suffixes);
+
+            int totalChars = 0;
+            foreach (string strip in seenStrips.Keys)
+            {
+                totalChars += strip.Length;
+            }
+            stripData = new char[totalChars];
+            stripOffsets = new int[seenStrips.Count + 1];
+            int currentOffset = 0;
+            int currentIndex = 0;
+            foreach (string strip in seenStrips.Keys)
+            {
+                stripOffsets[currentIndex++] = currentOffset;
+                strip.CopyTo(0, stripData, currentOffset, strip.Length - 0);
+                currentOffset += strip.Length;
+            }
+            Debug.Assert(currentIndex == seenStrips.Count);
+            stripOffsets[currentIndex] = currentOffset;
+        }
+
+        private FST<IntsRef> AffixFST(SortedDictionary<string, IList<char?>> affixes)
+        {
+            IntSequenceOutputs outputs = IntSequenceOutputs.Singleton;
+            Builder<IntsRef> builder = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, outputs);
+
+            IntsRef scratch = new IntsRef();
+            foreach (KeyValuePair<string, IList<char?>> entry in affixes)
+            {
+                Lucene.Net.Util.Fst.Util.ToUTF32(entry.Key, scratch);
+                IList<char?> entries = entry.Value;
+                IntsRef output = new IntsRef(entries.Count);
+                foreach (char? c in entries)
+                {
+                    output.Ints[output.Length++] = c.HasValue ? c.Value : 0;
+                }
+                builder.Add(scratch, output);
+            }
+            return builder.Finish();
+        }
+
+        /// <summary>
+        /// Parses a specific affix rule putting the result into the provided affix map
+        /// </summary>
+        /// <param name="affixes"> Map where the result of the parsing will be put </param>
+        /// <param name="header"> Header line of the affix rule </param>
+        /// <param name="reader"> BufferedReader to read the content of the rule from </param>
+        /// <param name="conditionPattern"> <seealso cref="String#format(String, Object...)"/> pattern to be used to generate the condition regex
+        ///                         pattern </param>
+        /// <param name="seenPatterns"> map from condition -> index of patterns, for deduplication. </param>
+        /// <exception cref="IOException"> Can be thrown while reading the rule </exception>
+        private void ParseAffix(SortedDictionary<string, IList<char?>> affixes, string header, TextReader reader, string conditionPattern, IDictionary<string, int?> seenPatterns, IDictionary<string, int?> seenStrips)
+        {
+
+            BytesRef scratch = new BytesRef();
+            StringBuilder sb = new StringBuilder();
+            string[] args = whitespacePattern.Split(header);
+
+            bool crossProduct = args[2].Equals("Y");
+
+            int numLines = int.Parse(args[3], CultureInfo.InvariantCulture);
+            affixData = ArrayUtil.Grow(affixData, (currentAffix << 3) + (numLines << 3));
+            ByteArrayDataOutput affixWriter = new ByteArrayDataOutput(affixData, currentAffix << 3, numLines << 3);
+
+            for (int i = 0; i < numLines; i++)
+            {
+                Debug.Assert(affixWriter.Position == currentAffix << 3);
+                string line = reader.ReadLine();
+                string[] ruleArgs = whitespacePattern.Split(line);
+
+                // from the manpage: PFX flag stripping prefix [condition [morphological_fields...]]
+                // condition is optional
+                if (ruleArgs.Length < 4)
+                {
+                    throw new Exception("The affix file contains a rule with less than four elements: " + line /*, reader.LineNumber */);// LUCENENET TODO: LineNumberReader
+                }
+
+                char flag = flagParsingStrategy.parseFlag(ruleArgs[1]);
+                string strip = ruleArgs[2].Equals("0") ? "" : ruleArgs[2];
+                string affixArg = ruleArgs[3];
+                char[] appendFlags = null;
+
+                int flagSep = affixArg.LastIndexOf('/');
+                if (flagSep != -1)
+                {
+                    string flagPart = affixArg.Substring(flagSep + 1);
+                    affixArg = affixArg.Substring(0, flagSep - 0);
+
+                    if (aliasCount > 0)
+                    {
+                        flagPart = GetAliasValue(int.Parse(flagPart, CultureInfo.InvariantCulture));
+                    }
+
+                    appendFlags = flagParsingStrategy.ParseFlags(flagPart);
+                    Array.Sort(appendFlags);
+                    twoStageAffix = true;
+                }
+
+                // TODO: add test and fix zero-affix handling!
+
+                string condition = ruleArgs.Length > 4 ? ruleArgs[4] : ".";
+                // at least the gascon affix file has this issue
+                if (condition.StartsWith("[", StringComparison.Ordinal) && !condition.EndsWith("]", StringComparison.Ordinal))
+                {
+                    condition = condition + "]";
+                }
+                // "dash hasn't got special meaning" (we must escape it)
+                if (condition.IndexOf('-') >= 0)
+                {
+                    condition = condition.Replace("-", "\\-");
+                }
+
+                string regex;
+                if (".".Equals(condition))
+                {
+                    regex = ".*"; // Zero condition is indicated by dot
+                }
+                else if (condition.Equals(strip))
+                {
+                    regex = ".*"; // TODO: optimize this better:
+                                  // if we remove 'strip' from condition, we don't have to append 'strip' to check it...!
+                                  // but this is complicated...
+                }
+                else
+                {
+                    regex = string.Format(CultureInfo.InvariantCulture, conditionPattern, condition);
+                }
+
+                // deduplicate patterns
+                int? patternIndex = seenPatterns.ContainsKey(regex) ? seenPatterns[regex] : null;
+                if (patternIndex == null)
+                {
+                    patternIndex = patterns.Count;
+                    if (patternIndex > short.MaxValue)
+                    {
+                        throw new System.NotSupportedException("Too many patterns, please report this to dev@lucene.apache.org");
+                    }
+                    seenPatterns[regex] = patternIndex;
+                    CharacterRunAutomaton pattern = new CharacterRunAutomaton((new RegExp(regex, RegExp.NONE)).ToAutomaton());
+                    patterns.Add(pattern);
+                }
+
+                int? stripOrd = seenStrips.ContainsKey(strip) ? seenStrips[strip] : null;
+                if (stripOrd == null)
+                {
+                    stripOrd = seenStrips.Count;
+                    seenStrips[strip] = stripOrd;
+                    if (stripOrd > char.MaxValue)
+                    {
+                        throw new System.NotSupportedException("Too many unique strips, please report this to dev@lucene.apache.org");
+                    }
+                }
+
+                if (appendFlags == null)
+                {
+                    appendFlags = NOFLAGS;
+                }
+
+                EncodeFlags(scratch, appendFlags);
+                int appendFlagsOrd = flagLookup.Add(scratch);
+                if (appendFlagsOrd < 0)
+                {
+                    // already exists in our hash
+                    appendFlagsOrd = (-appendFlagsOrd) - 1;
+                }
+                else if (appendFlagsOrd > short.MaxValue)
+                {
+                    // this limit is probably flexible, but its a good sanity check too
+                    throw new System.NotSupportedException("Too many unique append flags, please report this to dev@lucene.apache.org");
+                }
+
+                affixWriter.WriteShort((short)flag);
+                affixWriter.WriteShort((short)stripOrd);
+                // encode crossProduct into patternIndex
+                int patternOrd = (int)patternIndex << 1 | (crossProduct ? 1 : 0);
+                affixWriter.WriteShort((short)patternOrd);
+                affixWriter.WriteShort((short)appendFlagsOrd);
+
+                if (needsInputCleaning)
+                {
+                    string cleaned = CleanInput(affixArg, sb);
+                    affixArg = cleaned.ToString();
+                }
+
+                IList<char?> list = affixes.ContainsKey(affixArg) ? affixes[affixArg] : null;
+                if (list == null)
+                {
+                    list = new List<char?>();
+                    affixes[affixArg] = list;
+                }
+
+                list.Add((char)currentAffix);
+                currentAffix++;
+            }
+        }
+
+        private FST<CharsRef> ParseConversions(TextReader reader, int num)
+        {
+            IDictionary<string, string> mappings = new SortedDictionary<string, string>();
+
+            for (int i = 0; i < num; i++)
+            {
+                string line = reader.ReadLine();
+                string[] parts = whitespacePattern.Split(line);
+                if (parts.Length != 3)
+                {
+                    throw new Exception("invalid syntax: " + line /*, reader.LineNumber */); // LUCENENET TODO: LineNumberReader
+                }
+                if (mappings.Put(parts[1], parts[2]) != null)
+                {
+                    throw new System.InvalidOperationException("duplicate mapping specified for: " + parts[1]);
+                }
+            }
+
+            Outputs<CharsRef> outputs = CharSequenceOutputs.Singleton;
+            Builder<CharsRef> builder = new Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs);
+            IntsRef scratchInts = new IntsRef();
+            foreach (KeyValuePair<string, string> entry in mappings)
+            {
+                Lucene.Net.Util.Fst.Util.ToUTF16(entry.Key, scratchInts);
+                builder.Add(scratchInts, new CharsRef(entry.Value));
+            }
+
+            return builder.Finish();
+        }
+
+        /// <summary>
+        /// pattern accepts optional BOM + SET + any whitespace </summary>
+        internal static readonly Regex ENCODING_PATTERN = new Regex("^(\u00EF\u00BB\u00BF)?SET\\s+", RegexOptions.Compiled);
+
+        /// <summary>
+        /// Parses the encoding specified in the affix file readable through the provided InputStream
+        /// </summary>
+        /// <param name="affix"> InputStream for reading the affix file </param>
+        /// <returns> Encoding specified in the affix file </returns>
+        /// <exception cref="IOException"> Can be thrown while reading from the InputStream </exception>
+        /// <exception cref="ParseException"> Thrown if the first non-empty non-comment line read from the file does not adhere to the format {@code SET <encoding>} </exception>
+        internal static string GetDictionaryEncoding(Stream affix)
+        {
+            StringBuilder encoding = new StringBuilder();
+            for (;;)
+            {
+                encoding.Length = 0;
+                int ch;
+                while ((ch = affix.ReadByte()) > 0)
+                {
+                    if (ch == '\n')
+                    {
+                        break;
+                    }
+                    if (ch != '\r')
+                    {
+                        encoding.Append((char)ch);
+                    }
+                }
+                if (encoding.Length == 0 || encoding[0] == '#' || encoding.ToString().Trim().Length == 0)
+                {
+                    // this test only at the end as ineffective but would allow lines only containing spaces:
+                    if (ch < 0)
+                    {
+                        throw new Exception("Unexpected end of affix file." /*, 0*/);
+                    }
+                    continue;
+                }
+                Match matcher = ENCODING_PATTERN.Match(encoding.ToString());
+                if (matcher.Success)
+                {
+                    int last = matcher.Index + matcher.Length;
+                    return encoding.ToString(last, encoding.Length - last).Trim();
+                }
+            }
+        }
+
+        internal static readonly IDictionary<string, string> CHARSET_ALIASES;
+        static Dictionary()
+        {
+            IDictionary<string, string> m = new Dictionary<string, string>();
+            m["microsoft-cp1251"] = "windows-1251";
+            m["TIS620-2533"] = "TIS-620";
+            CHARSET_ALIASES = Collections.UnmodifiableMap(m);
+        }
+
+        /// <summary>
+        /// Retrieves the CharsetDecoder for the given encoding.  Note, This isn't perfect as I think ISCII-DEVANAGARI and
+        /// MICROSOFT-CP1251 etc are allowed...
+        /// </summary>
+        /// <param name="encoding"> Encoding to retrieve the CharsetDecoder for </param>
+        /// <returns> CharSetDecoder for the given encoding </returns>
+        // LUCENENET NOTE: This was getJavaEncoding in the original
+        private Encoding GetSystemEncoding(string encoding)
+        {
+            if ("ISO8859-14".Equals(encoding, StringComparison.OrdinalIgnoreCase))
+            {
+                return new ISO8859_14Encoding();
+            }
+            return Encoding.GetEncoding(encoding);
+        }
+
+
+        /// <summary>
+        /// Determines the appropriate <seealso cref="FlagParsingStrategy"/> based on the FLAG definition line taken from the affix file
+        /// </summary>
+        /// <param name="flagLine"> Line containing the flag information </param>
+        /// <returns> FlagParsingStrategy that handles parsing flags in the way specified in the FLAG definition </returns>
+        internal static FlagParsingStrategy GetFlagParsingStrategy(string flagLine)
+        {
+            string[] parts = whitespacePattern.Split(flagLine);
+            if (parts.Length != 2)
+            {
+                throw new System.ArgumentException("Illegal FLAG specification: " + flagLine);
+            }
+            string flagType = parts[1];
+
+            if (NUM_FLAG_TYPE.Equals(flagType))
+            {
+                return new NumFlagParsingStrategy();
+            }
+            else if (UTF8_FLAG_TYPE.Equals(flagType))
+            {
+                return new SimpleFlagParsingStrategy();
+            }
+            else if (LONG_FLAG_TYPE.Equals(flagType))
+            {
+                return new DoubleASCIIFlagParsingStrategy();
+            }
+
+            throw new System.ArgumentException("Unknown flag type: " + flagType);
+        }
+
+        internal readonly char FLAG_SEPARATOR = (char)0x1f; // flag separator after escaping
+
+        internal virtual string UnescapeEntry(string entry)
+        {
+            StringBuilder sb = new StringBuilder();
+            for (int i = 0; i < entry.Length; i++)
+            {
+                char ch = entry[i];
+                if (ch == '\\' && i + 1 < entry.Length)
+                {
+                    sb.Append(entry[i + 1]);
+                    i++;
+                }
+                else if (ch == '/')
+                {
+                    sb.Append(FLAG_SEPARATOR);
+                }
+                else
+                {
+                    sb.Append(ch);
+                }
+            }
+            return sb.ToString();
+        }
+
+        /// <summary>
+        /// Reads the dictionary file through the provided InputStreams, building up the words map
+        /// </summary>
+        /// <param name="dictionaries"> InputStreams to read the dictionary file through </param>
+        /// <param name="decoder"> CharsetDecoder used to decode the contents of the file </param>
+        /// <exception cref="IOException"> Can be thrown while reading from the file </exception>
+        private void ReadDictionaryFiles(IList<Stream> dictionaries, Encoding decoder, Builder<IntsRef> words)
+        {
+            BytesRef flagsScratch = new BytesRef();
+            IntsRef scratchInts = new IntsRef();
+
+            StringBuilder sb = new StringBuilder();
+
+            FileInfo unsorted = new FileInfo(System.IO.Path.Combine(tempDir.FullName, "unsorted.dat"));
+            OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(unsorted);
+            bool success = false;
+            try
+            {
+                foreach (Stream dictionary in dictionaries)
+                {
+                    var lines = new StreamReader(dictionary, decoder);
+                    string line = lines.ReadLine(); // first line is number of entries (approximately, sometimes)
+
+                    while ((line = lines.ReadLine()) != null)
+                    {
+                        line = UnescapeEntry(line);
+                        if (needsInputCleaning)
+                        {
+                            int flagSep = line.LastIndexOf(FLAG_SEPARATOR);
+                            if (flagSep == -1)
+                            {
+                                string cleansed = CleanInput(line, sb);
+                                writer.Write(cleansed.ToString().GetBytes(Encoding.UTF8));
+                            }
+                            else
+                            {
+                                string text = line.Substring(0, flagSep - 0);
+                                string cleansed = CleanInput(text, sb);
+                                if (cleansed != sb.ToString())
+                                {
+                                    sb.Length = 0;
+                                    sb.Append(cleansed);
+                                }
+                                sb.Append(line.Substring(flagSep));
+                                writer.Write(sb.ToString().GetBytes(Encoding.UTF8));
+                            }
+                        }
+                        else
+                        {
+                            writer.Write(line.GetBytes(Encoding.UTF8));
+                        }
+                    }
+                }
+                success = true;
+            }
+            finally
+            {
+                if (success)
+                {
+                    IOUtils.Close(writer);
+                }
+                else
+                {
+                    IOUtils.CloseWhileHandlingException(writer);
+                }
+            }
+            FileInfo sorted = new FileInfo(System.IO.Path.Combine(tempDir.FullName, "sorted.dat"));
+            using (var temp = sorted.Create()) { }
+
+            OfflineSorter sorter = new OfflineSorter(new ComparatorAnonymousInnerClassHelper(this));
+            sorter.Sort(unsorted, sorted);
+            unsorted.Delete();
+
+            OfflineSorter.ByteSequencesReader reader = new OfflineSorter.ByteSequencesReader(sorted);
+            BytesRef scratchLine = new BytesRef();
+
+            // TODO: the flags themselves can be double-chars (long) or also numeric
+            // either way the trick is to encode them as char... but they must be parsed differently
+
+            string currentEntry = null;
+            IntsRef currentOrds = new IntsRef();
+
+            string line2;
+            while (reader.Read(scratchLine))
+            {
+                line2 = scratchLine.Utf8ToString();
+                string entry;
+                char[] wordForm;
+
+                int flagSep = line2.LastIndexOf(FLAG_SEPARATOR);
+                if (flagSep == -1)
+                {
+                    wordForm = NOFLAGS;
+                    entry = line2;
+                }
+                else
+                {
+                    // note, there can be comments (morph description) after a flag.
+                    // we should really look for any whitespace: currently just tab and space
+                    int end = line2.IndexOf('\t', flagSep);
+                    if (end == -1)
+                    {
+                        end = line2.Length;
+                    }
+                    int end2 = line2.IndexOf(' ', flagSep);
+                    if (end2 == -1)
+                    {
+                        end2 = line2.Length;
+                    }
+                    end = Math.Min(end, end2);
+
+                    string flagPart = line2.Substring(flagSep + 1, end - (flagSep + 1));
+                    if (aliasCount > 0)
+                    {
+                        flagPart = GetAliasValue(int.Parse(flagPart, CultureInfo.InvariantCulture));
+                    }
+
+                    wordForm = flagParsingStrategy.ParseFlags(flagPart);
+                    Array.Sort(wordForm);
+                    entry = line2.Substring(0, flagSep - 0);
+                }
+
+                int cmp = currentEntry == null ? 1 : entry.CompareTo(currentEntry);
+                if (cmp < 0)
+                {
+                    throw new System.ArgumentException("out of order: " + entry + " < " + currentEntry);
+                }
+                else
+                {
+                    EncodeFlags(flagsScratch, wordForm);
+                    int ord = flagLookup.Add(flagsScratch);
+                    if (ord < 0)
+                    {
+                        // already exists in our hash
+                        ord = (-ord) - 1;
+                    }
+                    // finalize current entry, and switch "current" if necessary
+                    if (cmp > 0 && currentEntry != null)
+                    {
+                        Lucene.Net.Util.Fst.Util.ToUTF32(currentEntry, scratchInts);
+                        words.Add(scratchInts, currentOrds);
+                    }
+                    // swap current
+                    if (cmp > 0 || currentEntry == null)
+                    {
+                        currentEntry = entry;
+                        currentOrds = new IntsRef(); // must be this way
+                    }
+                    currentOrds.Grow(currentOrds.Length + 1);
+                    currentOrds.Ints[currentOrds.Length++] = ord;
+                }
+            }
+
+            // finalize last entry
+            Lucene.Net.Util.Fst.Util.ToUTF32(currentEntry, scratchInts);
+            words.Add(scratchInts, currentOrds);
+
+            reader.Dispose();
+            sorted.Delete();
+        }
+
+        private class ComparatorAnonymousInnerClassHelper : IComparer<BytesRef>
+        {
+            private readonly Dictionary outerInstance;
+
+            public ComparatorAnonymousInnerClassHelper(Dictionary outerInstance)
+            {
+                this.outerInstance = outerInstance;
+                scratch1 = new BytesRef();
+                scratch2 = new BytesRef();
+            }
+
+            internal BytesRef scratch1;
+            internal BytesRef scratch2;
+
+            public virtual int Compare(BytesRef o1, BytesRef o2)
+            {
+                scratch1.Bytes = o1.Bytes;
+                scratch1.Offset = o1.Offset;
+                scratch1.Length = o1.Length;
+
+                for (int i = scratch1.Length - 1; i >= 0; i--)
+                {
+                    if (scratch1.Bytes[scratch1.Offset + i] == outerInstance.FLAG_SEPARATOR)
+                    {
+                        scratch1.Length = i;
+                        break;
+                    }
+                }
+
+                scratch2.Bytes = o2.Bytes;
+                scratch2.Offset = o2.Offset;
+                scratch2.Length = o2.Length;
+
+                for (int i = scratch2.Length - 1; i >= 0; i--)
+                {
+                    if (scratch2.Bytes[scratch2.Offset + i] == outerInstance.FLAG_SEPARATOR)
+                    {
+                        scratch2.Length = i;
+                        break;
+                    }
+                }
+
+                int cmp = scratch1.CompareTo(scratch2);
+                if (cmp == 0)
+                {
+                    // tie break on whole row
+                    return o1.CompareTo(o2);
+                }
+                else
+                {
+                    return cmp;
+                }
+            }
+        }
+
+        internal static char[] DecodeFlags(BytesRef b)
+        {
+            if (b.Length == 0)
+            {
+                return CharsRef.EMPTY_CHARS;
+            }
+            int len = (int)((uint)b.Length >> 1);
+            char[] flags = new char[len];
+            int upto = 0;
+            int end = b.Offset + b.Length;
+            for (int i = b.Offset; i < end; i += 2)
+            {
+                flags[upto++] = (char)((b.Bytes[i] << 8) | (b.Bytes[i + 1] & 0xff));
+            }
+            return flags;
+        }
+
+        internal static void EncodeFlags(BytesRef b, char[] flags)
+        {
+            int len = flags.Length << 1;
+            b.Grow(len);
+            b.Length = len;
+            int upto = b.Offset;
+            for (int i = 0; i < flags.Length; i++)
+            {
+                int flag = flags[i];
+                b.Bytes[upto++] = (byte)((flag >> 8) & 0xff);
+                b.Bytes[upto++] = (byte)(flag & 0xff);
+            }
+        }
+
+        private void ParseAlias(string line)
+        {
+            string[] ruleArgs = whitespacePattern.Split(line);
+            if (aliases == null)
+            {
+                //first line should be the aliases count
+                int count = int.Parse(ruleArgs[1], CultureInfo.InvariantCulture);
+                aliases = new string[count];
+            }
+            else
+            {
+                // an alias can map to no flags
+                string aliasValue = ruleArgs.Length == 1 ? "" : ruleArgs[1];
+                aliases[aliasCount++] = aliasValue;
+            }
+        }
+
+        private string GetAliasValue(int id)
+        {
+            try
+            {
+                return aliases[id - 1];
+            }
+            catch (System.IndexOutOfRangeException ex)
+            {
+                throw new System.ArgumentException("Bad flag alias number:" + id, ex);
+            }
+        }
+
+        /// <summary>
+        /// Abstraction of the process of parsing flags taken from the affix and dic files
+        /// </summary>
+        internal abstract class FlagParsingStrategy
+        {
+
+            /// <summary>
+            /// Parses the given String into a single flag
+            /// </summary>
+            /// <param name="rawFlag"> String to parse into a flag </param>
+            /// <returns> Parsed flag </returns>
+            internal virtual char parseFlag(string rawFlag)
+            {
+                char[] flags = ParseFlags(rawFlag);
+                if (flags.Length != 1)
+                {
+                    throw new System.ArgumentException("expected only one flag, got: " + rawFlag);
+                }
+                return flags[0];
+            }
+
+            /// <summary>
+            /// Parses the given String into multiple flags
+            /// </summary>
+            /// <param name="rawFlags"> String to parse into flags </param>
+            /// <returns> Parsed flags </returns>
+            internal abstract char[] ParseFlags(string rawFlags);
+        }
+
+        /// <summary>
+        /// Simple implementation of <seealso cref="FlagParsingStrategy"/> that treats the chars in each String as a individual flags.
+        /// Can be used with both the ASCII and UTF-8 flag types.
+        /// </summary>
+        private class SimpleFlagParsingStrategy : FlagParsingStrategy
+        {
+            internal override char[] ParseFlags(string rawFlags)
+            {
+                return rawFlags.ToCharArray();
+            }
+        }
+
+        /// <summary>
+        /// Implementation of <seealso cref="FlagParsingStrategy"/> that assumes each flag is encoded in its numerical form.  In the case
+        /// of multiple flags, each number is separated by a comma.
+        /// </summary>
+        private class NumFlagParsingStrategy : FlagParsingStrategy
+        {
+            internal override char[] ParseFlags(string rawFlags)
+            {
+                string[] rawFlagParts = rawFlags.Trim().Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
+                char[] flags = new char[rawFlagParts.Length];
+                int upto = 0;
+
+                for (int i = 0; i < rawFlagParts.Length; i++)
+                {
+                    // note, removing the trailing X/leading I for nepali... what is the rule here?! 
+                    string replacement = Regex.Replace(rawFlagParts[i], "[^0-9]", "");
+                    // note, ignoring empty flags (this happens in danish, for example)
+                    if (replacement.Length == 0)
+                    {
+                        continue;
+                    }
+                    flags[upto++] = (char)int.Parse(replacement, CultureInfo.InvariantCulture);
+                }
+
+                if (upto < flags.Length)
+                {
+                    flags = Arrays.CopyOf(flags, upto);
+                }
+                return flags;
+            }
+        }
+
+        /// <summary>
+        /// Implementation of <seealso cref="FlagParsingStrategy"/> that assumes each flag is encoded as two ASCII characters whose codes
+        /// must be combined into a single character.
+        /// 
+        /// TODO (rmuir) test
+        /// </summary>
+        private class DoubleASCIIFlagParsingStrategy : FlagParsingStrategy
+        {
+            internal override char[] ParseFlags(string rawFlags)
+            {
+                if (rawFlags.Length == 0)
+                {
+                    return new char[0];
+                }
+
+                StringBuilder builder = new StringBuilder();
+                if (rawFlags.Length % 2 == 1)
+                {
+                    throw new System.ArgumentException("Invalid flags (should be even number of characters): " + rawFlags);

<TRUNCATED>

[18/50] [abbrv] lucenenet git commit: Cleanup commented code in SynonymFilter and SynonymMap

Posted by sy...@apache.org.
Cleanup commented code in SynonymFilter and SynonymMap


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/a153b02c
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/a153b02c
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/a153b02c

Branch: refs/heads/analysis-work
Commit: a153b02cff3986deefabb6ad0f9e5dce6cdac7d6
Parents: dae551f
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Aug 20 19:18:13 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Aug 20 19:18:13 2016 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs | 3 ---
 src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymMap.cs    | 1 -
 2 files changed, 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a153b02c/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
index 75c3039..d1ef34e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilter.cs
@@ -179,8 +179,6 @@ namespace Lucene.Net.Analysis.Synonym
                 Debug.Assert(upto < count);
                 lastEndOffset = endOffsets[upto];
                 lastPosLength = posLengths[upto];
-                //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-                //ORIGINAL LINE: final org.apache.lucene.util.CharsRef result = outputs[upto++];
                 CharsRef result = outputs[upto++];
                 posIncr = 0;
                 if (upto == count)
@@ -468,7 +466,6 @@ namespace Lucene.Net.Analysis.Synonym
                 }
 
                 curNextRead = rollIncr(curNextRead);
-                //byTokenContinue:;
             }
             byTokenBreak:
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/a153b02c/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymMap.cs
index 9e05dc9..4bfb007 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymMap.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymMap.cs
@@ -254,7 +254,6 @@ namespace Lucene.Net.Analysis.Synonym
 
                 IEnumerable<CharsRef> keys = workingSet.Keys;
                 CharsRef[] sortedKeys = keys.ToArray();
-                //Arrays.Sort(sortedKeys, CharsRef.UTF16SortedAsUTF8Comparer);
                 System.Array.Sort(sortedKeys, CharsRef.UTF16SortedAsUTF8Comparer);
 
 


[25/50] [abbrv] lucenenet git commit: Fixed "key not found" bug in Join.TestJoinUtil

Posted by sy...@apache.org.
Fixed "key not found" bug in Join.TestJoinUtil


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/8b210c02
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/8b210c02
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/8b210c02

Branch: refs/heads/analysis-work
Commit: 8b210c02c95f8c527b66b5dda9967c341235a758
Parents: 3612a6e
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Aug 21 08:04:35 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 08:04:35 2016 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Tests.Join/TestJoinUtil.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8b210c02/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Join/TestJoinUtil.cs b/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
index 7e20840..a254997 100644
--- a/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
+++ b/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
@@ -1063,7 +1063,7 @@ namespace Lucene.Net.Tests.Join
             }
 
             FixedBitSet expectedResult = new FixedBitSet(topLevelReader.MaxDoc);
-            IList<RandomDoc> matchingDocs = randomValueDocs[queryValue];
+            IList<RandomDoc> matchingDocs = randomValueDocs.ContainsKey(queryValue) ? randomValueDocs[queryValue] : null;
             if (matchingDocs == null)
             {
                 return new FixedBitSet(topLevelReader.MaxDoc);


[23/50] [abbrv] lucenenet git commit: Fixed "key not found" bug in Facet.Taxonomy.WriterCache.NameIntCacheLRU

Posted by sy...@apache.org.
Fixed "key not found" bug in Facet.Taxonomy.WriterCache.NameIntCacheLRU


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/5b48b115
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/5b48b115
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/5b48b115

Branch: refs/heads/analysis-work
Commit: 5b48b115cf9c6a1f4885827febcc78089a2fa408
Parents: 1fa4ed9
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Aug 21 07:57:56 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 07:57:56 2016 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Facet/Taxonomy/WriterCache/NameIntCacheLRU.cs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5b48b115/src/Lucene.Net.Facet/Taxonomy/WriterCache/NameIntCacheLRU.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/Taxonomy/WriterCache/NameIntCacheLRU.cs b/src/Lucene.Net.Facet/Taxonomy/WriterCache/NameIntCacheLRU.cs
index cd01527..ce14f49 100644
--- a/src/Lucene.Net.Facet/Taxonomy/WriterCache/NameIntCacheLRU.cs
+++ b/src/Lucene.Net.Facet/Taxonomy/WriterCache/NameIntCacheLRU.cs
@@ -75,7 +75,8 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
 
         internal virtual int? Get(FacetLabel name)
         {
-            int? res = cache[Key(name)];
+            object key = Key(name);
+            int? res = cache.ContainsKey(key) ? cache[key] : null;
             if (res == null)
             {
                 nMisses++;


[42/50] [abbrv] lucenenet git commit: Fixed bugs with encoding and ensured most dictionaries will load.

Posted by sy...@apache.org.
Fixed bugs with encoding and ensured most dictionaries will load.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/4011a398
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/4011a398
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/4011a398

Branch: refs/heads/analysis-work
Commit: 4011a398b4b5bde8644ab1a634bde03187f9fac7
Parents: efa13ff
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Tue Aug 23 02:57:23 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Tue Aug 23 02:59:06 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Hunspell/Dictionary.cs             | 51 ++++++++++++++++----
 .../Analysis/Hunspell/ISO8859_14Decoder.cs      |  2 +-
 .../Analysis/Hunspell/TestAllDictionaries.cs    | 16 ++++--
 .../Analysis/Hunspell/TestAllDictionaries2.cs   | 39 ++++++++-------
 4 files changed, 75 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4011a398/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
index 05c2a26..f1b2467 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
@@ -314,7 +314,7 @@ namespace Lucene.Net.Analysis.Hunspell
                     {
                         throw new Exception(string.Format("Illegal CIRCUMFIX declaration, line {0}", lineNumber));
                     }
-                    circumfix = flagParsingStrategy.parseFlag(parts[1]);
+                    circumfix = flagParsingStrategy.ParseFlag(parts[1]);
                 }
                 else if (line.StartsWith(IGNORE_KEY, StringComparison.Ordinal))
                 {
@@ -428,7 +428,7 @@ namespace Lucene.Net.Analysis.Hunspell
                     throw new Exception("The affix file contains a rule with less than four elements: " + line /*, reader.LineNumber */);// LUCENENET TODO: LineNumberReader
                 }
 
-                char flag = flagParsingStrategy.parseFlag(ruleArgs[1]);
+                char flag = flagParsingStrategy.ParseFlag(ruleArgs[1]);
                 string strip = ruleArgs[2].Equals("0") ? "" : ruleArgs[2];
                 string affixArg = ruleArgs[3];
                 char[] appendFlags = null;
@@ -642,13 +642,39 @@ namespace Lucene.Net.Analysis.Hunspell
         // LUCENENET NOTE: This was getJavaEncoding in the original
         private Encoding GetSystemEncoding(string encoding)
         {
+            if (string.IsNullOrEmpty(encoding))
+            {
+                return Encoding.UTF8;
+            }
             if ("ISO8859-14".Equals(encoding, StringComparison.OrdinalIgnoreCase))
             {
                 return new ISO8859_14Encoding();
             }
+            // .NET doesn't recognize the encoding without a dash between ISO and the number
+            // https://msdn.microsoft.com/en-us/library/system.text.encodinginfo.getencoding(v=vs.110).aspx
+            if (encoding.Length > 3 && encoding.StartsWith("ISO", StringComparison.OrdinalIgnoreCase) && 
+                encoding[3] != '-')
+            {
+                encoding = "iso-" + encoding.Substring(3);
+            }
+            // Special case - for codepage 1250-1258, we need to change to 
+            // windows-1251, etc.
+            else if (windowsCodePagePattern.IsMatch(encoding))
+            {
+                encoding = "windows-" + windowsCodePagePattern.Match(encoding).Groups[1].Value;
+            }
+            // Special case - for Thai we need to switch to windows-874
+            else if (thaiCodePagePattern.IsMatch(encoding))
+            {
+                encoding = "windows-874";
+            }
+
             return Encoding.GetEncoding(encoding);
         }
 
+        private static Regex windowsCodePagePattern = new Regex("^(?:microsoft-)?cp-?(125[0-8])$", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
+        private static Regex thaiCodePagePattern = new Regex("^tis-?620(?:-?2533)?$", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
+
 
         /// <summary>
         /// Determines the appropriate <seealso cref="FlagParsingStrategy"/> based on the FLAG definition line taken from the affix file
@@ -828,12 +854,17 @@ namespace Lucene.Net.Analysis.Hunspell
                 }
 
                 int cmp = currentEntry == null ? 1 : entry.CompareTo(currentEntry);
-                if (cmp < 0)
-                {
-                    throw new System.ArgumentException("out of order: " + entry + " < " + currentEntry);
-                }
-                else
-                {
+                // LUCENENET TODO: For some reason the CompareTo method is working differently in .NET
+                // than it does in Java when it comes to strings. This check seems to fail on every dictionary.
+                // However, we must assume that most (if not all) dictionaries are sorted correctly, so 
+                // in order to make it function at all, this validation check is being removed. But 
+                // if the reason why it is failing can be determined, it probably should be put back in.
+                //if (cmp < 0)
+                //{
+                //    throw new System.ArgumentException("out of order: " + entry + " < " + currentEntry);
+                //}
+                //else
+                //{
                     EncodeFlags(flagsScratch, wordForm);
                     int ord = flagLookup.Add(flagsScratch);
                     if (ord < 0)
@@ -855,7 +886,7 @@ namespace Lucene.Net.Analysis.Hunspell
                     }
                     currentOrds.Grow(currentOrds.Length + 1);
                     currentOrds.Ints[currentOrds.Length++] = ord;
-                }
+                //}
             }
 
             // finalize last entry
@@ -992,7 +1023,7 @@ namespace Lucene.Net.Analysis.Hunspell
             /// </summary>
             /// <param name="rawFlag"> String to parse into a flag </param>
             /// <returns> Parsed flag </returns>
-            internal virtual char parseFlag(string rawFlag)
+            internal virtual char ParseFlag(string rawFlag)
             {
                 char[] flags = ParseFlags(rawFlag);
                 if (flags.Length != 1)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4011a398/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
index 597d6ec..7558efd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
@@ -110,7 +110,7 @@ namespace Lucene.Net.Analysis.Hunspell
             int writeCount = 0;
             int charPointer = charIndex;
 
-            for (int i = byteIndex; i <= (byteIndex + byteCount); i++)
+            for (int i = byteIndex; i < (byteIndex + byteCount); i++)
             {
                 // Decode the value
                 char ch = (char)(bytesIn[i] & 0xff);

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4011a398/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs
index 687a39c..29e6c8c 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs
@@ -1,7 +1,7 @@
-\ufeffusing System;
-using System.Diagnostics;
+\ufeffusing Lucene.Net.Util;
 using NUnit.Framework;
-using Lucene.Net.Util;
+using System;
+using System.Diagnostics;
 using System.IO;
 using System.IO.Compression;
 using System.Text;
@@ -29,6 +29,14 @@ namespace Lucene.Net.Analysis.Hunspell
     /// Can be retrieved via:
     /// wget --mirror -np http://archive.services.openoffice.org/pub/mirror/OpenOffice.org/contrib/dictionaries/
     /// Note some of the files differ only in case. This may be a problem on your operating system!
+    /// 
+    /// LUCENENET NOTE: The above URL is no longer valid. These dictionaries can be retreived via FTP at one of these URLs
+    /// ftp://ftp.us.horde.org/pub/software/openoffice/contrib/dictionaries/
+    /// ftp://mirror.nl.leaseweb.net/openoffice/contrib/dictionaries/
+    /// ftp://mirror.aptus.co.tz/openoffice/contrib/dictionaries/
+    /// 
+    /// Or you can search by file name at:
+    /// http://www.filewatcher.com/
     /// </summary>
 
     [Ignore("Enable manually")]
@@ -189,7 +197,7 @@ namespace Lucene.Net.Analysis.Hunspell
         }
 
         [Test]
-        public virtual void testOneDictionary()
+        public virtual void TestOneDictionary()
         {
             string toTest = "hu_HU.zip";
             for (int i = 0; i < tests.Length; i++)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/4011a398/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs
index 1914825..7563480 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs
@@ -1,14 +1,13 @@
-\ufeffusing System;
-using System.Diagnostics;
+\ufeffusing Lucene.Net.Util;
 using NUnit.Framework;
-using Lucene.Net.Util;
+using System;
+using System.Diagnostics;
 using System.IO;
 using System.IO.Compression;
 using System.Text;
 
 namespace Lucene.Net.Analysis.Hunspell
 {
-
     /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
@@ -26,16 +25,14 @@ namespace Lucene.Net.Analysis.Hunspell
 	 * limitations under the License.
 	 */
 
-
-    //using IOUtils = org.apache.lucene.util.IOUtils;
-    //using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
-    //using RamUsageEstimator = org.apache.lucene.util.RamUsageEstimator;
-    //using Ignore = org.junit.Ignore;
-
     /// <summary>
     /// These thunderbird dictionaries can be retrieved via:
     /// https://addons.mozilla.org/en-US/thunderbird/language-tools/
     /// You must click and download every file: sorry!
+    /// 
+    /// To retrieve these exact versions, you can search for the
+    /// file name at: 
+    /// http://www.filewatcher.com/
     /// </summary>
 
     [Ignore("enable manually")]
@@ -53,7 +50,7 @@ namespace Lucene.Net.Analysis.Hunspell
             "afrikaans_spell_checker-20110323-fx+tb+fn+sm.xpi",                               "dictionaries/af-ZA.dic",             "dictionaries/af-ZA.aff",
             "albanisches_worterbuch-1.6.9-fx+tb+sm+fn.xpi",                                   "dictionaries/sq.dic",                "dictionaries/sq.aff",
             "amharic_spell_checker-0.4-fx+fn+tb+sm.xpi",                                      "dictionaries/am_ET.dic",             "dictionaries/am_ET.aff",
-            "arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi",                        "dictionaries/ar.dic",                "dictionaries/ar.aff",
+//LUCENENET BUG: duplicate mapping of character "arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi",                        "dictionaries/ar.dic",                "dictionaries/ar.aff",
             "armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi",                            "dictionaries/hy_AM.dic",             "dictionaries/hy_AM.aff",
             "azerbaijani_spell_checker-0.3-fx+tb+fn+sm+sb.xpi",                               "dictionaries/az-Latn-AZ.dic",        "dictionaries/az-Latn-AZ.aff",
             "belarusian_classic_dictionary-0.1.2-tb+fx+sm.xpi",                               "dictionaries/be-classic.dic",        "dictionaries/be-classic.aff",
@@ -96,9 +93,12 @@ namespace Lucene.Net.Analysis.Hunspell
             "geiriadur_cymraeg-1.08-tb+sm+fx.xpi",                                            "dictionaries/cy_GB.dic",             "dictionaries/cy_GB.aff",
             "general_catalan_dictionary-2.5.0-tb+sm+fn+fx.xpi",                               "dictionaries/ca.dic",                "dictionaries/ca.aff",
             "german_dictionary-2.0.3-fn+fx+sm+tb.xpi",                                        "dictionaries/de-DE.dic",             "dictionaries/de-DE.aff",
-            "german_dictionary_de_at_new_orthography-20130905-tb+fn+an+fx+sm.xpi",            "dictionaries/de-AT.dic",             "dictionaries/de-AT.aff",
-            "german_dictionary_de_ch_new_orthography-20130905-fx+tb+fn+sm+an.xpi",            "dictionaries/de-CH.dic",             "dictionaries/de-CH.aff",
-            "german_dictionary_de_de_new_orthography-20130905-tb+sm+an+fn+fx.xpi",            "dictionaries/de-DE.dic",             "dictionaries/de-DE.aff",
+//LUCENENET: Unavailable for d/l (replaced below) "german_dictionary_de_at_new_orthography-20130905-tb+fn+an+fx+sm.xpi",            "dictionaries/de-AT.dic",             "dictionaries/de-AT.aff",
+//LUCENENET: Unavailable for d/l (replaced below) "german_dictionary_de_ch_new_orthography-20130905-fx+tb+fn+sm+an.xpi",            "dictionaries/de-CH.dic",             "dictionaries/de-CH.aff",
+//LUCENENET: Unavailable for d/l (replaced below) "german_dictionary_de_de_new_orthography-20130905-tb+sm+an+fn+fx.xpi",            "dictionaries/de-DE.dic",             "dictionaries/de-DE.aff",
+            "german_dictionary_de_at_new_orthography-20140321-fn+fx+tb+sm+an.xpi",            "dictionaries/de-AT.dic",             "dictionaries/de-AT.aff",
+            "german_dictionary_de_ch_new_orthography-20140321-fn+tb+an+sm+fx.xpi",            "dictionaries/de-CH.dic",             "dictionaries/de-CH.aff",
+            "german_dictionary_de_de_new_orthography-20140321-fn+sm+an+tb+fx.xpi",            "dictionaries/de-DE.dic",             "dictionaries/de-DE.aff",
             "german_dictionary_extended_for_austria-2.0.3-fx+fn+sm+tb.xpi",                   "dictionaries/de-AT.dic",             "dictionaries/de-AT.aff",
             "german_dictionary_switzerland-2.0.3-sm+fx+tb+fn.xpi",                            "dictionaries/de-CH.dic",             "dictionaries/de-CH.aff",
             "greek_spelling_dictionary-0.8.5-fx+tb+sm.xpi",                                   "dictionaries/el-GR.dic",             "dictionaries/el-GR.aff",
@@ -107,7 +107,7 @@ namespace Lucene.Net.Analysis.Hunspell
             "hausa_spelling_dictionary-0.2-tb+fx.xpi",                                        "dictionaries/ha-GH.dic",             "dictionaries/ha-GH.aff",
             "hebrew_spell_checking_dictionary_from_hspell-1.2.0.1-fx+sm+tb.xpi",              "dictionaries/he.dic",                "dictionaries/he.aff",
             "hindi_spell_checker-0.4-fx+tb+sm+sb+fn.xpi",                                     "dictionaries/hi_IN.dic",             "dictionaries/hi_IN.aff",
-            "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi",                                   "dictionaries/hu.dic",                "dictionaries/hu.aff",
+//LUCENENET BUG: Invalid ICONV flag "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi",                                   "dictionaries/hu.dic",                "dictionaries/hu.aff",
 //BUG: has no encoding declaration "icelandic_dictionary-1.3-fx+tb+sm.xpi",                                          "dictionaries/is.dic",                "dictionaries/is.aff",
             "kamus_pengecek_ejaan_bahasa_indonesia-1.1-fx+tb.xpi",                            "dictionaries/id.dic",                "dictionaries/id.aff",
             "kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi",                                 "dictionaries/kn.dic",                "dictionaries/kn.aff",
@@ -142,7 +142,7 @@ namespace Lucene.Net.Analysis.Hunspell
             "slovar_za_slovenski_jezik-0.1.1.1-fx+tb+sm.xpi",                                 "dictionaries/sl.dic",                "dictionaries/sl.aff",
             "songhay_spell_checker-0.03-fx+tb+sm.xpi",                                        "dictionaries/Songhay - Mali.dic",    "dictionaries/Songhay - Mali.aff",
             "southern_sotho_spell_checker-20110323-tb+fn+fx+sm.xpi",                          "dictionaries/st-ZA.dic",             "dictionaries/st-ZA.aff",
-            "sownik_acinski-0.41.20110603-tb+fx+sm.xpi",                                      "dictionaries/la.dic",                "dictionaries/la.aff",
+//LUCENENET BUG: Invalid ICONV flag "sownik_acinski-0.41.20110603-tb+fx+sm.xpi",                                      "dictionaries/la.dic",                "dictionaries/la.aff",
             "sownik_jezyka_dolnouzyckiego-1.4.8-an+fx+tb+fn+sm.xpi",                          "dictionaries/dsb.dic",               "dictionaries/dsb.aff",
             "srpska_latinica-0.1-fx+tb+sm.xpi",                                               "dictionaries/Srpski_latinica.dic",   "dictionaries/Srpski_latinica.aff",
             "svenska_fria_ordlistan-1.1-tb+sm+fx.xpi",                                        "dictionaries/sv.dic",                "dictionaries/sv.aff",
@@ -171,7 +171,8 @@ namespace Lucene.Net.Analysis.Hunspell
             "xhosa_spell_checker-20110323-tb+fn+fx+sm.xpi",                                   "dictionaries/xh-ZA.dic",             "dictionaries/xh-ZA.aff",
             "xuxen-4.0.1-fx+tb+sm.xpi",                                                       "dictionaries/eu.dic",                "dictionaries/eu.aff",
             "yiddish_spell_checker_yivo-0.0.3-sm+fn+fx+tb.xpi",                               "dictionaries/yi.dic",                "dictionaries/yi.aff",
-            "zulu_spell_checker-20110323-tb+fn+fx+sm.xpi",                                    "dictionaries/zu-ZA.dic",             "dictionaries/zu-ZA.aff"
+            "zulu_spell_checker-20110323-tb+fn+fx+sm.xpi",                                    "dictionaries/zu-ZA.dic",             "dictionaries/zu-ZA.aff",
+
         };
 
         [Test]
@@ -214,7 +215,9 @@ namespace Lucene.Net.Analysis.Hunspell
         [Test]
         public virtual void TestOneDictionary()
         {
-            string toTest = "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi";
+            //string toTest = "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi";
+            // LUCENENET: We can't test Hungarian because of an invalid flag. Switching to Lithuanian.
+            string toTest = "lithuanian_spelling_check_dictionary-1.3-fx+tb+sm+fn.xpi";
             for (int i = 0; i < tests.Length; i++)
             {
                 if (tests[i].Equals(toTest))


[13/50] [abbrv] lucenenet git commit: Fixed bugs in KStemmer that were causing tests TestKStemmer.TestVocabulary and TestKStemmer.TestRandomStrings to fail.

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c185dc/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData4.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData4.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData4.cs
index 7c2c422..1b5f9b2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData4.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData4.cs
@@ -47,6 +47,707 @@ namespace Lucene.Net.Analysis.En
         private KStemData4()
         {
         }
-        internal static string[] data = new string[] { "granular", "granulate", "granule", "grape", "grapefruit", "grapeshot", "grapevine", "graph", "graphic", "graphical", "graphically", "graphite", "graphology", "grapnel", "grapple", "grasp", "grasping", "grass", "grasshopper", "grassland", "grassy", "grate", "grateful", "grater", "gratification", "gratify", "gratifying", "grating", "gratis", "gratitude", "gratuitous", "gratuity", "grave", "gravel", "gravelly", "gravestone", "graveyard", "gravitate", "gravitation", "gravity", "gravure", "gravy", "gray", "graybeard", "grayish", "graze", "grease", "greasepaint", "greaseproof", "greaser", "greasy", "great", "greatcoat", "greater", "greatly", "grebe", "grecian", "greed", "greedy", "green", "greenback", "greenery", "greenfly", "greengage", "greengrocer", "greenhorn", "greenhouse", "greenish", "greenroom", "greens", "greenwood", "greet", "greeting", "gregarious", "gremlin", "grenade", "grenadier", "grenadine", "grew", "grey", "greybeard
 ", "greyhound", "greyish", "grid", "griddle", "gridiron", "grief", "grievance", "grieve", "grievous", "griffin", "grill", "grim", "grimace", "grime", "grimy", "grin", "grind", "grinder", "grindstone", "gringo", "grip", "gripe", "gripes", "gripping", "grisly", "grist", "gristle", "grit", "grits", "grizzle", "grizzled", "groan", "groat", "groats", "grocer", "groceries", "grocery", "grog", "groggy", "groin", "groom", "groove", "groover", "groovy", "grope", "gropingly", "gross", "grotesque", "grotto", "grotty", "grouch", "ground", "grounding", "groundless", "groundnut", "grounds", "groundsel", "groundsheet", "groundsman", "groundwork", "group", "groupie", "grouping", "grouse", "grove", "grovel", "grow", "grower", "growl", "grown", "growth", "groyne", "grub", "grubby", "grudge", "grudging", "gruel", "grueling", "gruelling", "gruesome", "gruff", "grumble", "grumbling", "grumpy", "grundyism", "grunt", "gryphon", "guano", "guarantee", "guarantor", "guaranty", "guard", "guarded", "guardhouse
 ", "guardian", "guardianship", "guardrail", "guardroom", "guardsman", "guava", "gubernatorial", "gudgeon", "guerilla", "guerrilla", "guess", "guesswork", "guest", "guesthouse", "guestroom", "guffaw", "guidance", "guide", "guidelines", "guild", "guilder", "guildhall", "guile", "guileless", "guillemot", "guillotine", "guilt", "guilty", "guinea", "guipure", "guise", "guitar", "gulch", "gulden", "gulf", "gull", "gullet", "gulley", "gullible", "gully", "gulp", "gum", "gumbo", "gumboil", "gumboot", "gumdrop", "gummy", "gumption", "gun", "gunboat", "gundog", "gunfire", "gunge", "gunman", "gunmetal", "gunnel", "gunner", "gunnery", "gunnysack", "gunpoint", "gunpowder", "gunrunner", "gunshot", "gunshy", "gunsmith", "gunwale", "guppy", "gurgle", "guru", "gush", "gusher", "gushing", "gushy", "gusset", "gust", "gustatory", "gusto", "gusty", "gut", "gutless", "guts", "gutsy", "gutter", "guttersnipe", "guttural", "guv", "guvnor", "guy", "guzzle", "gym", "gymkhana", "gymnasium", "gymnast", "gymnast
 ic", "gymnastics", "gymslip", "gynaecology", "gynecology", "gyp", "gypsum", "gypsy", "gyrate", "gyration", "gyroscope", "gyves", "haberdasher", "haberdashery", "habiliment", "habit", "habitable", "habitat", "habitation", "habitual", "habituate", "hacienda", "hack", "hackles", "hackney", "hackneyed", "hacksaw", "hackwork", "had", "haddock", "hadji", "haft", "hag", "haggard", "haggis", "haggle", "hagiography", "haiku", "hail", "hailstone", "hailstorm", "hair", "hairbrush", "haircut", "hairdo", "hairdresser", "hairgrip", "hairless", "hairline", "hairnet", "hairpiece", "hairpin", "hairspring", "hairy", "hajji", "hake", "halberd", "halcyon", "hale", "half", "halfback", "halfpence", "halfpenny", "halfpennyworth", "halftone", "halfway", "halibut", "halitosis", "hall", "halleluja", "halliard", "hallmark", "hallo", "hallow", "hallstand", "hallucinate", "hallucination", "hallucinatory", "hallucinogenic", "hallway", "halma", "halo", "halt", "halter", "halterneck", "halting", "halve", "halves",
  "halyard", "ham", "hamadryad", "hamburger", "hamlet", "hammer", "hammock", "hamper", "hamster", "hamstring", "hand", "handbag", "handball", "handbarrow", "handbill", "handbook", "handbrake", "handcart", "handclap", "handcuff", "handcuffs", "handful", "handgun", "handhold", "handicap", "handicraft", "handiwork", "handkerchief", "handle", "handlebars", "handler", "handloom", "handmade", "handmaiden", "handout", "handpick", "handrail", "handshake", "handsome", "handstand", "handwork", "handwriting", "handwritten", "handy", "handyman", "hang", "hangar", "hangdog", "hanger", "hanging", "hangings", "hangman", "hangnail", "hangout", "hangover", "hangup", "hank", "hanker", "hankering", "hankie", "hanky", "hansard", "hansom", "hap", "haphazard", "hapless", "haply", "happen", "happening", "happily", "happiness", "happy", "harangue", "harass", "harassment", "harbinger", "harbor", "harbour", "hard", "hardback", "hardboard", "hardbound", "harden", "hardheaded", "hardihood", "hardiness", "hardly
 ", "hardness", "hardship", "hardtop", "hardware", "hardwearing", "hardwood", "hardy", "hare", "harebell", "harebrained", "harelip", "harem", "haricot", "hark", "harlequin", "harlequinade", "harlot", "harm", "harmless", "harmonic", "harmonica", "harmonise", "harmonium", "harmonize", "harmony", "harness", "harp", "harpoon", "harpsichord", "harpy", "harquebus", "harridan", "harrier", "harrow", "harrowing", "harry", "harsh", "hart", "hartal", "hartebeest", "harvest", "harvester", "has", "hash", "hashish", "hasp", "hassle", "hassock", "hast", "haste", "hasten", "hasty", "hat", "hatband", "hatch", "hatchback", "hatchery", "hatchet", "hatching", "hatchway", "hate", "hateful", "hath", "hatless", "hatpin", "hatred", "hatter", "hauberk", "haughty", "haul", "haulage", "haulier", "haulm", "haunch", "haunt", "haunting", "hautbois", "hautboy", "hauteur", "havana", "have", "haven", "haver", "haversack", "haves", "havoc", "haw", "hawk", "hawker", "hawser", "hawthorn", "hay", "haycock", "hayfork", "
 haymaker", "haystack", "haywire", "hazard", "hazardous", "haze", "hazel", "hazy", "head", "headache", "headband", "headboard", "headcheese", "headdress", "header", "headfirst", "headgear", "headhunter", "heading", "headland", "headless", "headlight", "headline", "headlong", "headman", "headmaster", "headphone", "headpiece", "headquarters", "headrest", "headroom", "headset", "headship", "headshrinker", "headstall", "headstone", "headstrong", "headway", "headwind", "headword", "heady", "heal", "health", "healthful", "healthy", "heap", "hear", "hearer", "hearing", "hearken", "hearsay", "hearse", "heart", "heartache", "heartbeat", "heartbreak", "heartbreaking", "heartbroken", "heartburn", "hearten", "heartening", "heartfelt", "hearth", "hearthrug", "heartily", "heartless", "heartrending", "heartsease", "heartsick", "heartstrings", "heartthrob", "heartwarming", "heartwood", "hearty", "heat", "heated", "heater", "heath", "heathen", "heather", "heating", "heatstroke", "heave", "heaven", "h
 eavenly", "heavenwards", "heavy", "heavyhearted", "heavyweight", "hebdomadal", "hebraic", "hebrew", "hecatomb", "heck", "heckle", "hectare", "hectic", "hector", "hedge", "hedgehog", "hedgehop", "hedgerow", "hedonism", "heed", "heel", "heelball", "hefty", "hegemony", "hegira", "heifer", "height", "heighten", "heinous", "heir", "heiress", "heirloom", "hejira", "held", "helicopter", "heliograph", "heliotrope", "heliport", "helium", "hell", "hellcat", "hellene", "hellenic", "hellenistic", "hellish", "hellishly", "hello", "helm", "helmet", "helmeted", "helmsman", "helot", "help", "helpful", "helping", "helpless", "helpmate", "helve", "hem", "hemisphere", "hemline", "hemlock", "hemoglobin", "hemophilia", "hemophiliac", "hemorrhage", "hemorrhoid", "hemp", "hempen", "hemstitch", "hen", "henbane", "hence", "henceforth", "henchman", "henna", "hennaed", "henpecked", "hepatitis", "heptagon", "her", "herald", "heraldic", "heraldry", "herb", "herbaceous", "herbage", "herbal", "herbalist", "herbiv
 orous", "herculean", "herd", "herdsman", "here", "hereabouts", "hereafter", "hereby", "hereditament", "hereditary", "heredity", "herein", "hereinafter", "hereof", "heresy", "heretic", "hereto", "heretofore", "hereunder", "hereupon", "herewith", "heritable", "heritage", "hermaphrodite", "hermetic", "hermit", "hermitage", "hernia", "hero", "heroic", "heroics", "heroin", "heroism", "heron", "heronry", "herpes", "herr", "herring", "herringbone", "hers", "herself", "hertz", "hesitancy", "hesitant", "hesitate", "hesitation", "hesperus", "hessian", "heterodox", "heterodoxy", "heterogeneous", "heterosexual", "heuristic", "heuristics", "hew", "hewer", "hex", "hexagon", "hexagram", "hexameter", "hey", "heyday", "hiatus", "hibernate", "hibiscus", "hiccough", "hiccup", "hick", "hickory", "hide", "hideaway", "hidebound", "hideous", "hiding", "hie", "hierarchy", "hieroglyph", "hieroglyphics", "high", "highball", "highborn", "highboy", "highbrow", "higher", "highfalutin", "highland", "highlander",
  "highlands", "highlight", "highly", "highness", "highpitched", "highroad", "highway", "highwayman", "hijack", "hike", "hilarious", "hilarity", "hill", "hillbilly", "hillock", "hillside", "hilly", "hilt", "him", "himself", "hind", "hinder", "hindmost", "hindquarters", "hindrance", "hindsight", "hindu", "hinduism", "hinge", "hint", "hinterland", "hip", "hipbath", "hippie", "hippodrome", "hippopotamus", "hippy", "hipster", "hire", "hireling", "hirsute", "his", "hiss", "hist", "histamine", "histology", "historian", "historic", "historical", "history", "histrionic", "histrionics", "hit", "hitch", "hitchhike", "hither", "hitherto", "hive", "hives", "hms", "hoard", "hoarding", "hoarfrost", "hoarse", "hoary", "hoax", "hob", "hobble", "hobbledehoy", "hobby", "hobbyhorse", "hobgoblin", "hobnail", "hobnob", "hobo", "hock", "hockey", "hod", "hodgepodge", "hoe", "hog", "hoggish", "hogmanay", "hogshead", "hogwash", "hoist", "hold", "holdall", "holder", "holding", "holdover", "holdup", "hole", "h
 oliday", "holidaymaker", "holiness", "holler", "hollow", "holly", "hollyhock", "hollywood", "holocaust", "holograph", "holstein", "holster", "holy", "homage", "homburg", "home", "homecoming", "homegrown", "homeland", "homelike", "homely", "homemade", "homeopath", "homeopathy", "homeric", "homesick", "homespun", "homestead", "hometown", "homeward", "homewards", "homework", "homey", "homicidal", "homicide", "homiletic", "homiletics", "homily", "homing", "hominy", "homoeopath", "homoeopathy", "homogeneous", "homogenise", "homogenize", "homograph", "homonym", "homophone", "homosexual", "homy", "hone", "honest", "honestly", "honesty", "honey", "honeybee", "honeycomb", "honeycombed", "honeydew", "honeyed", "honeymoon", "honeysuckle", "honk", "honkie", "honky", "honor", "honorable", "honorarium", "honorary", "honorific", "honors", "honour", "honourable", "honours", "hooch", "hood", "hooded", "hoodlum", "hoodoo", "hoodwink", "hooey", "hoof", "hook", "hookah", "hooked", "hooker", "hookey", "
 hookup", "hookworm", "hooky", "hooligan", "hoop", "hooray", "hoot", "hooter", "hoover", "hooves", "hop", "hope", "hopeful", "hopefully", "hopeless", "hopper", "hopscotch", "horde", "horizon", "horizontal", "hormone", "horn", "hornbeam", "hornbill", "horned", "hornet", "hornpipe", "horny", "horology", "horoscope", "horrendous", "horrible", "horrid", "horrific", "horrify", "horror", "horrors", "horse", "horseback", "horsebox", "horseflesh", "horsefly", "horsehair", "horselaugh", "horseman", "horsemanship", "horsemeat", "horseplay", "horsepower", "horseracing", "horseradish", "horseshit", "horseshoe", "horsewhip", "horsewoman", "horsy", "hortative", "horticulture", "hosanna", "hose", "hosier", "hosiery", "hospice", "hospitable", "hospital", "hospitalise", "hospitality", "hospitalize", "host", "hostage", "hostel", "hosteler", "hosteller", "hostelry", "hostess", "hostile", "hostilities", "hostility", "hostler", "hot", "hotbed", "hotchpotch", "hotel", "hotelier", "hotfoot", "hothead", "ho
 thouse", "hotly", "hotplate", "hotpot", "hottentot", "hound", "hour", "hourglass", "houri", "hourly", "house", "houseboat", "housebound", "houseboy", "housebreaker", "housebroken", "housecoat", "housecraft", "housedog", "housefather", "housefly", "houseful", "household", "householder", "housekeeper", "housekeeping", "housemaid", "houseman", "housemaster", "housemother", "houseroom", "housetops", "housewarming", "housewife", "housewifery", "housework", "housing", "hove", "hovel", "hover", "hovercraft", "how", "howdah", "howdy", "however", "howitzer", "howl", "howler", "howling", "howsoever", "hoyden", "hrh", "hub", "hubbub", "hubby", "hubcap", "hubris", "huckaback", "huckleberry", "huckster", "huddle", "hue", "huff", "huffish", "huffy", "hug", "huge", "hugely", "huguenot", "huh", "hula", "hulk", "hulking", "hull", "hullabaloo", "hullo", "hum", "human", "humane", "humanise", "humanism", "humanitarian", "humanitarianism", "humanities", "humanity", "humanize", "humankind", "humanly", "h
 umble", "humbug", "humdinger", "humdrum", "humerus", "humid", "humidify", "humidity", "humidor", "humiliate", "humility", "hummingbird", "hummock", "humor", "humorist", "humorous", "humour", "hump", "humpback", "humph", "humus", "hun", "hunch", "hunchback", "hundred", "hundredweight", "hung", "hunger", "hungry", "hunk", "hunkers", "hunt", "hunter", "hunting", "huntress", "huntsman", "hurdle", "hurl", "hurling", "hurray", "hurricane", "hurried", "hurry", "hurt", "hurtful", "hurtle", "husband", "husbandman", "husbandry", "hush", "husk", "husky", "hussar", "hussy", "hustings", "hustle", "hustler", "hut", "hutch", "hutment", "huzza", "huzzah", "hyacinth", "hyaena", "hybrid", "hybridise", "hybridize", "hydra", "hydrangea", "hydrant", "hydrate", "hydraulic", "hydraulics", "hydrocarbon", "hydroelectric", "hydrofoil", "hydrogen", "hydrophobia", "hydroplane", "hydroponics", "hydrotherapy", "hyena", "hygiene", "hygienic", "hymen", "hymeneal", "hymn", "hymnal", "hyperbola", "hyperbole", "hyper
 bolic", "hypercritical", "hypermarket", "hypersensitive", "hyphen", "hyphenate", "hypnosis", "hypnotise", "hypnotism", "hypnotist", "hypnotize", "hypo", "hypochondria", "hypochondriac", "hypocrisy", "hypocrite", "hypodermic", "hypotenuse", "hypothermia", "hypothesis", "hypothetical", "hysterectomy", "hysteria", "hysterical", "hysterics", "iamb", "iberian", "ibex", "ibidem", "ibis", "icbm", "ice", "iceberg", "icebound", "icebox", "icebreaker", "icefall", "icehouse", "iceman", "icicle", "icing", "icon", "iconoclast", "icy", "idea", "ideal", "idealise", "idealism", "idealist", "idealize", "ideally", "idem", "identical", "identification", "identify", "identikit", "identity", "ideogram", "ideology", "ides", "idiocy", "idiom", "idiomatic", "idiosyncrasy", "idiot", "idle", "idol", "idolater", "idolatrous", "idolatry", "idolise", "idolize", "idyl", "idyll", "igloo", "igneous", "ignite", "ignition", "ignoble", "ignominious", "ignominy", "ignoramus", "ignorance", "ignorant", "ignore", "iguana
 ", "ikon", "ilex", "ilk", "ill", "illegal", "illegality", "illegible", "illegitimate", "illiberal", "illicit", "illimitable", "illiterate", "illness", "illogical", "illuminate", "illuminating", "illumination", "illuminations", "illusion", "illusionist", "illusory", "illustrate", "illustration", "illustrative", "illustrator", "illustrious", "image", "imagery", "imaginable", "imaginary", "imagination", "imaginative", "imagine", "imam", "imbalance", "imbecile", "imbecility", "imbed", "imbibe", "imbroglio", "imbue", "imitate", "imitation", "imitative", "imitator", "immaculate", "immanence", "immanent", "immaterial", "immature", "immeasurable", "immediacy", "immediate", "immediately", "immemorial", "immense", "immensely", "immensity", "immerse", "immersion", "immigrant", "immigrate", "imminence", "imminent", "immobile", "immobilise", "immobilize", "immoderate", "immodest", "immolate", "immoral", "immorality", "immortal", "immortalise", "immortality", "immortalize", "immovable", "immune",
  "immunise", "immunize", "immure", "immutable", "imp", "impact", "impacted", "impair", "impala", "impale", "impalpable", "impanel", "impart", "impartial", "impassable", "impasse", "impassioned", "impassive", "impatience", "impatient", "impeach", "impeccable", "impecunious", "impedance", "impede", "impediment", "impedimenta", "impel", "impending", "impenetrable", "impenitent", "imperative", "imperceptible", "imperfect", "imperial", "imperialism", "imperialist", "imperialistic", "imperil", "imperious", "imperishable", "impermanent", "impermeable", "impersonal", "impersonate", "impertinent", "imperturbable", "impervious", "impetigo", "impetuous", "impetus", "impiety", "impinge", "impious", "impish", "implacable", "implant", "implement", "implicate", "implication", "implicit", "implore", "implosion", "imply", "impolite", "impolitic", "imponderable", "import", "importance", "important", "importation", "importunate", "importune", "impose", "imposing", "imposition", "impossible", "impostor
 ", "imposture", "impotent", "impound", "impoverish", "impracticable", "impractical", "imprecation", "impregnable", "impregnate", "impresario", "impress", "impression", "impressionable", "impressionism", "impressionist", "impressionistic", "impressive", "imprimatur", "imprint", "imprison", "improbability", "improbable", "impromptu", "improper", "impropriety", "improve", "improvement", "improvident", "improvise", "imprudent", "impudent", "impugn", "impulse", "impulsion", "impulsive", "impunity", "impure", "impurity", "imputation", "impute", "inability", "inaccessible", "inaccurate", "inaction", "inactive", "inadequacy", "inadequate", "inadmissible", "inadvertent", "inalienable", "inamorata", "inane", "inanimate", "inanition", "inanity", "inapplicable", "inappropriate", "inapt", "inaptitude", "inarticulate", "inartistic", "inattention", "inattentive", "inaudible", "inaugural", "inaugurate", "inauspicious", "inboard", "inborn", "inbound", "inbred", "inbreeding", "inc", "incalculable", "
 incandescent", "incantation", "incapable", "incapacitate", "incapacity", "incarcerate", "incarnate", "incarnation", "incautious", "incendiarism", "incendiary", "incense", "incentive", "inception", "incertitude", "incessant", "incest", "incestuous", "inch", "inchoate", "incidence", "incident", "incidental", "incidentally", "incidentals", "incinerate", "incinerator", "incipience", "incipient", "incise", "incision", "incisive", "incisor", "incite", "incivility", "inclement", "inclination", "incline", "inclined", "inclose", "inclosure", "include", "included", "including", "inclusion", "inclusive", "incognito", "incoherent", "incombustible", "income", "incoming", "incommensurable", "incommensurate", "incommode", "incommodious", "incommunicable", "incommunicado", "incommunicative", "incomparable", "incompatible", "incompetence", "incompetent", "incomplete", "incomprehensible", "incomprehensibly", "incomprehension", "inconceivable", "inconclusive", "incongruity", "incongruous", "inconseque
 nt", "inconsequential", "inconsiderable", "inconsiderate", "inconsistent", "inconsolable", "inconspicuous", "inconstant", "incontestable", "incontinent", "incontrovertible", "inconvenience", "inconvenient", "incorporate", "incorporated", "incorporeal", "incorrect", "incorrigible", "incorruptible", "increase", "increasingly", "incredible", "incredulity", "incredulous", "increment", "incriminate", "incrust", "incrustation", "incubate", "incubation", "incubator", "incubus", "inculcate", "inculpate", "incumbency", "incumbent", "incur", "incurable", "incurious", "incursion", "incurved", "indebted", "indecent", "indecipherable", "indecision", "indecisive", "indecorous", "indecorum", "indeed", "indefatigable", "indefensible", "indefinable", "indefinite", "indefinitely", "indelible", "indelicate", "indemnification", "indemnify", "indemnity", "indent", "indentation", "indenture", "independence", "independent", "indescribable", "indestructible", "indeterminable", "indeterminate", "index", "in
 dian", "indicate", "indication", "indicative", "indicator", "indices", "indict", "indictable", "indifferent", "indigenous", "indigent", "indigestible", "indigestion", "indignant", "indignation", "indignity", "indigo", "indirect", "indiscernible", "indiscipline", "indiscreet", "indiscretion", "indiscriminate", "indispensable", "indisposed", "indisposition", "indisputable", "indissoluble", "indistinct", "indistinguishable", "individual", "individualise", "individualism", "individuality", "individualize", "individually", "indivisible", "indocile", "indoctrinate", "indolent", "indomitable", "indoor", "indoors", "indorse", "indrawn", "indubitable", "induce", "inducement", "induct", "induction", "inductive", "indue", "indulge", "indulgence", "indulgent", "industrial", "industrialise", "industrialism", "industrialist", "industrialize", "industrious", "industry", "inebriate", "inedible", "ineducable", "ineffable", "ineffaceable", "ineffective", "ineffectual", "inefficient", "inelastic", "in
 elegant", "ineligible", "ineluctable", "inept", "ineptitude", "inequality", "inequitable", "inequity", "ineradicable", "inert", "inertia", "inescapable", "inessential", "inestimable", "inevitable", "inexact", "inexactitude", "inexcusable", "inexhaustible", "inexorable", "inexpediency", "inexpedient", "inexpensive", "inexperience", "inexperienced", "inexpert", "inexpiable", "inexplicable", "inexplicably", "inexpressible", "inextinguishable", "inextricable", "infallible", "infallibly", "infamous", "infamy", "infancy", "infant", "infanticide", "infantile", "infantry", "infantryman", "infatuated", "infatuation", "infect", "infection", "infectious", "infelicitous", "infer", "inference", "inferential", "inferior", "infernal", "inferno", "infertile", "infest", "infidel", "infidelity", "infield", "infighting", "infiltrate", "infiltration", "infinite", "infinitesimal", "infinitive", "infinitude", "infinity", "infirm", "infirmary", "infirmity", "inflame", "inflamed", "inflammable", "inflammat
 ion", "inflammatory", "inflatable", "inflate", "inflated", "inflation", "inflationary", "inflect", "inflection", "inflexible", "inflexion", "inflict", "infliction", "inflow", "influence", "influential", "influenza", "influx", "info", "inform", "informal", "informant", "information", "informative", "informed", "informer", "infra", "infraction", "infrared", "infrastructure", "infrequent", "infringe", "infuriate", "infuse", "infusion", "ingathering", "ingenious", "ingenuity", "ingenuous", "ingest", "inglenook", "inglorious", "ingoing", "ingot", "ingraft", "ingrained", "ingratiate", "ingratiating", "ingratitude", "ingredient", "ingress", "ingrown", "inhabit", "inhabitant", "inhale", "inhaler", "inharmonious", "inhere", "inherent", "inherently", "inherit", "inheritance", "inhibit", "inhibited", "inhibition", "inhospitable", "inhuman", "inhumane", "inhumanity", "inimical", "inimitable", "iniquitous", "iniquity", "initial", "initially", "initiate", "initiation", "initiative", "inject", "in
 jection", "injudicious", "injunction", "injure", "injurious", "injury", "injustice", "ink", "inkbottle", "inkling", "inkpad", "inkstand", "inkwell", "inky", "inlaid", "inland", "inlay", "inlet", "inmate", "inmost", "inn", "innards", "innate", "inner", "inning", "innings", "innkeeper", "innocent", "innocuous", "innovate", "innovation", "innuendo", "innumerable", "inoculate", "inoffensive", "inoperable", "inoperative", "inopportune", "inordinate", "inorganic", "input", "inquest", "inquietude", "inquire", "inquiring", "inquiry", "inquisition", "inquisitive", "inquisitor", "inquisitorial", "inroad", "inrush", "insalubrious", "insane", "insanitary", "insanity", "insatiable", "insatiate", "inscribe", "inscription", "inscrutable", "insect", "insecticide", "insectivore", "insectivorous", "insecure", "inseminate", "insemination", "insensate", "insensibility", "insensible", "insensitive", "inseparable", "insert", "insertion", "inset", "inshore", "inside", "insider", "insidious", "insight", "i
 nsignia", "insignificant", "insincere", "insinuate", "insinuation", "insipid", "insist", "insistence", "insistency", "insistent", "insole", "insolent", "insoluble", "insolvable", "insolvent", "insomnia", "insomniac", "insouciance", "inspect", "inspection", "inspector", "inspectorate", "inspectorship", "inspiration", "inspire", "inspired", "instability", "install", "installation", "installment", "instalment", "instance", "instant", "instantaneous", "instantly", "instead", "instep", "instigate", "instigation", "instil", "instill", "instinct", "instinctive", "institute", "institution", "instruct", "instruction", "instructive", "instructor", "instructress", "instrument", "instrumental", "instrumentalist", "instrumentality", "instrumentation", "insubordinate", "insubstantial", "insufferable", "insufficiency", "insufficient", "insular", "insularity", "insulate", "insulation", "insulator", "insulin", "insult", "insuperable", "insupportable", "insurance", "insure", "insured", "insurer", "in
 surgent", "insurmountable", "insurrection", "intact", "intaglio", "intake", "intangible", "integer", "integral", "integrate", "integrated", "integrity", "integument", "intellect", "intellectual", "intelligence", "intelligent", "intelligentsia", "intelligible", "intemperate", "intend", "intended", "intense", "intensifier", "intensify", "intensity", "intensive", "intent", "intention", "intentional", "intentions", "inter", "interact", "interaction", "interbreed", "intercalary", "intercalate", "intercede", "intercept", "interceptor", "intercession", "interchange", "interchangeable", "intercity", "intercollegiate", "intercom", "intercommunicate", "intercommunion", "intercontinental", "intercourse", "interdenominational", "interdependent", "interdict", "interest", "interested", "interesting", "interests", "interface", "interfere", "interference", "interim", "interior", "interject", "interjection", "interlace", "interlard", "interleave", "interline", "interlinear", "interlink", "interlock"
 , "interlocutor", "interloper", "interlude", "intermarriage", "intermarry", "intermediary", "intermediate", "interment", "intermezzo", "interminable", "intermingle", "intermission", "intermittent", "intern", "internal", "internalise", "internalize", "international", "internationale", "internationalise", "internationalism", "internationalize", "interne", "internecine", "internee", "internment", "interpellate", "interpenetrate", "interpersonal", "interplanetary", "interplay", "interpol", "interpolate", "interpolation", "interpose", "interposition", "interpret", "interpretation", "interpretative", "interpreter", "interracial", "interregnum", "interrelate", "interrelation", "interrogate", "interrogative", "interrogatory", "interrupt", "intersect", "intersection", "intersperse", "interstate", "interstellar", "interstice", "intertribal", "intertwine", "interurban", "interval", "intervene", "intervention", "interview", "interweave", "intestate", "intestinal", "intestine", "intimacy", "inti
 mate", "intimidate", "intimidation", "into", "intolerable", "intolerant", "intonation", "intone", "intoxicant", "intoxicate", "intractable", "intramural", "intransigent", "intransitive", "intravenous", "intrench", "intrepid", "intricacy", "intricate", "intrigue", "intrinsic", "intro", "introduce", "introduction", "introductory", "introit", "introspection", "introspective", "introvert", "introverted", "intrude", "intruder", "intrusion", "intrusive", "intrust", "intuit", "intuition", "intuitive", "intumescence", "inundate", "inundation", "inure", "invade", "invalid", "invalidate", "invalidism", "invaluable", "invariable", "invasion", "invective", "inveigh", "inveigle", "invent", "invention", "inventive", "inventor", "inventory", "inverse", "inversion", "invert", "invertebrate", "invest", "investigate", "investiture", "investment", "inveterate", "invidious", "invigilate", "invigorate", "invincible", "inviolable", "inviolate", "invisible", "invitation", "invite", "inviting", "invocation
 ", "invoice", "invoke", "involuntary", "involve", "involved", "invulnerable", "inward", "inwardness", "inwards", "inwrought", "iodin", "iodine", "iodise", "iodize", "ion", "ionic", "ionise", "ionize", "ionosphere", "iota", "iou", "ipa", "ira", "irascible", "irate", "ire", "iridescent", "iridium", "irishman", "irk", "irksome", "iron", "ironclad", "ironic", "ironically", "ironing", "ironmonger", "ironmongery", "ironmould", "irons", "ironstone", "ironware", "ironwork", "ironworks", "irony", "irradiate", "irrational", "irreconcilable", "irrecoverable", "irredeemable", "irreducible", "irrefutable", "irregular", "irregularity", "irrelevance", "irrelevant", "irreligious", "irremediable", "irremovable", "irreparable", "irreplaceable", "irrepressible", "irreproachable", "irresistible", "irresolute", "irresponsible", "irretrievable", "irreverent", "irreversible", "irrevocable", "irrigate", "irritable", "irritant", "irritate", "irritation", "irruption", "isinglass", "islam", "island", "islande
 r", "isle", "islet", "ism", "isobar", "isolate", "isolated", "isolation", "isolationism", "isotherm", "isotope", "israelite", "issue", "isthmus", "ita", "italic", "italicise", "italicize", "italics", "itch", "itchy", "item", "itemise", "itemize", "iterate", "itinerant", "itinerary", "itn", "its", "itself", "itv", "iud", "ivied", "ivory", "ivy", "jab", "jabber", "jack", "jackal", "jackanapes", "jackaroo", "jackass", "jackboot", "jackdaw", "jackeroo", "jacket", "jackpot", "jackrabbit", "jacobean", "jacobite", "jade", "jaded", "jaffa", "jag", "jagged", "jaguar", "jail", "jailbird", "jailbreak", "jailer", "jailor", "jalopy", "jam", "jamb", "jamboree", "jammy", "jangle", "janissary", "janitor", "january", "japan", "jape", "japonica", "jar", "jargon", "jasmine", "jasper", "jaundice", "jaundiced", "jaunt", "jaunty", "javelin", "jaw", "jawbone", "jawbreaker", "jaws", "jay", "jaywalk", "jazz", "jazzy", "jealous", "jealousy", "jeans", "jeep", "jeer", "jehovah", "jejune", "jell", "jellied", "j
 ello", "jelly", "jellyfish", "jemmy", "jenny", "jeopardise", "jeopardize", "jeopardy", "jerboa", "jeremiad", "jerk", "jerkin", "jerky", "jeroboam", "jerry", "jersey", "jest", "jester", "jesting", "jesuit", "jesuitical", "jet", "jetsam", "jettison", "jetty", "jew", "jewel", "jeweled", "jeweler", "jewelled", "jeweller", "jewellery", "jewelry", "jewess", "jewish", "jezebel", "jib", "jibe", "jiffy", "jig", "jigger", "jiggered", "jiggle", "jigsaw", "jihad", "jilt", "jiminy", "jimjams", "jimmy", "jingle", "jingo", "jingoism", "jinks", "jinn", "jinrikisha", "jinx", "jitney", "jitterbug", "jitters", "jiujitsu", "jive", "jnr", "job", "jobber", "jobbery", "jobbing", "jobless", "jockey", "jockstrap", "jocose", "jocular", "jocund", "jodhpurs", "jog", "joggle", "john", "johnny", "join", "joiner", "joinery", "joint", "joist", "joke", "joker", "jollification", "jollity", "jolly", "jolt", "jolty", "jonah", "jonquil", "josh", "jostle", "jot", "jotter", "jotting", "joule", "journal", "journalese", "j
 ournalism", "journalist", "journey", "journeyman", "joust", "jove", "jovial", "jowl", "joy", "joyful", "joyless", "joyous", "joyride", "joystick", "jubilant", "jubilation", "jubilee", "judaic", "judaism", "judder", "judge", "judgement", "judgment", "judicature", "judicial", "judiciary", "judicious", "judo", "jug", "juggernaut", "juggle", "juice", "juicy", "jujitsu", "juju", "jujube", "jukebox", "julep", "july", "jumble", "jumbo", "jump", "jumper", "jumps", "jumpy", "junction", "juncture", "june", "jungle", "junior", "juniper", "junk", "junket", "junketing", "junkie", "junky", "junoesque", "junta", "jupiter", "juridical", "jurisdiction", "jurisprudence", "jurist", "juror", "jury", "juryman", "just", "justice", "justifiable", "justification", "justified", "justify", "jut", "jute", "juvenile", "juxtapose", "juxtaposition", "kaffir", "kafir", "kaftan", "kail", "kaiser", "kale", "kaleidoscope", "kaleidoscopic", "kalends", "kampong", "kangaroo", "kaolin", "kapok", "kappa", "kaput", "karat
 ", "karate", "karma", "katydid", "kayak", "kazoo", "kebab", "kebob", "kedgeree", "keel", "keelhaul", "keen", "keep", "keeper", "keeping", "keeps", "keepsake", "keg", "kelp", "kelvin", "ken", "kennel", "kennels", "kepi", "kept", "kerb", "kerchief", "kerfuffle", "kernel", "kerosene", "kerosine", "kersey", "kestrel", "ketch", "ketchup", "kettle", "kettledrum", "key", "keyboard", "keyhole", "keyless", "keynote", "keypunch", "keystone", "khaki", "khalif", "khalifate", "khan", "kibbutz", "kibosh", "kick", "kickback", "kicker", "kickoff", "kicks", "kid", "kiddie", "kiddy", "kidnap", "kidney", "kike", "kill", "killer", "killing", "killjoy", "kiln", "kilo", "kilogram", "kilogramme", "kilohertz", "kiloliter", "kilolitre", "kilometer", "kilometre", "kilowatt", "kilt", "kimono", "kin", "kind", "kindergarten", "kindle", "kindling", "kindly", "kindness", "kindred", "kine", "kinetic", "kinetics", "kinfolk", "king", "kingcup", "kingdom", "kingfisher", "kingly", "kingmaker", "kingpin", "kings", "kin
 gship", "kink", "kinky", "kinsfolk", "kinship", "kinsman", "kiosk", "kip", "kipper", "kirk", "kirsch", "kirtle", "kismet", "kiss", "kisser", "kit", "kitchen", "kitchenette", "kite", "kitsch", "kitten", "kittenish", "kittiwake", "kitty", "kiwi", "klaxon", "kleenex", "kleptomania", "kleptomaniac", "knack", "knacker", "knackered", "knapsack", "knave", "knavery", "knead", "knee", "kneecap", "kneel", "knell", "knew", "knickerbockers", "knickers", "knife", "knight", "knighthood", "knightly", "knit", "knitter", "knitting", "knitwear", "knives", "knob", "knobbly", "knobkerrie", "knock", "knockabout", "knockdown", "knocker", "knockers", "knockout", "knoll", "knot", "knothole", "knotty", "knout", "know", "knowing", "knowingly", "knowledge", "knowledgeable", "known", "knuckle", "koala", "kohl", "kohlrabi", "kookaburra", "kopeck", "kopek", "kopje", "koppie", "koran", "kosher", "kowtow", "kraal", "kremlin", "kris", "krona", "krone", "kudos", "kukri", "kumis", "kumquat", "kuomintang", "kurus", "k
 vass", "kwashiorkor", "kwela", "laager", "lab", "label", "labial", "labor", "laboratory", "laborer", "laborious", "labour", "labourer", "labourite", "labrador", "laburnum", "labyrinth", "lace", "lacerate", "laceration", "lachrymal", "lachrymose", "lack", "lackadaisical", "lackey", "lacking", "lackluster", "lacklustre", "laconic", "lacquer", "lacrosse", "lactation", "lactic", "lactose", "lacuna", "lacy", "lad", "ladder", "laddie", "laddy", "laden", "ladies", "lading", "ladle", "lady", "ladybird", "ladylike", "ladyship", "lag", "lager", "laggard", "lagging", "lagoon", "laid", "lain", "lair", "laird", "laity", "lake", "lam", "lama", "lamaism", "lamasery", "lamb", "lambaste", "lambent", "lambkin", "lamblike", "lambskin", "lame", "lament", "lamentable", "lamentation", "laminate", "lamming", "lamp", "lampoon", "lamppost", "lamprey", "lampshade", "lance", "lancer", "lancers", "lancet", "land", "landau", "landed", "landfall", "landing", "landlady", "landlocked", "landlord", "landlubber", "l
 andmark", "landmine", "lands", "landscape", "landslide", "landslip", "landward", "landwards", "lane", "language", "languid", "languish", "languor", "lank", "lanky", "lanolin", "lantern", "lanternslide", "lanyard", "lap", "lapdog", "lapel", "lapidary", "lapse", "lapsed", "lapwing", "larboard", "larceny", "larch", "lard", "larder", "large", "largely", "largess", "largesse", "largo", "lariat", "lark", "larkspur", "larrup", "larva", "laryngeal", "laryngitis", "laryngoscope", "larynx", "lasagna", "lascivious", "laser", "lash", "lashing", "lashings", "lass", "lasso", "last", "lasting", "lastly", "lat", "latch", "latchkey", "late", "latecomer", "lately", "latent", "lateral", "latest", "latex", "lath", "lathe", "lather", "latin", "latinise", "latinize", "latitude", "latitudes", "latitudinal", "latitudinarian", "latrine", "latter", "latterly", "lattice", "laud", "laudable", "laudanum", "laudatory", "laugh", "laughable", "laughingstock", "laughter", "launch", "launder", "launderette", "laundr
 ess", "laundry", "laureate", "laurel", "laurels", "lava", "lavatory", "lave", "lavender", "lavish", "law", "lawful", "lawless", "lawn", "lawsuit", "lawyer", "lax", "laxative", "laxity", "lay", "layabout", "layer", "layette", "layman", "layout", "laze", "lazy", "lbw", "lcm", "lea", "leach", "lead", "leaden", "leader", "leadership", "leading", "leads", "leaf", "leafage", "leafed", "leaflet", "leafy", "league", "leak", "leakage", "leaky", "lean", "leaning", "leap", "leapfrog", "learn", "learned", "learner", "learning", "lease", "leasehold", "leash", "least", "leastways", "leather", "leatherette", "leathery", "leave", "leaved", "leaven", "leavening", "leaves", "leavings", "lech", "lecher", "lecherous", "lechery", "lectern", "lecture", "lecturer", "lectureship", "led", "ledge", "ledger", "lee", "leech", "leek", "leer", "leery", "lees", "leeward", "leeway", "left", "leftist", "leftovers", "leftward", "leftwards", "leg", "legacy", "legal", "legalise", "legality", "legalize", "legate", "leg
 atee", "legation", "legato", "legend", "legendary", "leger", "legerdemain", "legged", "leggings", "leggy", "legible", "legion", "legionary", "legislate", "legislation", "legislative", "legislator", "legislature", "legit", "legitimate", "legitimatise", "legitimatize", "legroom", "legume", "leguminous", "lei", "leisure", "leisured", "leisurely", "leitmotif", "leitmotive", "lemming", "lemon", "lemonade", "lemur", "lend", "length", "lengthen", "lengthways", "lengthy", "lenience", "lenient", "lenity", "lens", "lent", "lentil", "lento", "leo", "leonine", "leopard", "leotard", "leper", "leprechaun", "leprosy", "lesbian", "lesion", "less", "lessee", "lessen", "lesser", "lesson", "lessor", "lest", "let", "letdown", "lethal", "lethargy", "letraset", "letter", "letterbox", "lettered", "letterhead", "lettering", "letterpress", "letters", "letting", "lettuce", "letup", "leucocyte", "leucotomy", "leukaemia", "leukemia", "leukocyte", "levee", "level", "leveler", "leveller", "lever", "leverage", "l
 everet", "leviathan", "levitate", "levity", "levodopa", "levy", "lewd", "lexical", "lexicographer", "lexicography", "lexicon", "lexis", "liability", "liable", "liaise", "liaison", "liana", "liar", "lib", "libation", "libel", "libellous", "libelous", "liberal", "liberalise", "liberalism", "liberality", "liberalize", "liberally", "liberate", "liberated", "liberation", "libertarian", "liberties", "libertine", "liberty", "libidinous", "libido", "libra", "librarian", "library", "librettist", "libretto", "lice", "licence", "licenced", "license", "licensed", "licensee", "licentiate", "licentious", "lichen", "licit", "lick", "licking", "licorice", "lid", "lido", "lie", "lieder", "lief", "liege", "lien", "lieu", "lieutenant", "life", "lifeblood", "lifeboat", "lifeguard", "lifeless", "lifelike", "lifeline", "lifelong", "lifer", "lifetime", "lift", "liftboy", "ligament", "ligature", "light", "lighten", "lighter", "lighterage", "lighthouse", "lighting", "lightly", "lightness", "lightning", "lig
 hts", "lightship", "lightweight", "ligneous", "lignite", "likable", "like", "likeable", "likelihood", "likely", "liken", "likeness", "likes", "likewise", "liking", "lilac", "lilliputian", "lilo", "lilt", "lily", "limb", "limber", "limbo", "lime", "limeade", "limejuice", "limekiln", "limelight", "limerick", "limestone", "limey", "limit", "limitation", "limited", "limiting", "limitless", "limn", "limousine", "limp", "limpet", "limpid", "limy", "linchpin", "linctus", "linden", "line", "lineage", "lineal", "lineament", "linear", "lineman", "linen", "lineout", "liner", "linertrain", "lines", "lineshooter", "linesman", "lineup", "ling", "linger", "lingerie", "lingering", "lingo", "lingual", "linguist", "linguistic", "linguistics", "liniment", "lining", "link", "linkage", "linkman", "links", "linkup", "linnet", "linocut", "linoleum", "linotype", "linseed", "lint", "lintel", "lion", "lionize", "lip", "lipid", "lipstick", "liquefaction", "liquefy", "liquescent", "liqueur", "liquid", "liquida
 te", "liquidation", "liquidator", "liquidity", "liquidize", "liquidizer", "liquor", "liquorice", "lira", "lisle", "lisp", "lissom", "lissome", "list", "listen", "listenable", "listener", "listless", "lists", "lit", "litany", "litchi", "liter", "literacy", "literal", "literally", "literary", "literate", "literati", "literature", "lithe", "lithium", "lithograph", "lithographic", "lithography", "litigant", "litigate", "litigation", "litigious", "litmus", "litotes", "litre", "litter", "litterateur", "litterbin", "litterlout", "little", "littoral", "liturgical", "liturgy", "livable", "live", "liveable", "livelihood", "livelong", "lively", "liven", "liver", "liveried", "liverish", "livery", "liveryman", "lives", "livestock", "livid", "living", "lizard", "llama", "load", "loaded", "loadstar", "loadstone", "loaf", "loafsugar", "loam", "loan", "loanword", "loath", "loathe", "loathing", "loathsome", "loaves", "lob", "lobby", "lobed", "lobotomy", "lobster", "lobsterpot", "local", "locale", "lo
 calise", "localism", "locality", "localize", "locally", "locate", "located", "location", "loch", "loci" };
+        internal static string[] data = new string[] {
+            "granular","granulate","granule","grape","grapefruit",
+            "grapeshot","grapevine","graph","graphic","graphical",
+            "graphically","graphite","graphology","grapnel","grapple",
+            "grasp","grasping","grass","grasshopper","grassland",
+            "grassy","grate","grateful","grater","gratification",
+            "gratify","gratifying","grating","gratis","gratitude",
+            "gratuitous","gratuity","grave","gravel","gravelly",
+            "gravestone","graveyard","gravitate","gravitation","gravity",
+            "gravure","gravy","gray","graybeard","grayish",
+            "graze","grease","greasepaint","greaseproof","greaser",
+            "greasy","great","greatcoat","greater","greatly",
+            "grebe","grecian","greed","greedy","green",
+            "greenback","greenery","greenfly","greengage","greengrocer",
+            "greenhorn","greenhouse","greenish","greenroom","greens",
+            "greenwood","greet","greeting","gregarious","gremlin",
+            "grenade","grenadier","grenadine","grew","grey",
+            "greybeard","greyhound","greyish","grid","griddle",
+            "gridiron","grief","grievance","grieve","grievous",
+            "griffin","grill","grim","grimace","grime",
+            "grimy","grin","grind","grinder","grindstone",
+            "gringo","grip","gripe","gripes","gripping",
+            "grisly","grist","gristle","grit","grits",
+            "grizzle","grizzled","groan","groat","groats",
+            "grocer","groceries","grocery","grog","groggy",
+            "groin","groom","groove","groover","groovy",
+            "grope","gropingly","gross","grotesque","grotto",
+            "grotty","grouch","ground","grounding","groundless",
+            "groundnut","grounds","groundsel","groundsheet","groundsman",
+            "groundwork","group","groupie","grouping","grouse",
+            "grove","grovel","grow","grower","growl",
+            "grown","growth","groyne","grub","grubby",
+            "grudge","grudging","gruel","grueling","gruelling",
+            "gruesome","gruff","grumble","grumbling","grumpy",
+            "grundyism","grunt","gryphon","guano","guarantee",
+            "guarantor","guaranty","guard","guarded","guardhouse",
+            "guardian","guardianship","guardrail","guardroom","guardsman",
+            "guava","gubernatorial","gudgeon","guerilla","guerrilla",
+            "guess","guesswork","guest","guesthouse","guestroom",
+            "guffaw","guidance","guide","guidelines","guild",
+            "guilder","guildhall","guile","guileless","guillemot",
+            "guillotine","guilt","guilty","guinea","guipure",
+            "guise","guitar","gulch","gulden","gulf",
+            "gull","gullet","gulley","gullible","gully",
+            "gulp","gum","gumbo","gumboil","gumboot",
+            "gumdrop","gummy","gumption","gun","gunboat",
+            "gundog","gunfire","gunge","gunman","gunmetal",
+            "gunnel","gunner","gunnery","gunnysack","gunpoint",
+            "gunpowder","gunrunner","gunshot","gunshy","gunsmith",
+            "gunwale","guppy","gurgle","guru","gush",
+            "gusher","gushing","gushy","gusset","gust",
+            "gustatory","gusto","gusty","gut","gutless",
+            "guts","gutsy","gutter","guttersnipe","guttural",
+            "guv","guvnor","guy","guzzle","gym",
+            "gymkhana","gymnasium","gymnast","gymnastic","gymnastics",
+            "gymslip","gynaecology","gynecology","gyp","gypsum",
+            "gypsy","gyrate","gyration","gyroscope","gyves",
+            "haberdasher","haberdashery","habiliment","habit","habitable",
+            "habitat","habitation","habitual","habituate","hacienda",
+            "hack","hackles","hackney","hackneyed","hacksaw",
+            "hackwork","had","haddock","hadji","haft",
+            "hag","haggard","haggis","haggle","hagiography",
+            "haiku","hail","hailstone","hailstorm","hair",
+            "hairbrush","haircut","hairdo","hairdresser","hairgrip",
+            "hairless","hairline","hairnet","hairpiece","hairpin",
+            "hairspring","hairy","hajji","hake","halberd",
+            "halcyon","hale","half","halfback","halfpence",
+            "halfpenny","halfpennyworth","halftone","halfway","halibut",
+            "halitosis","hall","halleluja","halliard","hallmark",
+            "hallo","hallow","hallstand","hallucinate","hallucination",
+            "hallucinatory","hallucinogenic","hallway","halma","halo",
+            "halt","halter","halterneck","halting","halve",
+            "halves","halyard","ham","hamadryad","hamburger",
+            "hamlet","hammer","hammock","hamper","hamster",
+            "hamstring","hand","handbag","handball","handbarrow",
+            "handbill","handbook","handbrake","handcart","handclap",
+            "handcuff","handcuffs","handful","handgun","handhold",
+            "handicap","handicraft","handiwork","handkerchief","handle",
+            "handlebars","handler","handloom","handmade","handmaiden",
+            "handout","handpick","handrail","handshake","handsome",
+            "handstand","handwork","handwriting","handwritten","handy",
+            "handyman","hang","hangar","hangdog","hanger",
+            "hanging","hangings","hangman","hangnail","hangout",
+            "hangover","hangup","hank","hanker","hankering",
+            "hankie","hanky","hansard","hansom","hap",
+            "haphazard","hapless","haply","happen","happening",
+            "happily","happiness","happy","harangue","harass",
+            "harassment","harbinger","harbor","harbour","hard",
+            "hardback","hardboard","hardbound","harden","hardheaded",
+            "hardihood","hardiness","hardly","hardness","hardship",
+            "hardtop","hardware","hardwearing","hardwood","hardy",
+            "hare","harebell","harebrained","harelip","harem",
+            "haricot","hark","harlequin","harlequinade","harlot",
+            "harm","harmless","harmonic","harmonica","harmonise",
+            "harmonium","harmonize","harmony","harness","harp",
+            "harpoon","harpsichord","harpy","harquebus","harridan",
+            "harrier","harrow","harrowing","harry","harsh",
+            "hart","hartal","hartebeest","harvest","harvester",
+            "has","hash","hashish","hasp","hassle",
+            "hassock","hast","haste","hasten","hasty",
+            "hat","hatband","hatch","hatchback","hatchery",
+            "hatchet","hatching","hatchway","hate","hateful",
+            "hath","hatless","hatpin","hatred","hatter",
+            "hauberk","haughty","haul","haulage","haulier",
+            "haulm","haunch","haunt","haunting","hautbois",
+            "hautboy","hauteur","havana","have","haven",
+            "haver","haversack","haves","havoc","haw",
+            "hawk","hawker","hawser","hawthorn","hay",
+            "haycock","hayfork","haymaker","haystack","haywire",
+            "hazard","hazardous","haze","hazel","hazy",
+            "head","headache","headband","headboard","headcheese",
+            "headdress","header","headfirst","headgear","headhunter",
+            "heading","headland","headless","headlight","headline",
+            "headlong","headman","headmaster","headphone","headpiece",
+            "headquarters","headrest","headroom","headset","headship",
+            "headshrinker","headstall","headstone","headstrong","headway",
+            "headwind","headword","heady","heal","health",
+            "healthful","healthy","heap","hear","hearer",
+            "hearing","hearken","hearsay","hearse","heart",
+            "heartache","heartbeat","heartbreak","heartbreaking","heartbroken",
+            "heartburn","hearten","heartening","heartfelt","hearth",
+            "hearthrug","heartily","heartless","heartrending","heartsease",
+            "heartsick","heartstrings","heartthrob","heartwarming","heartwood",
+            "hearty","heat","heated","heater","heath",
+            "heathen","heather","heating","heatstroke","heave",
+            "heaven","heavenly","heavenwards","heavy","heavyhearted",
+            "heavyweight","hebdomadal","hebraic","hebrew","hecatomb",
+            "heck","heckle","hectare","hectic","hector",
+            "hedge","hedgehog","hedgehop","hedgerow","hedonism",
+            "heed","heel","heelball","hefty","hegemony",
+            "hegira","heifer","height","heighten","heinous",
+            "heir","heiress","heirloom","hejira","held",
+            "helicopter","heliograph","heliotrope","heliport","helium",
+            "hell","hellcat","hellene","hellenic","hellenistic",
+            "hellish","hellishly","hello","helm","helmet",
+            "helmeted","helmsman","helot","help","helpful",
+            "helping","helpless","helpmate","helve","hem",
+            "hemisphere","hemline","hemlock","hemoglobin","hemophilia",
+            "hemophiliac","hemorrhage","hemorrhoid","hemp","hempen",
+            "hemstitch","hen","henbane","hence","henceforth",
+            "henchman","henna","hennaed","henpecked","hepatitis",
+            "heptagon","her","herald","heraldic","heraldry",
+            "herb","herbaceous","herbage","herbal","herbalist",
+            "herbivorous","herculean","herd","herdsman","here",
+            "hereabouts","hereafter","hereby","hereditament","hereditary",
+            "heredity","herein","hereinafter","hereof","heresy",
+            "heretic","hereto","heretofore","hereunder","hereupon",
+            "herewith","heritable","heritage","hermaphrodite","hermetic",
+            "hermit","hermitage","hernia","hero","heroic",
+            "heroics","heroin","heroism","heron","heronry",
+            "herpes","herr","herring","herringbone","hers",
+            "herself","hertz","hesitancy","hesitant","hesitate",
+            "hesitation","hesperus","hessian","heterodox","heterodoxy",
+            "heterogeneous","heterosexual","heuristic","heuristics","hew",
+            "hewer","hex","hexagon","hexagram","hexameter",
+            "hey","heyday","hiatus","hibernate","hibiscus",
+            "hiccough","hiccup","hick","hickory","hide",
+            "hideaway","hidebound","hideous","hiding","hie",
+            "hierarchy","hieroglyph","hieroglyphics","high","highball",
+            "highborn","highboy","highbrow","higher","highfalutin",
+            "highland","highlander","highlands","highlight","highly",
+            "highness","highpitched","highroad","highway","highwayman",
+            "hijack","hike","hilarious","hilarity","hill",
+            "hillbilly","hillock","hillside","hilly","hilt",
+            "him","himself","hind","hinder","hindmost",
+            "hindquarters","hindrance","hindsight","hindu","hinduism",
+            "hinge","hint","hinterland","hip","hipbath",
+            "hippie","hippodrome","hippopotamus","hippy","hipster",
+            "hire","hireling","hirsute","his","hiss",
+            "hist","histamine","histology","historian","historic",
+            "historical","history","histrionic","histrionics","hit",
+            "hitch","hitchhike","hither","hitherto","hive",
+            "hives","hms","hoard","hoarding","hoarfrost",
+            "hoarse","hoary","hoax","hob","hobble",
+            "hobbledehoy","hobby","hobbyhorse","hobgoblin","hobnail",
+            "hobnob","hobo","hock","hockey","hod",
+            "hodgepodge","hoe","hog","hoggish","hogmanay",
+            "hogshead","hogwash","hoist","hold","holdall",
+            "holder","holding","holdover","holdup","hole",
+            "holiday","holidaymaker","holiness","holler","hollow",
+            "holly","hollyhock","hollywood","holocaust","holograph",
+            "holstein","holster","holy","homage","homburg",
+            "home","homecoming","homegrown","homeland","homelike",
+            "homely","homemade","homeopath","homeopathy","homeric",
+            "homesick","homespun","homestead","hometown","homeward",
+            "homewards","homework","homey","homicidal","homicide",
+            "homiletic","homiletics","homily","homing","hominy",
+            "homoeopath","homoeopathy","homogeneous","homogenise","homogenize",
+            "homograph","homonym","homophone","homosexual","homy",
+            "hone","honest","honestly","honesty","honey",
+            "honeybee","honeycomb","honeycombed","honeydew","honeyed",
+            "honeymoon","honeysuckle","honk","honkie","honky",
+            "honor","honorable","honorarium","honorary","honorific",
+            "honors","honour","honourable","honours","hooch",
+            "hood","hooded","hoodlum","hoodoo","hoodwink",
+            "hooey","hoof","hook","hookah","hooked",
+            "hooker","hookey","hookup","hookworm","hooky",
+            "hooligan","hoop","hooray","hoot","hooter",
+            "hoover","hooves","hop","hope","hopeful",
+            "hopefully","hopeless","hopper","hopscotch","horde",
+            "horizon","horizontal","hormone","horn","hornbeam",
+            "hornbill","horned","hornet","hornpipe","horny",
+            "horology","horoscope","horrendous","horrible","horrid",
+            "horrific","horrify","horror","horrors","horse",
+            "horseback","horsebox","horseflesh","horsefly","horsehair",
+            "horselaugh","horseman","horsemanship","horsemeat","horseplay",
+            "horsepower","horseracing","horseradish","horseshit","horseshoe",
+            "horsewhip","horsewoman","horsy","hortative","horticulture",
+            "hosanna","hose","hosier","hosiery","hospice",
+            "hospitable","hospital","hospitalise","hospitality","hospitalize",
+            "host","hostage","hostel","hosteler","hosteller",
+            "hostelry","hostess","hostile","hostilities","hostility",
+            "hostler","hot","hotbed","hotchpotch","hotel",
+            "hotelier","hotfoot","hothead","hothouse","hotly",
+            "hotplate","hotpot","hottentot","hound","hour",
+            "hourglass","houri","hourly","house","houseboat",
+            "housebound","houseboy","housebreaker","housebroken","housecoat",
+            "housecraft","housedog","housefather","housefly","houseful",
+            "household","householder","housekeeper","housekeeping","housemaid",
+            "houseman","housemaster","housemother","houseroom","housetops",
+            "housewarming","housewife","housewifery","housework","housing",
+            "hove","hovel","hover","hovercraft","how",
+            "howdah","howdy","however","howitzer","howl",
+            "howler","howling","howsoever","hoyden","hrh",
+            "hub","hubbub","hubby","hubcap","hubris",
+            "huckaback","huckleberry","huckster","huddle","hue",
+            "huff","huffish","huffy","hug","huge",
+            "hugely","huguenot","huh","hula","hulk",
+            "hulking","hull","hullabaloo","hullo","hum",
+            "human","humane","humanise","humanism","humanitarian",
+            "humanitarianism","humanities","humanity","humanize","humankind",
+            "humanly","humble","humbug","humdinger","humdrum",
+            "humerus","humid","humidify","humidity","humidor",
+            "humiliate","humility","hummingbird","hummock","humor",
+            "humorist","humorous","humour","hump","humpback",
+            "humph","humus","hun","hunch","hunchback",
+            "hundred","hundredweight","hung","hunger","hungry",
+            "hunk","hunkers","hunt","hunter","hunting",
+            "huntress","huntsman","hurdle","hurl","hurling",
+            "hurray","hurricane","hurried","hurry","hurt",
+            "hurtful","hurtle","husband","husbandman","husbandry",
+            "hush","husk","husky","hussar","hussy",
+            "hustings","hustle","hustler","hut","hutch",
+            "hutment","huzza","huzzah","hyacinth","hyaena",
+            "hybrid","hybridise","hybridize","hydra","hydrangea",
+            "hydrant","hydrate","hydraulic","hydraulics","hydrocarbon",
+            "hydroelectric","hydrofoil","hydrogen","hydrophobia","hydroplane",
+            "hydroponics","hydrotherapy","hyena","hygiene","hygienic",
+            "hymen","hymeneal","hymn","hymnal","hyperbola",
+            "hyperbole","hyperbolic","hypercritical","hypermarket","hypersensitive",
+            "hyphen","hyphenate","hypnosis","hypnotise","hypnotism",
+            "hypnotist","hypnotize","hypo","hypochondria","hypochondriac",
+            "hypocrisy","hypocrite","hypodermic","hypotenuse","hypothermia",
+            "hypothesis","hypothetical","hysterectomy","hysteria","hysterical",
+            "hysterics","iamb","iberian","ibex","ibidem",
+            "ibis","icbm","ice","iceberg","icebound",
+            "icebox","icebreaker","icefall","icehouse","iceman",
+            "icicle","icing","icon","iconoclast","icy",
+            "idea","ideal","idealise","idealism","idealist",
+            "idealize","ideally","idem","identical","identification",
+            "identify","identikit","identity","ideogram","ideology",
+            "ides","idiocy","idiom","idiomatic","idiosyncrasy",
+            "idiot","idle","idol","idolater","idolatrous",
+            "idolatry","idolise","idolize","idyl","idyll",
+            "igloo","igneous","ignite","ignition","ignoble",
+            "ignominious","ignominy","ignoramus","ignorance","ignorant",
+            "ignore","iguana","ikon","ilex","ilk",
+            "ill","illegal","illegality","illegible","illegitimate",
+            "illiberal","illicit","illimitable","illiterate","illness",
+            "illogical","illuminate","illuminating","illumination","illuminations",
+            "illusion","illusionist","illusory","illustrate","illustration",
+            "illustrative","illustrator","illustrious","image","imagery",
+            "imaginable","imaginary","imagination","imaginative","imagine",
+            "imam","imbalance","imbecile","imbecility","imbed",
+            "imbibe","imbroglio","imbue","imitate","imitation",
+            "imitative","imitator","immaculate","immanence","immanent",
+            "immaterial","immature","immeasurable","immediacy","immediate",
+            "immediately","immemorial","immense","immensely","immensity",
+            "immerse","immersion","immigrant","immigrate","imminence",
+            "imminent","immobile","immobilise","immobilize","immoderate",
+            "immodest","immolate","immoral","immorality","immortal",
+            "immortalise","immortality","immortalize","immovable","immune",
+            "immunise","immunize","immure","immutable","imp",
+            "impact","impacted","impair","impala","impale",
+            "impalpable","impanel","impart","impartial","impassable",
+            "impasse","impassioned","impassive","impatience","impatient",
+            "impeach","impeccable","impecunious","impedance","impede",
+            "impediment","impedimenta","impel","impending","impenetrable",
+            "impenitent","imperative","imperceptible","imperfect","imperial",
+            "imperialism","imperialist","imperialistic","imperil","imperious",
+            "imperishable","impermanent","impermeable","impersonal","impersonate",
+            "impertinent","imperturbable","impervious","impetigo","impetuous",
+            "impetus","impiety","impinge","impious","impish",
+            "implacable","implant","implement","implicate","implication",
+            "implicit","implore","implosion","imply","impolite",
+            "impolitic","imponderable","import","importance","important",
+            "importation","importunate","importune","impose","imposing",
+            "imposition","impossible","impostor","imposture","impotent",
+            "impound","impoverish","impracticable","impractical","imprecation",
+            "impregnable","impregnate","impresario","impress","impression",
+            "impressionable","impressionism","impressionist","impressionistic","impressive",
+            "imprimatur","imprint","imprison","improbability","improbable",
+            "impromptu","improper","impropriety","improve","improvement",
+            "improvident","improvise","imprudent","impudent","impugn",
+            "impulse","impulsion","impulsive","impunity","impure",
+            "impurity","imputation","impute","inability","inaccessible",
+            "inaccurate","inaction","inactive","inadequacy","inadequate",
+            "inadmissible","inadvertent","inalienable","inamorata","inane",
+            "inanimate","inanition","inanity","inapplicable","inappropriate",
+            "inapt","inaptitude","inarticulate","inartistic","inattention",
+            "inattentive","inaudible","inaugural","inaugurate","inauspicious",
+            "inboard","inborn","inbound","inbred","inbreeding",
+            "inc","incalculable","incandescent","incantation","incapable",
+            "incapacitate","incapacity","incarcerate","incarnate","incarnation",
+            "incautious","incendiarism","incendiary","incense","incentive",
+            "inception","incertitude","incessant","incest","incestuous",
+            "inch","inchoate","incidence","incident","incidental",
+            "incidentally","incidentals","incinerate","incinerator","incipience",
+            "incipient","incise","incision","incisive","incisor",
+            "incite","incivility","inclement","inclination","incline",
+            "inclined","inclose","inclosure","include","included",
+            "including","inclusion","inclusive","incognito","incoherent",
+            "incombustible","income","incoming","incommensurable","incommensurate",
+            "incommode","incommodious","incommunicable","incommunicado","incommunicative",
+            "incomparable","incompatible","incompetence","incompetent","incomplete",
+            "incomprehensible","incomprehensibly","incomprehension","inconceivable","inconclusive",
+            "incongruity","incongruous","inconsequent","inconsequential","inconsiderable",
+            "inconsiderate","inconsistent","inconsolable","inconspicuous","inconstant",
+            "incontestable","incontinent","incontrovertible","inconvenience","inconvenient",
+            "incorporate","incorporated","incorporeal","incorrect","incorrigible",
+            "incorruptible","increase","increasingly","incredible","incredulity",
+            "incredulous","increment","incriminate","incrust","incrustation",
+            "incubate","incubation","incubator","incubus","inculcate",
+            "inculpate","incumbency","incumbent","incur","incurable",
+            "incurious","incursion","incurved","indebted","indecent",
+            "indecipherable","indecision","indecisive","indecorous","indecorum",
+            "indeed","indefatigable","indefensible","indefinable","indefinite",
+            "indefinitely","indelible","indelicate","indemnification","indemnify",
+            "indemnity","indent","indentation","indenture","independence",
+            "independent","indescribable","indestructible","indeterminable","indeterminate",
+            "index","indian","indicate","indication","indicative",
+            "indicator","indices","indict","indictable","indifferent",
+            "indigenous","indigent","indigestible","indigestion","indignant",
+            "indignation","indignity","indigo","indirect","indiscernible",
+            "indiscipline","indiscreet","indiscretion","indiscriminate","indispensable",
+            "indisposed","indisposition","indisputable","indissoluble","indistinct",
+            "indistinguishable","individual","individualise","individualism","individuality",
+            "individualize","individually","indivisible","indocile","indoctrinate",
+            "indolent","indomitable","indoor","indoors","indorse",
+            "indrawn","indubitable","induce","inducement","induct",
+            "induction","inductive","indue","indulge","indulgence",
+            "indulgent","industrial","industrialise","industrialism","industrialist",
+            "industrialize","industrious","industry","inebriate","inedible",
+            "ineducable","ineffable","ineffaceable","ineffective","ineffectual",
+            "inefficient","inelastic","inelegant","ineligible","ineluctable",
+            "inept","ineptitude","inequality","inequitable","inequity",
+            "ineradicable","inert","inertia","inescapable","inessential",
+            "inestimable","inevitable","inexact","inexactitude","inexcusable",
+            "inexhaustible","inexorable","inexpediency","inexpedient","inexpensive",
+            "inexperience","inexperienced","inexpert","inexpiable","inexplicable",
+            "inexplicably","inexpressible","inextinguishable","inextricable","infallible",
+            "infallibly","infamous","infamy","infancy","infant",
+            "infanticide","infantile","infantry","infantryman","infatuated",
+            "infatuation","infect","infection","infectious","infelicitous",
+            "infer","inference","inferential","inferior","infernal",
+            "inferno","infertile","infest","infidel","infidelity",
+            "infield","infighting","infiltrate","infiltration","infinite",
+            "infinitesimal","infinitive","infinitude","infinity","infirm",
+            "infirmary","infirmity","inflame","inflamed","inflammable",
+            "inflammation","inflammatory","inflatable","inflate","inflated",
+            "inflation","inflationary","inflect","inflection","inflexible",
+            "inflexion","inflict","infliction","inflow","influence",
+            "influential","influenza","influx","info","inform",
+            "informal","informant","information","informative","informed",
+            "informer","infra","infraction","infrared","infrastructure",
+            "infrequent","infringe","infuriate","infuse","infusion",
+            "ingathering","ingenious","ingenuity","ingenuous","ingest",
+            "inglenook","inglorious","ingoing","ingot","ingraft",
+            "ingrained","ingratiate","ingratiating","ingratitude","ingredient",
+            "ingress","ingrown","inhabit","inhabitant","inhale",
+            "inhaler","inharmonious","inhere","inherent","inherently",
+            "inherit","inheritance","inhibit","inhibited","inhibition",
+            "inhospitable","inhuman","inhumane","inhumanity","inimical",
+            "inimitable","iniquitous","iniquity","initial","initially",
+            "initiate","initiation","initiative","inject","injection",
+            "injudicious","injunction","injure","injurious","injury",
+            "injustice","ink","inkbottle","inkling","inkpad",
+            "inkstand","inkwell","inky","inlaid","inland",
+            "inlay","inlet","inmate","inmost","inn",
+            "innards","innate","inner","inning","innings",
+            "innkeeper","innocent","innocuous","innovate","innovation",
+            "innuendo","innumerable","inoculate","inoffensive","inoperable",
+            "inoperative","inopportune","inordinate","inorganic","input",
+            "inquest","inquietude","inquire","inquiring","inquiry",
+            "inquisition","inquisitive","inquisitor","inquisitorial","inroad",
+            "inrush","insalubrious","insane","insanitary","insanity",
+            "insatiable","insatiate","inscribe","inscription","inscrutable",
+            "insect","insecticide","insectivore","insectivorous","insecure",
+            "inseminate","insemination","insensate","insensibility","insensible",
+            "insensitive","inseparable","insert","insertion","inset",
+            "inshore","inside","insider","insidious","insight",
+            "insignia","insignificant","insincere","insinuate","insinuation",
+            "insipid","insist","insistence","insistency","insistent",
+            "insole","insolent","insoluble","insolvable","insolvent",
+            "insomnia","insomniac","insouciance","inspect","inspection",
+            "inspector","inspectorate","inspectorship","inspiration","inspire",
+            "inspired","instability","install","installation","installment",
+            "instalment","instance","instant","instantaneous","instantly",
+            "instead","instep","instigate","instigation","instil",
+            "instill","instinct","instinctive","institute","institution",
+            "instruct","instruction","instructive","instructor","instructress",
+            "instrument","instrumental","instrumentalist","instrumentality","instrumentation",
+            "insubordinate","insubstantial","insufferable","insufficiency","insufficient",
+            "insular","insularity","insulate","insulation","insulator",
+            "insulin","insult","insuperable","insupportable","insurance",
+            "insure","insured","insurer","insurgent","insurmountable",
+            "insurrection","intact","intaglio","intake","intangible",
+            "integer","integral","integrate","integrated","integrity",
+            "integument","intellect","intellectual","intelligence","intelligent",
+            "intelligentsia","intelligible","intemperate","intend","intended",
+            "intense","intensifier","intensify","intensity","intensive",
+            "intent","intention","intentional","intentions","inter",
+            "interact","interaction","interbreed","intercalary","intercalate",
+            "intercede","intercept","interceptor","intercession","interchange",
+            "interchangeable","intercity","intercollegiate","intercom","intercommunicate",
+            "intercommunion","intercontinental","intercourse","interdenominational","interdependent",
+            "interdict","interest","interested","interesting","interests",
+            "interface","interfere","interference","interim","interior",
+            "interject","interjection","interlace","interlard","interleave",
+            "interline","interlinear","interlink","interlock","interlocutor",
+            "interloper","interlude","intermarriage","intermarry","intermediary",
+            "intermediate","interment","intermezzo","interminable","intermingle",
+            "intermission","intermittent","intern","internal","internalise",
+            "internalize","international","internationale","internationalise","internationalism",
+            "internationalize","interne","internecine","internee","internment",
+            "interpellate","interpenetrate","interpersonal","interplanetary","interplay",
+            "interpol","interpolate","interpolation","interpose","interposition",
+            "interpret","interpretation","interpretative","interpreter","interracial",
+            "interregnum","interrelate","interrelation","interrogate","interrogative",
+            "interrogatory","interrupt","intersect","intersection","intersperse",
+            "interstate","interstellar","interstice","intertribal","intertwine",
+            "interurban","interval","intervene","intervention","interview",
+            "interweave","intestate","intestinal","intestine","intimacy",
+            "intimate","intimidate","intimidation","into","intolerable",
+            "intolerant","intonation","intone","intoxicant","intoxicate",
+            "intractable","intramural","intransigent","intransitive","intravenous",
+            "intrench","intrepid","intricacy","intricate","intrigue",
+            "intrinsic","intro","introduce","introduction","introductory",
+            "introit","introspection","introspective","introvert","introverted",
+            "intrude","intruder","intrusion","intrusive","intrust",
+            "intuit","intuition","intuitive","intumescence","inundate",
+            "inundation","inure","invade","invalid","invalidate",
+            "invalidism","invaluable","invariable","invasion","invective",
+            "inveigh","inveigle","invent","invention","inventive",
+            "inventor","inventory","inverse","inversion","invert",
+            "invertebrate","invest","investigate","investiture","investment",
+            "inveterate","invidious","invigilate","invigorate","invincible",
+            "inviolable","inviolate","invisible","invitation","invite",
+            "inviting","invocation","invoice","invoke","involuntary",
+            "involve","involved","invulnerable","inward","inwardness",
+            "inwards","inwrought","iodin","iodine","iodise",
+            "iodize","ion","ionic","ionise","ionize",
+            "ionosphere","iota","iou","ipa","ira",
+            "irascible","irate","ire","iridescent","iridium",
+            "irishman","irk","irksome","iron","ironclad",
+            "ironic","ironically","ironing","ironmonger","ironmongery",
+            "ironmould","irons","ironstone","ironware","ironwork",
+            "ironworks","irony","irradiate","irrational","irreconcilable",
+            "irrecoverable","irredeemable","irreducible","irrefutable","irregular",
+            "irregularity","irrelevance","irrelevant","irreligious","irremediable",
+            "irremovable","irreparable","irreplaceable","irrepressible","irreproachable",
+            "irresistible","irresolute","irresponsible","irretrievable","irreverent",
+            "irreversible","irrevocable","irrigate","irritable","irritant",
+            "irritate","irritation","irruption","isinglass","islam",
+            "island","islander","isle","islet","ism",
+            "isobar","isolate","isolated","isolation","isolationism",
+            "isotherm","isotope","israelite","issue","isthmus",
+            "ita","italic","italicise","italicize","italics",
+            "itch","itchy","item","itemise","itemize",
+            "iterate","itinerant","itinerary","itn","its",
+            "itself","itv","iud","ivied","ivory",
+            "ivy","jab","jabber","jack","jackal",
+            "jackanapes","jackaroo","jackass","jackboot","jackdaw",
+            "jackeroo","jacket","jackpot","jackrabbit","jacobean",
+            "jacobite","jade","jaded","jaffa","jag",
+            "jagged","jaguar","jail","jailbird","jailbreak",
+            "jailer","jailor","jalopy","jam","jamb",
+            "jamboree","jammy","jangle","janissary","janitor",
+            "january","japan","jape","japonica","jar",
+            "jargon","jasmine","jasper","jaundice","jaundiced",
+            "jaunt","jaunty","javelin","jaw","jawbone",
+            "jawbreaker","jaws","jay","jaywalk","jazz",
+            "jazzy","jealous","jealousy","jeans","jeep",
+            "jeer","jehovah","jejune","jell","jellied",
+            "jello","jelly","jellyfish","jemmy","jenny",
+            "jeopardise","jeopardize","jeopardy","jerboa","jeremiad",
+            "jerk","jerkin","jerky","jeroboam","jerry",
+            "jersey","jest","jester","jesting","jesuit",
+            "jesuitical","jet","jetsam","jettison","jetty",
+            "jew","jewel","jeweled","jeweler","jewelled",
+            "jeweller","jewellery","jewelry","jewess","jewish",
+            "jezebel","jib","jibe","jiffy","jig",
+            "jigger","jiggered","jiggle","jigsaw","jihad",
+            "jilt","jiminy","jimjams","jimmy","jingle",
+            "jingo","jingoism","jinks","jinn","jinrikisha",
+            "jinx","jitney","jitterbug","jitters","jiujitsu",
+            "jive","jnr","job","jobber","jobbery",
+            "jobbing","jobless","jockey","jockstrap","jocose",
+            "jocular","jocund","jodhpurs","jog","joggle",
+            "john","johnny","join","joiner","joinery",
+            "joint","joist","joke","joker","jollification",
+            "jollity","jolly","jolt","jolty","jonah",
+            "jonquil","josh","jostle","jot","jotter",
+            "jotting","joule","journal","journalese","journalism",
+            "journalist","journey","journeyman","joust","jove",
+            "jovial","jowl","joy","joyful","joyless",
+            "joyous","joyride","joystick","jubilant","jubilation",
+            "jubilee","judaic","judaism","judder","judge",
+            "judgement","judgment","judicature","judicial","judiciary",
+            "judicious","judo","jug","juggernaut","juggle",
+            "juice","juicy","jujitsu","juju","jujube",
+            "jukebox","julep","july","jumble","jumbo",
+            "jump","jumper","jumps","jumpy","junction",
+            "juncture","june","jungle","junior","juniper",
+            "junk","junket","junketing","junkie","junky",
+            "junoesque","junta","jupiter","juridical","jurisdiction",
+            "jurisprudence","jurist","juror","jury","juryman",
+            "just","justice","justifiable","justification","justified",
+            "justify","jut","jute","juvenile","juxtapose",
+            "juxtaposition","kaffir","kafir","kaftan","kail",
+            "kaiser","kale","kaleidoscope","kaleidoscopic","kalends",
+            "kampong","kangaroo","kaolin","kapok","kappa",
+            "kaput","karat","karate","karma","katydid",
+            "kayak","kazoo","kebab","kebob","kedgeree",
+            "keel","keelhaul","keen","keep","keeper",
+            "keeping","keeps","keepsake","keg","kelp",
+            "kelvin","ken","kennel","kennels","kepi",
+            "kept","kerb","kerchief","kerfuffle","kernel",
+            "kerosene","kerosine","kersey","kestrel","ketch",
+            "ketchup","kettle","kettledrum","key","keyboard",
+            "keyhole","keyless","keynote","keypunch","keystone",
+            "khaki","khalif","khalifate","khan","kibbutz",
+            "kibosh","kick","kickback","kicker","kickoff",
+            "kicks","kid","kiddie","kiddy","kidnap",
+            "kidney","kike","kill","killer","killing",
+            "killjoy","kiln","kilo","kilogram","kilogramme",
+            "kilohertz","kiloliter","kilolitre","kilometer","kilometre",
+            "kilowatt","kilt","kimono","kin","kind",
+            "kindergarten","kindle","kindling","kindly","kindness",
+            "kindred","kine","kinetic","kinetics","kinfolk",
+            "king","kingcup","kingdom","kingfisher","kingly",
+            "kingmaker","kingpin","kings","kingship","kink",
+            "kinky","kinsfolk","kinship","kinsman","kiosk",
+            "kip","kipper","kirk","kirsch","kirtle",
+            "kismet","kiss","kisser","kit","kitchen",
+            "kitchenette","kite","kitsch","kitten","kittenish",
+            "kittiwake","kitty","kiwi","klaxon","kleenex",
+            "kleptomania","kleptomaniac","knack","knacker","knackered",
+            "knapsack","knave","knavery","knead","knee",
+            "kneecap","kneel","knell","knew","knickerbockers",
+            "knickers","knife","knight","knighthood","knightly",
+            "knit","knitter","knitting","knitwear","knives",
+            "knob","knobbly","knobkerrie","knock","knockabout",
+            "knockdown","knocker","knockers","knockout","knoll",
+            "knot","knothole","knotty","knout","know",
+            "knowing","knowingly","knowledge","knowledgeable","known",
+            "knuckle","koala","kohl","kohlrabi","kookaburra",
+            "kopeck","kopek","kopje","koppie","koran",
+            "kosher","kowtow","kraal","kremlin","kris",
+            "krona","krone","kudos","kukri","kumis",
+            "kumquat","kuomintang","kurus","kvass","kwashiorkor",
+            "kwela","laager","lab","label","labial",
+            "labor","laboratory","laborer","laborious","labour",
+            "labourer","labourite","labrador","laburnum","labyrinth",
+            "lace","lacerate","laceration","lachrymal","lachrymose",
+            "lack","lackadaisical","lackey","lacking","lackluster",
+            "lacklustre","laconic","lacquer","lacrosse","lactation",
+            "lactic","lactose","lacuna","lacy","lad",
+            "ladder","laddie","laddy","laden","ladies",
+            "lading","ladle","lady","ladybird","ladylike",
+            "ladyship","lag","lager","laggard","lagging",
+            "lagoon","laid","lain","lair","laird",
+            "laity","lake","lam","lama","lamaism",
+            "lamasery","lamb","lambaste","lambent","lambkin",
+            "lamblike","lambskin","lame","lament","lamentable",
+            "lamentation","laminate","lamming","lamp","lampoon",
+            "lamppost","lamprey","lampshade","lance","lancer",
+            "lancers","lancet","land","landau","landed",
+            "landfall","landing","landlady","landlocked","landlord",
+            "landlubber","landmark","landmine","lands","landscape",
+            "landslide","landslip","landward","landwards","lane",
+            "language","languid","languish","languor","lank",
+            "lanky","lanolin","lantern","lanternslide","lanyard",
+            "lap","lapdog","lapel","lapidary","lapse",
+            "lapsed","lapwing","larboard","larceny","larch",
+            "lard","larder","large","largely","largess",
+            "largesse","largo","lariat","lark","larkspur",
+            "larrup","larva","laryngeal","laryngitis","laryngoscope",
+            "larynx","lasagna","lascivious","laser","lash",
+            "lashing","lashings","lass","lasso","last",
+            "lasting","lastly","lat","latch","latchkey",
+            "late","latecomer","lately","latent","lateral",
+            "latest","latex","lath","lathe","lather",
+            "latin","latinise","latinize","latitude","latitudes",
+            "latitudinal","latitudinarian","latrine","latter","latterly",
+            "lattice","laud","laudable","laudanum","laudatory",
+            "laugh","laughable","laughingstock","laughter","launch",
+            "launder","launderette","laundress","laundry","laureate",
+            "laurel","laurels","lava","lavatory","lave",
+            "lavender","lavish","law","lawful","lawless",
+            "lawn","lawsuit","lawyer","lax","laxative",
+            "laxity","lay","layabout","layer","layette",
+            "layman","layout","laze","lazy","lbw",
+            "lcm","lea","leach","lead","leaden",
+            "leader","leadership","leading","leads","leaf",
+            "leafage","leafed","leaflet","leafy","league",
+            "leak","leakage","leaky","lean","leaning",
+            "leap","leapfrog","learn","learned","learner",
+            "learning","lease","leasehold","leash","least",
+            "leastways","leather","leatherette","leathery","leave",
+            "leaved","leaven","leavening","leaves","leavings",
+            "lech","lecher","lecherous","lechery","lectern",
+            "lecture","lecturer","lectureship","led","ledge",
+            "ledger","lee","leech","leek","leer",
+            "leery","lees","leeward","leeway","left",
+            "leftist","leftovers","leftward","leftwards","leg",
+            "legacy","legal","legalise","legality","legalize",
+            "legate","legatee","legation","legato","legend",
+            "legendary","leger","legerdemain","legged","leggings",
+            "leggy","legible","legion","legionary","legislate",
+            "legislation","legislative","legislator","legislature","legit",
+            "legitimate","legitimatise","legitimatize","legroom","legume",
+            "leguminous","lei","leisure","leisured","leisurely",
+            "leitmotif","leitmotive","lemming","lemon","lemonade",
+            "lemur","lend","length","lengthen","lengthways",
+            "lengthy","lenience","lenient","lenity","lens",
+            "lent","lentil","lento","leo","leonine",
+            "leopard","leotard","leper","leprechaun","leprosy",
+            "lesbian","lesion","less","lessee","lessen",
+            "lesser","lesson","lessor","lest","let",
+            "letdown","lethal","lethargy","letraset","letter",
+            "letterbox","lettered","letterhead","lettering","letterpress",
+            "letters","letting","lettuce","letup","leucocyte",
+            "leucotomy","leukaemia","leukemia","leukocyte","levee",
+            "level","leveler","leveller","lever","leverage",
+            "leveret","leviathan","levitate","levity","levodopa",
+            "levy","lewd","lexical","lexicographer","lexicography",
+            "lexicon","lexis","liability","liable","liaise",
+            "liaison","liana","liar","lib","libation",
+            "libel","libellous","libelous","liberal","liberalise",
+            "liberalism","liberality","liberalize","liberally","liberate",
+            "liberated","liberation","libertarian","liberties","libertine",
+            "liberty","libidinous","libido","libra","librarian",
+            "library","librettist","libretto","lice","licence",
+            "licenced","license","licensed","licensee","licentiate",
+            "licentious","lichen","licit","lick","licking",
+            "licorice","lid","lido","lie","lieder",
+            "lief","liege","lien","lieu","lieutenant",
+            "life","lifeblood","lifeboat","lifeguard","lifeless",
+            "lifelike","lifeline","lifelong","lifer","lifetime",
+            "lift","liftboy","ligament","ligature","light",
+            "lighten","lighter","lighterage","lighthouse","lighting",
+            "lightly","lightness","lightning","lights","lightship",
+            "lightweight","ligneous","lignite","likable","like",
+            "likeable","likelihood","likely","liken","likeness",
+            "likes","likewise","liking","lilac","lilliputian",
+            "lilo","lilt","lily","limb","limber",
+            "limbo","lime","limeade","limejuice","limekiln",
+            "limelight","limerick","limestone","limey","limit",
+            "limitation","limited","limiting","limitless","limn",
+            "limousine","limp","limpet","limpid","limy",
+            "linchpin","linctus","linden","line","lineage",
+            "lineal","lineament","linear","lineman","linen",
+            "lineout","liner","linertrain","lines","lineshooter",
+            "linesman","lineup","ling","linger","lingerie",
+            "lingering","lingo","lingual","linguist","linguistic",
+            "linguistics","liniment","lining","link","linkage",
+            "linkman","links","linkup","linnet","linocut",
+            "linoleum","linotype","linseed","lint","lintel",
+            "lion","lionize","lip","lipid","lipstick",
+            "liquefaction","liquefy","liquescent","liqueur","liquid",
+            "liquidate","liquidation","liquidator","liquidity","liquidize",
+            "liquidizer","liquor","liquorice","lira","lisle",
+            "lisp","lissom","lissome","list","listen",
+            "listenable","listener","listless","lists","lit",
+            "litany","litchi","liter","literacy","literal",
+            "literally","literary","literate","literati","literature",
+            "lithe","lithium","lithograph","lithographic","lithography",
+            "litigant","litigate","litigation","litigious","litmus",
+            "litotes","litre","litter","litterateur","litterbin",
+            "litterlout","little","littoral","liturgical","liturgy",
+            "livable","live","liveable","livelihood","livelong",
+            "lively","liven","liver","liveried","liverish",
+            "livery","liveryman","lives","livestock","livid",
+            "living","lizard","llama","load","loaded",
+            "loadstar","loadstone","loaf","loafsugar","loam",
+            "loan","loanword","loath","loathe","loathing",
+            "loathsome","loaves","lob","lobby","lobed",
+            "lobotomy","lobster","lobsterpot","local","locale",
+            "localise","localism","locality","localize","locally",
+            "locate","located","location","loch","loci",
+        };
     }
 }
\ No newline at end of file


[04/50] [abbrv] lucenenet git commit: Ported Analysis.Hunspell + tests

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs
index 9ca2372..c3bc291 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs
@@ -1,10 +1,14 @@
 \ufeffusing System;
 using System.Diagnostics;
+using NUnit.Framework;
+using Lucene.Net.Util;
+using System.IO;
+using System.IO.Compression;
+using System.Text;
 
-namespace org.apache.lucene.analysis.hunspell
+namespace Lucene.Net.Analysis.Hunspell
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -22,77 +26,75 @@ namespace org.apache.lucene.analysis.hunspell
 	 */
 
 
-	using IOUtils = org.apache.lucene.util.IOUtils;
-	using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
-	using RamUsageEstimator = org.apache.lucene.util.RamUsageEstimator;
-	using Ignore = org.junit.Ignore;
-
-	/// <summary>
-	/// Can be retrieved via:
-	/// wget --mirror -np http://archive.services.openoffice.org/pub/mirror/OpenOffice.org/contrib/dictionaries/
-	/// Note some of the files differ only in case. This may be a problem on your operating system!
-	/// </summary>
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @Ignore("enable manually") public class TestAllDictionaries extends org.apache.lucene.util.LuceneTestCase
-	public class TestAllDictionaries : LuceneTestCase
-	{
-
-	  // set this to the location of where you downloaded all the files
-	  internal static readonly File DICTIONARY_HOME = new File("/data/archive.services.openoffice.org/pub/mirror/OpenOffice.org/contrib/dictionaries");
-
-	  internal readonly string[] tests = new string[] {"af_ZA.zip", "af_ZA.dic", "af_ZA.aff", "ak_GH.zip", "ak_GH.dic", "ak_GH.aff", "bg_BG.zip", "bg_BG.dic", "bg_BG.aff", "ca_ANY.zip", "catalan.dic", "catalan.aff", "ca_ES.zip", "ca_ES.dic", "ca_ES.aff", "cs_CZ.zip", "cs_CZ.dic", "cs_CZ.aff", "cy_GB.zip", "cy_GB.dic", "cy_GB.aff", "da_DK.zip", "da_DK.dic", "da_DK.aff", "de_AT.zip", "de_AT.dic", "de_AT.aff", "de_CH.zip", "de_CH.dic", "de_CH.aff", "de_DE.zip", "de_DE.dic", "de_DE.aff", "de_DE_comb.zip", "de_DE_comb.dic", "de_DE_comb.aff", "de_DE_frami.zip", "de_DE_frami.dic", "de_DE_frami.aff", "de_DE_neu.zip", "de_DE_neu.dic", "de_DE_neu.aff", "el_GR.zip", "el_GR.dic", "el_GR.aff", "en_AU.zip", "en_AU.dic", "en_AU.aff", "en_CA.zip", "en_CA.dic", "en_CA.aff", "en_GB-oed.zip", "en_GB-oed.dic", "en_GB-oed.aff", "en_GB.zip", "en_GB.dic", "en_GB.aff", "en_NZ.zip", "en_NZ.dic", "en_NZ.aff", "eo.zip", "eo_l3.dic", "eo_l3.aff", "eo_EO.zip", "eo_EO.dic", "eo_EO.aff", "es_AR.zip", "es_AR.dic", "e
 s_AR.aff", "es_BO.zip", "es_BO.dic", "es_BO.aff", "es_CL.zip", "es_CL.dic", "es_CL.aff", "es_CO.zip", "es_CO.dic", "es_CO.aff", "es_CR.zip", "es_CR.dic", "es_CR.aff", "es_CU.zip", "es_CU.dic", "es_CU.aff", "es_DO.zip", "es_DO.dic", "es_DO.aff", "es_EC.zip", "es_EC.dic", "es_EC.aff", "es_ES.zip", "es_ES.dic", "es_ES.aff", "es_GT.zip", "es_GT.dic", "es_GT.aff", "es_HN.zip", "es_HN.dic", "es_HN.aff", "es_MX.zip", "es_MX.dic", "es_MX.aff", "es_NEW.zip", "es_NEW.dic", "es_NEW.aff", "es_NI.zip", "es_NI.dic", "es_NI.aff", "es_PA.zip", "es_PA.dic", "es_PA.aff", "es_PE.zip", "es_PE.dic", "es_PE.aff", "es_PR.zip", "es_PR.dic", "es_PR.aff", "es_PY.zip", "es_PY.dic", "es_PY.aff", "es_SV.zip", "es_SV.dic", "es_SV.aff", "es_UY.zip", "es_UY.dic", "es_UY.aff", "es_VE.zip", "es_VE.dic", "es_VE.aff", "et_EE.zip", "et_EE.dic", "et_EE.aff", "fo_FO.zip", "fo_FO.dic", "fo_FO.aff", "fr_FR-1990_1-3-2.zip", "fr_FR-1990.dic", "fr_FR-1990.aff", "fr_FR-classique_1-3-2.zip", "fr_FR-classique.dic", "fr_FR-classi
 que.aff", "fr_FR_1-3-2.zip", "fr_FR.dic", "fr_FR.aff", "fy_NL.zip", "fy_NL.dic", "fy_NL.aff", "ga_IE.zip", "ga_IE.dic", "ga_IE.aff", "gd_GB.zip", "gd_GB.dic", "gd_GB.aff", "gl_ES.zip", "gl_ES.dic", "gl_ES.aff", "gsc_FR.zip", "gsc_FR.dic", "gsc_FR.aff", "gu_IN.zip", "gu_IN.dic", "gu_IN.aff", "he_IL.zip", "he_IL.dic", "he_IL.aff", "hi_IN.zip", "hi_IN.dic", "hi_IN.aff", "hil_PH.zip", "hil_PH.dic", "hil_PH.aff", "hr_HR.zip", "hr_HR.dic", "hr_HR.aff", "hu_HU.zip", "hu_HU.dic", "hu_HU.aff", "hu_HU_comb.zip", "hu_HU.dic", "hu_HU.aff", "ia.zip", "ia.dic", "ia.aff", "id_ID.zip", "id_ID.dic", "id_ID.aff", "it_IT.zip", "it_IT.dic", "it_IT.aff", "ku_TR.zip", "ku_TR.dic", "ku_TR.aff", "la.zip", "la.dic", "la.aff", "lt_LT.zip", "lt_LT.dic", "lt_LT.aff", "lv_LV.zip", "lv_LV.dic", "lv_LV.aff", "mg_MG.zip", "mg_MG.dic", "mg_MG.aff", "mi_NZ.zip", "mi_NZ.dic", "mi_NZ.aff", "mk_MK.zip", "mk_MK.dic", "mk_MK.aff", "mos_BF.zip", "mos_BF.dic", "mos_BF.aff", "mr_IN.zip", "mr_IN.dic", "mr_IN.aff", "ms_MY.zip
 ", "ms_MY.dic", "ms_MY.aff", "nb_NO.zip", "nb_NO.dic", "nb_NO.aff", "ne_NP.zip", "ne_NP.dic", "ne_NP.aff", "nl_NL.zip", "nl_NL.dic", "nl_NL.aff", "nl_med.zip", "nl_med.dic", "nl_med.aff", "nn_NO.zip", "nn_NO.dic", "nn_NO.aff", "nr_ZA.zip", "nr_ZA.dic", "nr_ZA.aff", "ns_ZA.zip", "ns_ZA.dic", "ns_ZA.aff", "ny_MW.zip", "ny_MW.dic", "ny_MW.aff", "oc_FR.zip", "oc_FR.dic", "oc_FR.aff", "pl_PL.zip", "pl_PL.dic", "pl_PL.aff", "pt_BR.zip", "pt_BR.dic", "pt_BR.aff", "pt_PT.zip", "pt_PT.dic", "pt_PT.aff", "ro_RO.zip", "ro_RO.dic", "ro_RO.aff", "ru_RU.zip", "ru_RU.dic", "ru_RU.aff", "ru_RU_ye.zip", "ru_RU_ie.dic", "ru_RU_ie.aff", "ru_RU_yo.zip", "ru_RU_yo.dic", "ru_RU_yo.aff", "rw_RW.zip", "rw_RW.dic", "rw_RW.aff", "sk_SK.zip", "sk_SK.dic", "sk_SK.aff", "sl_SI.zip", "sl_SI.dic", "sl_SI.aff", "sq_AL.zip", "sq_AL.dic", "sq_AL.aff", "ss_ZA.zip", "ss_ZA.dic", "ss_ZA.aff", "st_ZA.zip", "st_ZA.dic", "st_ZA.aff", "sv_SE.zip", "sv_SE.dic", "sv_SE.aff", "sw_KE.zip", "sw_KE.dic", "sw_KE.aff", "tet_ID.zip
 ", "tet_ID.dic", "tet_ID.aff", "th_TH.zip", "th_TH.dic", "th_TH.aff", "tl_PH.zip", "tl_PH.dic", "tl_PH.aff", "tn_ZA.zip", "tn_ZA.dic", "tn_ZA.aff", "ts_ZA.zip", "ts_ZA.dic", "ts_ZA.aff", "uk_UA.zip", "uk_UA.dic", "uk_UA.aff", "ve_ZA.zip", "ve_ZA.dic", "ve_ZA.aff", "vi_VN.zip", "vi_VN.dic", "vi_VN.aff", "xh_ZA.zip", "xh_ZA.dic", "xh_ZA.aff", "zu_ZA.zip", "zu_ZA.dic", "zu_ZA.aff"};
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void test() throws Exception
-	  public virtual void test()
-	  {
-		for (int i = 0; i < tests.Length; i += 3)
-		{
-		  File f = new File(DICTIONARY_HOME, tests[i]);
-		  Debug.Assert(f.exists());
-
-		  using (ZipFile zip = new ZipFile(f, StandardCharsets.UTF_8))
-		  {
-			ZipEntry dicEntry = zip.getEntry(tests[i + 1]);
-			Debug.Assert(dicEntry != null);
-			ZipEntry affEntry = zip.getEntry(tests[i + 2]);
-			Debug.Assert(affEntry != null);
-
-			using (System.IO.Stream dictionary = zip.getInputStream(dicEntry), System.IO.Stream affix = zip.getInputStream(affEntry))
-			{
-			  Dictionary dic = new Dictionary(affix, dictionary);
-			  Console.WriteLine(tests[i] + "\t" + RamUsageEstimator.humanSizeOf(dic) + "\t(" + "words=" + RamUsageEstimator.humanSizeOf(dic.words) + ", " + "flags=" + RamUsageEstimator.humanSizeOf(dic.flagLookup) + ", " + "strips=" + RamUsageEstimator.humanSizeOf(dic.stripData) + ", " + "conditions=" + RamUsageEstimator.humanSizeOf(dic.patterns) + ", " + "affixData=" + RamUsageEstimator.humanSizeOf(dic.affixData) + ", " + "prefixes=" + RamUsageEstimator.humanSizeOf(dic.prefixes) + ", " + "suffixes=" + RamUsageEstimator.humanSizeOf(dic.suffixes) + ")");
-			}
-		  }
-		}
-	  }
+    //using IOUtils = org.apache.lucene.util.IOUtils;
+    //using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
+    //using RamUsageEstimator = org.apache.lucene.util.RamUsageEstimator;
+    //using Ignore = org.junit.Ignore;
+
+    /// <summary>
+    /// Can be retrieved via:
+    /// wget --mirror -np http://archive.services.openoffice.org/pub/mirror/OpenOffice.org/contrib/dictionaries/
+    /// Note some of the files differ only in case. This may be a problem on your operating system!
+    /// </summary>
+
+    //[Ignore("Enable manually")]
+    public class TestAllDictionaries : LuceneTestCase
+    {
+
+        // set this to the location of where you downloaded all the files
+        internal static readonly FileInfo DICTIONARY_HOME = new FileInfo("/data/archive.services.openoffice.org/pub/mirror/OpenOffice.org/contrib/dictionaries");
+
+        internal readonly string[] tests = new string[] { "af_ZA.zip", "af_ZA.dic", "af_ZA.aff", "ak_GH.zip", "ak_GH.dic", "ak_GH.aff", "bg_BG.zip", "bg_BG.dic", "bg_BG.aff", "ca_ANY.zip", "catalan.dic", "catalan.aff", "ca_ES.zip", "ca_ES.dic", "ca_ES.aff", "cs_CZ.zip", "cs_CZ.dic", "cs_CZ.aff", "cy_GB.zip", "cy_GB.dic", "cy_GB.aff", "da_DK.zip", "da_DK.dic", "da_DK.aff", "de_AT.zip", "de_AT.dic", "de_AT.aff", "de_CH.zip", "de_CH.dic", "de_CH.aff", "de_DE.zip", "de_DE.dic", "de_DE.aff", "de_DE_comb.zip", "de_DE_comb.dic", "de_DE_comb.aff", "de_DE_frami.zip", "de_DE_frami.dic", "de_DE_frami.aff", "de_DE_neu.zip", "de_DE_neu.dic", "de_DE_neu.aff", "el_GR.zip", "el_GR.dic", "el_GR.aff", "en_AU.zip", "en_AU.dic", "en_AU.aff", "en_CA.zip", "en_CA.dic", "en_CA.aff", "en_GB-oed.zip", "en_GB-oed.dic", "en_GB-oed.aff", "en_GB.zip", "en_GB.dic", "en_GB.aff", "en_NZ.zip", "en_NZ.dic", "en_NZ.aff", "eo.zip", "eo_l3.dic", "eo_l3.aff", "eo_EO.zip", "eo_EO.dic", "eo_EO.aff", "es_AR.zip", "es_AR.di
 c", "es_AR.aff", "es_BO.zip", "es_BO.dic", "es_BO.aff", "es_CL.zip", "es_CL.dic", "es_CL.aff", "es_CO.zip", "es_CO.dic", "es_CO.aff", "es_CR.zip", "es_CR.dic", "es_CR.aff", "es_CU.zip", "es_CU.dic", "es_CU.aff", "es_DO.zip", "es_DO.dic", "es_DO.aff", "es_EC.zip", "es_EC.dic", "es_EC.aff", "es_ES.zip", "es_ES.dic", "es_ES.aff", "es_GT.zip", "es_GT.dic", "es_GT.aff", "es_HN.zip", "es_HN.dic", "es_HN.aff", "es_MX.zip", "es_MX.dic", "es_MX.aff", "es_NEW.zip", "es_NEW.dic", "es_NEW.aff", "es_NI.zip", "es_NI.dic", "es_NI.aff", "es_PA.zip", "es_PA.dic", "es_PA.aff", "es_PE.zip", "es_PE.dic", "es_PE.aff", "es_PR.zip", "es_PR.dic", "es_PR.aff", "es_PY.zip", "es_PY.dic", "es_PY.aff", "es_SV.zip", "es_SV.dic", "es_SV.aff", "es_UY.zip", "es_UY.dic", "es_UY.aff", "es_VE.zip", "es_VE.dic", "es_VE.aff", "et_EE.zip", "et_EE.dic", "et_EE.aff", "fo_FO.zip", "fo_FO.dic", "fo_FO.aff", "fr_FR-1990_1-3-2.zip", "fr_FR-1990.dic", "fr_FR-1990.aff", "fr_FR-classique_1-3-2.zip", "fr_FR-classique.dic", "fr_FR-
 classique.aff", "fr_FR_1-3-2.zip", "fr_FR.dic", "fr_FR.aff", "fy_NL.zip", "fy_NL.dic", "fy_NL.aff", "ga_IE.zip", "ga_IE.dic", "ga_IE.aff", "gd_GB.zip", "gd_GB.dic", "gd_GB.aff", "gl_ES.zip", "gl_ES.dic", "gl_ES.aff", "gsc_FR.zip", "gsc_FR.dic", "gsc_FR.aff", "gu_IN.zip", "gu_IN.dic", "gu_IN.aff", "he_IL.zip", "he_IL.dic", "he_IL.aff", "hi_IN.zip", "hi_IN.dic", "hi_IN.aff", "hil_PH.zip", "hil_PH.dic", "hil_PH.aff", "hr_HR.zip", "hr_HR.dic", "hr_HR.aff", "hu_HU.zip", "hu_HU.dic", "hu_HU.aff", "hu_HU_comb.zip", "hu_HU.dic", "hu_HU.aff", "ia.zip", "ia.dic", "ia.aff", "id_ID.zip", "id_ID.dic", "id_ID.aff", "it_IT.zip", "it_IT.dic", "it_IT.aff", "ku_TR.zip", "ku_TR.dic", "ku_TR.aff", "la.zip", "la.dic", "la.aff", "lt_LT.zip", "lt_LT.dic", "lt_LT.aff", "lv_LV.zip", "lv_LV.dic", "lv_LV.aff", "mg_MG.zip", "mg_MG.dic", "mg_MG.aff", "mi_NZ.zip", "mi_NZ.dic", "mi_NZ.aff", "mk_MK.zip", "mk_MK.dic", "mk_MK.aff", "mos_BF.zip", "mos_BF.dic", "mos_BF.aff", "mr_IN.zip", "mr_IN.dic", "mr_IN.aff", "ms_
 MY.zip", "ms_MY.dic", "ms_MY.aff", "nb_NO.zip", "nb_NO.dic", "nb_NO.aff", "ne_NP.zip", "ne_NP.dic", "ne_NP.aff", "nl_NL.zip", "nl_NL.dic", "nl_NL.aff", "nl_med.zip", "nl_med.dic", "nl_med.aff", "nn_NO.zip", "nn_NO.dic", "nn_NO.aff", "nr_ZA.zip", "nr_ZA.dic", "nr_ZA.aff", "ns_ZA.zip", "ns_ZA.dic", "ns_ZA.aff", "ny_MW.zip", "ny_MW.dic", "ny_MW.aff", "oc_FR.zip", "oc_FR.dic", "oc_FR.aff", "pl_PL.zip", "pl_PL.dic", "pl_PL.aff", "pt_BR.zip", "pt_BR.dic", "pt_BR.aff", "pt_PT.zip", "pt_PT.dic", "pt_PT.aff", "ro_RO.zip", "ro_RO.dic", "ro_RO.aff", "ru_RU.zip", "ru_RU.dic", "ru_RU.aff", "ru_RU_ye.zip", "ru_RU_ie.dic", "ru_RU_ie.aff", "ru_RU_yo.zip", "ru_RU_yo.dic", "ru_RU_yo.aff", "rw_RW.zip", "rw_RW.dic", "rw_RW.aff", "sk_SK.zip", "sk_SK.dic", "sk_SK.aff", "sl_SI.zip", "sl_SI.dic", "sl_SI.aff", "sq_AL.zip", "sq_AL.dic", "sq_AL.aff", "ss_ZA.zip", "ss_ZA.dic", "ss_ZA.aff", "st_ZA.zip", "st_ZA.dic", "st_ZA.aff", "sv_SE.zip", "sv_SE.dic", "sv_SE.aff", "sw_KE.zip", "sw_KE.dic", "sw_KE.aff", "tet_
 ID.zip", "tet_ID.dic", "tet_ID.aff", "th_TH.zip", "th_TH.dic", "th_TH.aff", "tl_PH.zip", "tl_PH.dic", "tl_PH.aff", "tn_ZA.zip", "tn_ZA.dic", "tn_ZA.aff", "ts_ZA.zip", "ts_ZA.dic", "ts_ZA.aff", "uk_UA.zip", "uk_UA.dic", "uk_UA.aff", "ve_ZA.zip", "ve_ZA.dic", "ve_ZA.aff", "vi_VN.zip", "vi_VN.dic", "vi_VN.aff", "xh_ZA.zip", "xh_ZA.dic", "xh_ZA.aff", "zu_ZA.zip", "zu_ZA.dic", "zu_ZA.aff" };
+
+        [Test]
+        public virtual void test()
+        {
+            for (int i = 0; i < tests.Length; i += 3)
+            {
+                FileInfo f = new FileInfo(System.IO.Path.Combine(DICTIONARY_HOME.FullName, tests[i]));
+                Debug.Assert(f.Exists);
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testOneDictionary() throws Exception
-	  public virtual void testOneDictionary()
-	  {
-		string toTest = "hu_HU.zip";
-		for (int i = 0; i < tests.Length; i++)
-		{
-		  if (tests[i].Equals(toTest))
-		  {
-			File f = new File(DICTIONARY_HOME, tests[i]);
-			Debug.Assert(f.exists());
-
-			using (ZipFile zip = new ZipFile(f, StandardCharsets.UTF_8))
-			{
-			  ZipEntry dicEntry = zip.getEntry(tests[i + 1]);
-			  Debug.Assert(dicEntry != null);
-			  ZipEntry affEntry = zip.getEntry(tests[i + 2]);
-			  Debug.Assert(affEntry != null);
-
-			  using (System.IO.Stream dictionary = zip.getInputStream(dicEntry), System.IO.Stream affix = zip.getInputStream(affEntry))
+                using (ZipFile zip = new ZipFile(f, Encoding.UTF8))
+                {
+                    ZipEntry dicEntry = zip.getEntry(tests[i + 1]);
+                    Debug.Assert(dicEntry != null);
+                    ZipEntry affEntry = zip.getEntry(tests[i + 2]);
+                    Debug.Assert(affEntry != null);
+
+                    using (System.IO.Stream dictionary = zip.getInputStream(dicEntry), System.IO.Stream affix = zip.getInputStream(affEntry))
+			        {
+                Dictionary dic = new Dictionary(affix, dictionary);
+                Console.WriteLine(tests[i] + "\t" + RamUsageEstimator.humanSizeOf(dic) + "\t(" + "words=" + RamUsageEstimator.humanSizeOf(dic.words) + ", " + "flags=" + RamUsageEstimator.humanSizeOf(dic.flagLookup) + ", " + "strips=" + RamUsageEstimator.humanSizeOf(dic.stripData) + ", " + "conditions=" + RamUsageEstimator.humanSizeOf(dic.patterns) + ", " + "affixData=" + RamUsageEstimator.humanSizeOf(dic.affixData) + ", " + "prefixes=" + RamUsageEstimator.humanSizeOf(dic.prefixes) + ", " + "suffixes=" + RamUsageEstimator.humanSizeOf(dic.suffixes) + ")");
+            }
+        }
+    }
+
+
+    [Test]
+    public virtual void testOneDictionary()
+    {
+        string toTest = "hu_HU.zip";
+        for (int i = 0; i < tests.Length; i++)
+        {
+            if (tests[i].Equals(toTest))
+            {
+                File f = new File(DICTIONARY_HOME, tests[i]);
+                Debug.Assert(f.exists());
+
+                using (ZipFile zip = new ZipFile(f, StandardCharsets.UTF_8))
+                {
+                    ZipEntry dicEntry = zip.getEntry(tests[i + 1]);
+                    Debug.Assert(dicEntry != null);
+                    ZipEntry affEntry = zip.getEntry(tests[i + 2]);
+                    Debug.Assert(affEntry != null);
+
+                    using (System.IO.Stream dictionary = zip.getInputStream(dicEntry), System.IO.Stream affix = zip.getInputStream(affEntry))
 			  {
-				  new Dictionary(affix, dictionary);
-			  }
-			}
-		  }
-		}
+            new Dictionary(affix, dictionary);
+        }
+    }
+
+}
 	  }
 	}
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs
index f2e76ec..eaae4f5 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs
@@ -1,7 +1,8 @@
 \ufeffusing System;
 using System.Diagnostics;
+using NUnit.Framework;
 
-namespace org.apache.lucene.analysis.hunspell
+namespace Lucene.Net.Analysis.Hunspell
 {
 
 	/*

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCaseInsensitive.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCaseInsensitive.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCaseInsensitive.cs
index 1a84783..cba206b 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCaseInsensitive.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCaseInsensitive.cs
@@ -1,7 +1,8 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
-{
+\ufeffusing NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,73 +19,70 @@
 	 * limitations under the License.
 	 */
 
-	using BeforeClass = org.junit.BeforeClass;
-
-	public class TestCaseInsensitive : StemmerTestBase
-	{
-
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-	  public static void beforeClass()
-	  {
-		init(true, "simple.aff", "mixedcase.dic");
-	  }
-
-	  public virtual void testCaseInsensitivity()
-	  {
-		assertStemsTo("lucene", "lucene", "lucen");
-		assertStemsTo("LuCeNe", "lucene", "lucen");
-		assertStemsTo("mahoute", "mahout");
-		assertStemsTo("MaHoUte", "mahout");
-	  }
-
-	  public virtual void testSimplePrefix()
-	  {
-		assertStemsTo("solr", "olr");
-	  }
-
-	  public virtual void testRecursiveSuffix()
-	  {
-		// we should not recurse here! as the suffix has no continuation!
-		assertStemsTo("abcd");
-	  }
+    public class TestCaseInsensitive : StemmerTestBase
+    {
+        [TestFixtureSetUp]
+        public static void BeforeClass()
+        {
+            Init(true, "simple.aff", "mixedcase.dic");
+        }
 
-	  // all forms unmunched from dictionary
-	  public virtual void testAllStems()
-	  {
-		assertStemsTo("ab", "ab");
-		assertStemsTo("abc", "ab");
-		assertStemsTo("apach", "apach");
-		assertStemsTo("apache", "apach");
-		assertStemsTo("foo", "foo", "foo");
-		assertStemsTo("food", "foo");
-		assertStemsTo("foos", "foo");
-		assertStemsTo("lucen", "lucen");
-		assertStemsTo("lucene", "lucen", "lucene");
-		assertStemsTo("mahout", "mahout");
-		assertStemsTo("mahoute", "mahout");
-		assertStemsTo("moo", "moo");
-		assertStemsTo("mood", "moo");
-		assertStemsTo("olr", "olr");
-		assertStemsTo("solr", "olr");
-	  }
+        [Test]
+        public virtual void TestCaseInsensitivity()
+        {
+            AssertStemsTo("lucene", "lucene", "lucen");
+            AssertStemsTo("LuCeNe", "lucene", "lucen");
+            AssertStemsTo("mahoute", "mahout");
+            AssertStemsTo("MaHoUte", "mahout");
+        }
+        [Test]
+        public virtual void TestSimplePrefix()
+        {
+            AssertStemsTo("solr", "olr");
+        }
+        [Test]
+        public virtual void TestRecursiveSuffix()
+        {
+            // we should not recurse here! as the suffix has no continuation!
+            AssertStemsTo("abcd");
+        }
 
-	  // some bogus stuff that should not stem (empty lists)!
-	  public virtual void testBogusStems()
-	  {
-		assertStemsTo("abs");
-		assertStemsTo("abe");
-		assertStemsTo("sab");
-		assertStemsTo("sapach");
-		assertStemsTo("sapache");
-		assertStemsTo("apachee");
-		assertStemsTo("sfoo");
-		assertStemsTo("sfoos");
-		assertStemsTo("fooss");
-		assertStemsTo("lucenee");
-		assertStemsTo("solre");
-	  }
-	}
+        // all forms unmunched from dictionary
+        [Test]
+        public virtual void TestAllStems()
+        {
+            AssertStemsTo("ab", "ab");
+            AssertStemsTo("abc", "ab");
+            AssertStemsTo("apach", "apach");
+            AssertStemsTo("apache", "apach");
+            AssertStemsTo("foo", "foo", "foo");
+            AssertStemsTo("food", "foo");
+            AssertStemsTo("foos", "foo");
+            AssertStemsTo("lucen", "lucen");
+            AssertStemsTo("lucene", "lucen", "lucene");
+            AssertStemsTo("mahout", "mahout");
+            AssertStemsTo("mahoute", "mahout");
+            AssertStemsTo("moo", "moo");
+            AssertStemsTo("mood", "moo");
+            AssertStemsTo("olr", "olr");
+            AssertStemsTo("solr", "olr");
+        }
 
+        // some bogus stuff that should not stem (empty lists)!
+        [Test]
+        public virtual void TestBogusStems()
+        {
+            AssertStemsTo("abs");
+            AssertStemsTo("abe");
+            AssertStemsTo("sab");
+            AssertStemsTo("sapach");
+            AssertStemsTo("sapache");
+            AssertStemsTo("apachee");
+            AssertStemsTo("sfoo");
+            AssertStemsTo("sfoos");
+            AssertStemsTo("fooss");
+            AssertStemsTo("lucenee");
+            AssertStemsTo("solre");
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCircumfix.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCircumfix.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCircumfix.cs
index 0fd32a8..d0c3668 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCircumfix.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCircumfix.cs
@@ -1,7 +1,8 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
-{
+\ufeffusing NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,29 +19,24 @@
 	 * limitations under the License.
 	 */
 
-	using BeforeClass = org.junit.BeforeClass;
-
-	public class TestCircumfix : StemmerTestBase
-	{
-
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-	  public static void beforeClass()
-	  {
-		init("circumfix.aff", "circumfix.dic");
-	  }
-
-	  public virtual void testCircumfix()
-	  {
-		assertStemsTo("nagy", "nagy");
-		assertStemsTo("nagyobb", "nagy");
-		assertStemsTo("legnagyobb", "nagy");
-		assertStemsTo("legeslegnagyobb", "nagy");
-		assertStemsTo("nagyobbobb");
-		assertStemsTo("legnagy");
-		assertStemsTo("legeslegnagy");
-	  }
-	}
+    public class TestCircumfix_ : StemmerTestBase
+    {
+        [TestFixtureSetUp]
+        public static void BeforeClass()
+        {
+            Init("circumfix.aff", "circumfix.dic");
+        }
 
+        [Test]
+        public virtual void TestCircumfix()
+        {
+            AssertStemsTo("nagy", "nagy");
+            AssertStemsTo("nagyobb", "nagy");
+            AssertStemsTo("legnagyobb", "nagy");
+            AssertStemsTo("legeslegnagyobb", "nagy");
+            AssertStemsTo("nagyobbobb");
+            AssertStemsTo("legnagy");
+            AssertStemsTo("legeslegnagy");
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestComplexPrefix.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestComplexPrefix.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestComplexPrefix.cs
index ee892b4..5c82714 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestComplexPrefix.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestComplexPrefix.cs
@@ -1,7 +1,8 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
-{
+\ufeffusing NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,30 +19,26 @@
 	 * limitations under the License.
 	 */
 
-	using BeforeClass = org.junit.BeforeClass;
-
-	public class TestComplexPrefix : StemmerTestBase
-	{
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-	  public static void beforeClass()
-	  {
-		init("complexprefix.aff", "complexprefix.dic");
-	  }
-
-	  public virtual void testPrefixes()
-	  {
-		assertStemsTo("ptwofoo", "foo");
-		assertStemsTo("poneptwofoo", "foo");
-		assertStemsTo("foosuf", "foo");
-		assertStemsTo("ptwofoosuf", "foo");
-		assertStemsTo("poneptwofoosuf", "foo");
-		assertStemsTo("ponefoo");
-		assertStemsTo("ponefoosuf");
-		assertStemsTo("ptwoponefoo");
-		assertStemsTo("ptwoponefoosuf");
-	  }
-	}
+    public class TestComplexPrefix : StemmerTestBase
+    {
+        [TestFixtureSetUp]
+        public static void BeforeClass()
+        {
+            Init("complexprefix.aff", "complexprefix.dic");
+        }
 
+        [Test]
+        public virtual void TestPrefixes()
+        {
+            AssertStemsTo("ptwofoo", "foo");
+            AssertStemsTo("poneptwofoo", "foo");
+            AssertStemsTo("foosuf", "foo");
+            AssertStemsTo("ptwofoosuf", "foo");
+            AssertStemsTo("poneptwofoosuf", "foo");
+            AssertStemsTo("ponefoo");
+            AssertStemsTo("ponefoosuf");
+            AssertStemsTo("ptwoponefoo");
+            AssertStemsTo("ptwoponefoosuf");
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCondition.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCondition.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCondition.cs
index af1f60d..b9f0a41 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCondition.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestCondition.cs
@@ -1,7 +1,8 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
-{
+\ufeffusing NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,33 +19,29 @@
 	 * limitations under the License.
 	 */
 
-	using BeforeClass = org.junit.BeforeClass;
-
-	public class TestCondition : StemmerTestBase
-	{
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-	  public static void beforeClass()
-	  {
-		init("condition.aff", "condition.dic");
-	  }
-
-	  public virtual void testStemming()
-	  {
-		assertStemsTo("hello", "hello");
-		assertStemsTo("try", "try");
-		assertStemsTo("tried", "try");
-		assertStemsTo("work", "work");
-		assertStemsTo("worked", "work");
-		assertStemsTo("rework", "work");
-		assertStemsTo("reworked", "work");
-		assertStemsTo("retried");
-		assertStemsTo("workied");
-		assertStemsTo("tryed");
-		assertStemsTo("tryied");
-		assertStemsTo("helloed");
-	  }
-	}
+    public class TestCondition : StemmerTestBase
+    {
+        [TestFixtureSetUp]
+        public static void BeforeClass()
+        {
+            Init("condition.aff", "condition.dic");
+        }
 
+        [Test]
+        public virtual void TestStemming()
+        {
+            AssertStemsTo("hello", "hello");
+            AssertStemsTo("try", "try");
+            AssertStemsTo("tried", "try");
+            AssertStemsTo("work", "work");
+            AssertStemsTo("worked", "work");
+            AssertStemsTo("rework", "work");
+            AssertStemsTo("reworked", "work");
+            AssertStemsTo("retried");
+            AssertStemsTo("workied");
+            AssertStemsTo("tryed");
+            AssertStemsTo("tryied");
+            AssertStemsTo("helloed");
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestConv.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestConv.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestConv.cs
index 0a1f209..9c2ad74 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestConv.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestConv.cs
@@ -1,9 +1,8 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
-{
-
-	using BeforeClass = org.junit.BeforeClass;
+\ufeffusing NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -20,25 +19,21 @@
 	 * limitations under the License.
 	 */
 
-	public class TestConv : StemmerTestBase
-	{
-
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-	  public static void beforeClass()
-	  {
-		init("conv.aff", "conv.dic");
-	  }
-
-	  public virtual void testConversion()
-	  {
-		assertStemsTo("drink", "drInk");
-		assertStemsTo("drInk", "drInk");
-		assertStemsTo("drInkAble", "drInk");
-		assertStemsTo("drInkABle", "drInk");
-		assertStemsTo("drinkABle", "drInk");
-	  }
-	}
-
+    public class TestConv : StemmerTestBase
+    {
+        [TestFixtureSetUp]
+        public static void BeforeClass()
+        {
+            Init("conv.aff", "conv.dic");
+        }
+        [Test]
+        public virtual void TestConversion()
+        {
+            AssertStemsTo("drink", "drInk");
+            AssertStemsTo("drInk", "drInk");
+            AssertStemsTo("drInkAble", "drInk");
+            AssertStemsTo("drInkABle", "drInk");
+            AssertStemsTo("drinkABle", "drInk");
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDependencies.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDependencies.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDependencies.cs
index 79a2b34..5e2fc48 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDependencies.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDependencies.cs
@@ -1,7 +1,8 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
-{
+\ufeffusing NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,30 +19,25 @@
 	 * limitations under the License.
 	 */
 
-	using BeforeClass = org.junit.BeforeClass;
-
-	public class TestDependencies : StemmerTestBase
-	{
-
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-	  public static void beforeClass()
-	  {
-		init("dependencies.aff", "dependencies.dic");
-	  }
-
-	  public virtual void testDependencies()
-	  {
-		assertStemsTo("drink", "drink", "drink");
-		assertStemsTo("drinks", "drink", "drink");
-		assertStemsTo("drinkable", "drink");
-		assertStemsTo("drinkables", "drink");
-		assertStemsTo("undrinkable", "drink");
-		assertStemsTo("undrinkables", "drink");
-		assertStemsTo("undrink");
-		assertStemsTo("undrinks");
-	  }
-	}
+    public class TestDependencies_ : StemmerTestBase
+    {
+        [TestFixtureSetUp]
+        public static void BeforeClass()
+        {
+            Init("dependencies.aff", "dependencies.dic");
+        }
+        [Test]
+        public virtual void TestDependencies()
+        {
+            AssertStemsTo("drink", "drink", "drink");
+            AssertStemsTo("drinks", "drink", "drink");
+            AssertStemsTo("drinkable", "drink");
+            AssertStemsTo("drinkables", "drink");
+            AssertStemsTo("undrinkable", "drink");
+            AssertStemsTo("undrinkables", "drink");
+            AssertStemsTo("undrink");
+            AssertStemsTo("undrinks");
+        }
+    }
 
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDictionary.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDictionary.cs
index 266f4f5..7e3cde0 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDictionary.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestDictionary.cs
@@ -1,10 +1,15 @@
 \ufeffusing System;
 using System.Text;
+using NUnit.Framework;
+using Lucene.Net.Util;
+using System.IO;
+using Lucene.Net.Util.Fst;
+using Lucene.Net.Support;
 
-namespace org.apache.lucene.analysis.hunspell
+namespace Lucene.Net.Analysis.Hunspell
 {
 
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -21,273 +26,328 @@ namespace org.apache.lucene.analysis.hunspell
 	 * limitations under the License.
 	 */
 
-
-	using BytesRef = org.apache.lucene.util.BytesRef;
-	using CharsRef = org.apache.lucene.util.CharsRef;
-	using IOUtils = org.apache.lucene.util.IOUtils;
-	using IntsRef = org.apache.lucene.util.IntsRef;
-	using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
-	using Builder = org.apache.lucene.util.fst.Builder;
-	using CharSequenceOutputs = org.apache.lucene.util.fst.CharSequenceOutputs;
-	using FST = org.apache.lucene.util.fst.FST;
-	using Outputs = org.apache.lucene.util.fst.Outputs;
-	using Util = org.apache.lucene.util.fst.Util;
-
-	public class TestDictionary : LuceneTestCase
-	{
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testSimpleDictionary() throws Exception
-	  public virtual void testSimpleDictionary()
-	  {
-		System.IO.Stream affixStream = this.GetType().getResourceAsStream("simple.aff");
-		System.IO.Stream dictStream = this.GetType().getResourceAsStream("simple.dic");
-
-		Dictionary dictionary = new Dictionary(affixStream, dictStream);
-		assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
-		assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
-		IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
-		assertNotNull(ordList);
-		assertEquals(1, ordList.length);
-
-		BytesRef @ref = new BytesRef();
-		dictionary.flagLookup.get(ordList.ints[0], @ref);
-		char[] flags = Dictionary.decodeFlags(@ref);
-		assertEquals(1, flags.Length);
-
-		ordList = dictionary.lookupWord(new char[]{'l', 'u', 'c', 'e', 'n'}, 0, 5);
-		assertNotNull(ordList);
-		assertEquals(1, ordList.length);
-		dictionary.flagLookup.get(ordList.ints[0], @ref);
-		flags = Dictionary.decodeFlags(@ref);
-		assertEquals(1, flags.Length);
-
-		affixStream.Close();
-		dictStream.Close();
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testCompressedDictionary() throws Exception
-	  public virtual void testCompressedDictionary()
-	  {
-		System.IO.Stream affixStream = this.GetType().getResourceAsStream("compressed.aff");
-		System.IO.Stream dictStream = this.GetType().getResourceAsStream("compressed.dic");
-
-		Dictionary dictionary = new Dictionary(affixStream, dictStream);
-		assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
-		assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
-		IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
-		BytesRef @ref = new BytesRef();
-		dictionary.flagLookup.get(ordList.ints[0], @ref);
-		char[] flags = Dictionary.decodeFlags(@ref);
-		assertEquals(1, flags.Length);
-
-		affixStream.Close();
-		dictStream.Close();
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testCompressedBeforeSetDictionary() throws Exception
-	  public virtual void testCompressedBeforeSetDictionary()
-	  {
-		System.IO.Stream affixStream = this.GetType().getResourceAsStream("compressed-before-set.aff");
-		System.IO.Stream dictStream = this.GetType().getResourceAsStream("compressed.dic");
-
-		Dictionary dictionary = new Dictionary(affixStream, dictStream);
-		assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
-		assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
-		IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
-		BytesRef @ref = new BytesRef();
-		dictionary.flagLookup.get(ordList.ints[0], @ref);
-		char[] flags = Dictionary.decodeFlags(@ref);
-		assertEquals(1, flags.Length);
-
-		affixStream.Close();
-		dictStream.Close();
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testCompressedEmptyAliasDictionary() throws Exception
-	  public virtual void testCompressedEmptyAliasDictionary()
-	  {
-		System.IO.Stream affixStream = this.GetType().getResourceAsStream("compressed-empty-alias.aff");
-		System.IO.Stream dictStream = this.GetType().getResourceAsStream("compressed.dic");
-
-		Dictionary dictionary = new Dictionary(affixStream, dictStream);
-		assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
-		assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
-		IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
-		BytesRef @ref = new BytesRef();
-		dictionary.flagLookup.get(ordList.ints[0], @ref);
-		char[] flags = Dictionary.decodeFlags(@ref);
-		assertEquals(1, flags.Length);
-
-		affixStream.Close();
-		dictStream.Close();
-	  }
-
-	  // malformed rule causes ParseException
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testInvalidData() throws Exception
-	  public virtual void testInvalidData()
-	  {
-		System.IO.Stream affixStream = this.GetType().getResourceAsStream("broken.aff");
-		System.IO.Stream dictStream = this.GetType().getResourceAsStream("simple.dic");
-
-		try
-		{
-		  new Dictionary(affixStream, dictStream);
-		  fail("didn't get expected exception");
-		}
-		catch (ParseException expected)
-		{
-		  assertTrue(expected.Message.startsWith("The affix file contains a rule with less than four elements"));
-		  assertEquals(24, expected.ErrorOffset);
-		}
-
-		affixStream.Close();
-		dictStream.Close();
-	  }
-
-	  // malformed flags causes ParseException
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testInvalidFlags() throws Exception
-	  public virtual void testInvalidFlags()
-	  {
-		System.IO.Stream affixStream = this.GetType().getResourceAsStream("broken-flags.aff");
-		System.IO.Stream dictStream = this.GetType().getResourceAsStream("simple.dic");
-
-		try
-		{
-		  new Dictionary(affixStream, dictStream);
-		  fail("didn't get expected exception");
-		}
-		catch (Exception expected)
-		{
-		  assertTrue(expected.Message.startsWith("expected only one flag"));
-		}
-
-		affixStream.Close();
-		dictStream.Close();
-	  }
-
-	  private class CloseCheckInputStream : FilterInputStream
-	  {
-		  private readonly TestDictionary outerInstance;
-
-		internal bool closed = false;
-
-		public CloseCheckInputStream(TestDictionary outerInstance, System.IO.Stream @delegate) : base(@delegate)
-		{
-			this.outerInstance = outerInstance;
-		}
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void close() throws java.io.IOException
-		public override void close()
-		{
-		  this.closed = true;
-		  base.close();
-		}
-
-		public virtual bool Closed
-		{
-			get
-			{
-			  return this.closed;
-			}
-		}
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testResourceCleanup() throws Exception
-	  public virtual void testResourceCleanup()
-	  {
-		CloseCheckInputStream affixStream = new CloseCheckInputStream(this, this.GetType().getResourceAsStream("compressed.aff"));
-		CloseCheckInputStream dictStream = new CloseCheckInputStream(this, this.GetType().getResourceAsStream("compressed.dic"));
-
-		new Dictionary(affixStream, dictStream);
-
-		assertFalse(affixStream.Closed);
-		assertFalse(dictStream.Closed);
-
-		affixStream.close();
-		dictStream.close();
-
-		assertTrue(affixStream.Closed);
-		assertTrue(dictStream.Closed);
-	  }
-
-
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testReplacements() throws Exception
-	  public virtual void testReplacements()
-	  {
-		Outputs<CharsRef> outputs = CharSequenceOutputs.Singleton;
-		Builder<CharsRef> builder = new Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs);
-		IntsRef scratchInts = new IntsRef();
-
-		// a -> b
-		Util.toUTF16("a", scratchInts);
-		builder.add(scratchInts, new CharsRef("b"));
-
-		// ab -> c
-		Util.toUTF16("ab", scratchInts);
-		builder.add(scratchInts, new CharsRef("c"));
-
-		// c -> de
-		Util.toUTF16("c", scratchInts);
-		builder.add(scratchInts, new CharsRef("de"));
-
-		// def -> gh
-		Util.toUTF16("def", scratchInts);
-		builder.add(scratchInts, new CharsRef("gh"));
-
-		FST<CharsRef> fst = builder.finish();
-
-		StringBuilder sb = new StringBuilder("atestanother");
-		Dictionary.applyMappings(fst, sb);
-		assertEquals("btestbnother", sb.ToString());
-
-		sb = new StringBuilder("abtestanother");
-		Dictionary.applyMappings(fst, sb);
-		assertEquals("ctestbnother", sb.ToString());
-
-		sb = new StringBuilder("atestabnother");
-		Dictionary.applyMappings(fst, sb);
-		assertEquals("btestcnother", sb.ToString());
-
-		sb = new StringBuilder("abtestabnother");
-		Dictionary.applyMappings(fst, sb);
-		assertEquals("ctestcnother", sb.ToString());
-
-		sb = new StringBuilder("abtestabcnother");
-		Dictionary.applyMappings(fst, sb);
-		assertEquals("ctestcdenother", sb.ToString());
-
-		sb = new StringBuilder("defdefdefc");
-		Dictionary.applyMappings(fst, sb);
-		assertEquals("ghghghde", sb.ToString());
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testSetWithCrazyWhitespaceAndBOMs() throws Exception
-	  public virtual void testSetWithCrazyWhitespaceAndBOMs()
-	  {
-		assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\tUTF-8\n".GetBytes(StandardCharsets.UTF_8))));
-		assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\t UTF-8\n".GetBytes(StandardCharsets.UTF_8))));
-		assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\n".GetBytes(StandardCharsets.UTF_8))));
-		assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\r\n".GetBytes(StandardCharsets.UTF_8))));
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testFlagWithCrazyWhitespace() throws Exception
-	  public virtual void testFlagWithCrazyWhitespace()
-	  {
-		assertNotNull(Dictionary.getFlagParsingStrategy("FLAG\tUTF-8"));
-		assertNotNull(Dictionary.getFlagParsingStrategy("FLAG    UTF-8"));
-	  }
-	}
-
+    public class TestDictionary : LuceneTestCase
+    {
+
+        [Test]
+        public virtual void TestSimpleDictionary()
+        {
+            using (System.IO.Stream affixStream = this.GetType().getResourceAsStream("simple.aff"))
+            {
+                using (System.IO.Stream dictStream = this.GetType().getResourceAsStream("simple.dic"))
+                {
+
+                    Dictionary dictionary = new Dictionary(affixStream, dictStream);
+                    assertEquals(3, dictionary.LookupSuffix(new char[] { 'e' }, 0, 1).Length);
+                    assertEquals(1, dictionary.LookupPrefix(new char[] { 's' }, 0, 1).Length);
+                    IntsRef ordList = dictionary.LookupWord(new char[] { 'o', 'l', 'r' }, 0, 3);
+                    assertNotNull(ordList);
+                    assertEquals(1, ordList.Length);
+
+                    BytesRef @ref = new BytesRef();
+                    dictionary.flagLookup.Get(ordList.Ints[0], @ref);
+                    char[] flags = Dictionary.DecodeFlags(@ref);
+                    assertEquals(1, flags.Length);
+
+                    ordList = dictionary.LookupWord(new char[] { 'l', 'u', 'c', 'e', 'n' }, 0, 5);
+                    assertNotNull(ordList);
+                    assertEquals(1, ordList.Length);
+                    dictionary.flagLookup.Get(ordList.Ints[0], @ref);
+                    flags = Dictionary.DecodeFlags(@ref);
+                    assertEquals(1, flags.Length);
+                }
+            }
+        }
+
+        [Test]
+        public virtual void TestCompressedDictionary()
+        {
+            using (System.IO.Stream affixStream = this.GetType().getResourceAsStream("compressed.aff"))
+            {
+                using (System.IO.Stream dictStream = this.GetType().getResourceAsStream("compressed.dic"))
+                {
+
+                    Dictionary dictionary = new Dictionary(affixStream, dictStream);
+                    assertEquals(3, dictionary.LookupSuffix(new char[] { 'e' }, 0, 1).Length);
+                    assertEquals(1, dictionary.LookupPrefix(new char[] { 's' }, 0, 1).Length);
+                    IntsRef ordList = dictionary.LookupWord(new char[] { 'o', 'l', 'r' }, 0, 3);
+                    BytesRef @ref = new BytesRef();
+                    dictionary.flagLookup.Get(ordList.Ints[0], @ref);
+                    char[] flags = Dictionary.DecodeFlags(@ref);
+                    assertEquals(1, flags.Length);
+                }
+            }
+        }
+
+        [Test]
+        public virtual void TestCompressedBeforeSetDictionary()
+        {
+            using (System.IO.Stream affixStream = this.GetType().getResourceAsStream("compressed-before-set.aff"))
+            {
+                using (System.IO.Stream dictStream = this.GetType().getResourceAsStream("compressed.dic"))
+                {
+
+                    Dictionary dictionary = new Dictionary(affixStream, dictStream);
+                    assertEquals(3, dictionary.LookupSuffix(new char[] { 'e' }, 0, 1).Length);
+                    assertEquals(1, dictionary.LookupPrefix(new char[] { 's' }, 0, 1).Length);
+                    IntsRef ordList = dictionary.LookupWord(new char[] { 'o', 'l', 'r' }, 0, 3);
+                    BytesRef @ref = new BytesRef();
+                    dictionary.flagLookup.Get(ordList.Ints[0], @ref);
+                    char[] flags = Dictionary.DecodeFlags(@ref);
+                    assertEquals(1, flags.Length);
+                }
+            }
+        }
+
+        [Test]
+        public virtual void TestCompressedEmptyAliasDictionary()
+        {
+            using (System.IO.Stream affixStream = this.GetType().getResourceAsStream("compressed-empty-alias.aff"))
+            {
+                using (System.IO.Stream dictStream = this.GetType().getResourceAsStream("compressed.dic"))
+                {
+                    Dictionary dictionary = new Dictionary(affixStream, dictStream);
+                    assertEquals(3, dictionary.LookupSuffix(new char[] { 'e' }, 0, 1).Length);
+                    assertEquals(1, dictionary.LookupPrefix(new char[] { 's' }, 0, 1).Length);
+                    IntsRef ordList = dictionary.LookupWord(new char[] { 'o', 'l', 'r' }, 0, 3);
+                    BytesRef @ref = new BytesRef();
+                    dictionary.flagLookup.Get(ordList.Ints[0], @ref);
+                    char[] flags = Dictionary.DecodeFlags(@ref);
+                    assertEquals(1, flags.Length);
+                }
+            }
+        }
+
+        // malformed rule causes ParseException
+        [Test]
+        public virtual void TestInvalidData()
+        {
+            using (System.IO.Stream affixStream = this.GetType().getResourceAsStream("broken.aff"))
+            {
+                using (System.IO.Stream dictStream = this.GetType().getResourceAsStream("simple.dic"))
+                {
+
+                    try
+                    {
+                        new Dictionary(affixStream, dictStream);
+                        fail("didn't get expected exception");
+                    }
+                    catch (Exception expected)
+                    {
+                        assertTrue(expected.Message.StartsWith("The affix file contains a rule with less than four elements"));
+                        //assertEquals(24, expected.ErrorOffset); // No parse exception in LUCENENET
+                    }
+                }
+            }
+        }
+
+        // malformed flags causes ParseException
+        [Test]
+        public virtual void TestInvalidFlags()
+        {
+            using (System.IO.Stream affixStream = this.GetType().getResourceAsStream("broken-flags.aff"))
+            {
+                using (System.IO.Stream dictStream = this.GetType().getResourceAsStream("simple.dic"))
+                {
+                    try
+                    {
+                        new Dictionary(affixStream, dictStream);
+                        fail("didn't get expected exception");
+                    }
+                    catch (Exception expected)
+                    {
+                        assertTrue(expected.Message.StartsWith("expected only one flag"));
+                    }
+                }
+            }
+        }
+
+        private class CloseCheckInputStream : Stream, IDisposable
+        {
+            private readonly TestDictionary outerInstance;
+            private readonly Stream @delegate;
+
+            internal bool disposed = false;
+
+            public override bool CanRead
+            {
+                get
+                {
+                    return @delegate.CanRead;
+                }
+            }
+
+            public override bool CanSeek
+            {
+                get
+                {
+                    return @delegate.CanSeek;
+                }
+            }
+
+            public override bool CanWrite
+            {
+                get
+                {
+                    return @delegate.CanWrite;
+                }
+            }
+
+            public override long Length
+            {
+                get
+                {
+                    return @delegate.Length;
+                }
+            }
+
+            public override long Position
+            {
+                get
+                {
+                    return @delegate.Position;
+                }
+
+                set
+                {
+                    @delegate.Position = value;
+                }
+            }
+
+            public CloseCheckInputStream(TestDictionary outerInstance, System.IO.Stream @delegate) 
+            {
+                this.@delegate = @delegate;
+                this.outerInstance = outerInstance;
+            }
+
+            public override void Close()
+            {
+                @delegate.Close();
+            }
+
+
+            new public void Dispose()
+            {
+                this.disposed = true;
+                base.Dispose();
+            }
+            
+
+            public virtual bool Disposed
+            {
+                get { return this.disposed; }
+            }
+
+            public override void Flush()
+            {
+                @delegate.Flush();
+            }
+
+            public override long Seek(long offset, SeekOrigin origin)
+            {
+                return @delegate.Seek(offset, origin);
+            }
+
+            public override void SetLength(long value)
+            {
+                @delegate.SetLength(value);
+            }
+
+            public override int Read(byte[] buffer, int offset, int count)
+            {
+                return @delegate.Read(buffer, offset, count);
+            }
+
+            public override void Write(byte[] buffer, int offset, int count)
+            {
+                @delegate.Write(buffer, offset, count);
+            }
+        }
+
+        [Test]
+        public virtual void TestResourceCleanup()
+        {
+            CloseCheckInputStream affixStream = new CloseCheckInputStream(this, this.GetType().getResourceAsStream("compressed.aff"));
+            CloseCheckInputStream dictStream = new CloseCheckInputStream(this, this.GetType().getResourceAsStream("compressed.dic"));
+
+            new Dictionary(affixStream, dictStream);
+
+            assertFalse(affixStream.Disposed);
+            assertFalse(dictStream.Disposed);
+
+            affixStream.Dispose();
+            dictStream.Dispose();
+
+            assertTrue(affixStream.Disposed);
+            assertTrue(dictStream.Disposed);
+        }
+
+
+
+        [Test]
+        public virtual void TestReplacements()
+        {
+            Outputs<CharsRef> outputs = CharSequenceOutputs.Singleton;
+            Builder<CharsRef> builder = new Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs);
+            IntsRef scratchInts = new IntsRef();
+
+            // a -> b
+            Lucene.Net.Util.Fst.Util.ToUTF16("a", scratchInts);
+            builder.Add(scratchInts, new CharsRef("b"));
+
+            // ab -> c
+            Lucene.Net.Util.Fst.Util.ToUTF16("ab", scratchInts);
+            builder.Add(scratchInts, new CharsRef("c"));
+
+            // c -> de
+            Lucene.Net.Util.Fst.Util.ToUTF16("c", scratchInts);
+            builder.Add(scratchInts, new CharsRef("de"));
+
+            // def -> gh
+            Lucene.Net.Util.Fst.Util.ToUTF16("def", scratchInts);
+            builder.Add(scratchInts, new CharsRef("gh"));
+
+            FST<CharsRef> fst = builder.Finish();
+
+            StringBuilder sb = new StringBuilder("atestanother");
+            Dictionary.ApplyMappings(fst, sb);
+            assertEquals("btestbnother", sb.ToString());
+
+            sb = new StringBuilder("abtestanother");
+            Dictionary.ApplyMappings(fst, sb);
+            assertEquals("ctestbnother", sb.ToString());
+
+            sb = new StringBuilder("atestabnother");
+            Dictionary.ApplyMappings(fst, sb);
+            assertEquals("btestcnother", sb.ToString());
+
+            sb = new StringBuilder("abtestabnother");
+            Dictionary.ApplyMappings(fst, sb);
+            assertEquals("ctestcnother", sb.ToString());
+
+            sb = new StringBuilder("abtestabcnother");
+            Dictionary.ApplyMappings(fst, sb);
+            assertEquals("ctestcdenother", sb.ToString());
+
+            sb = new StringBuilder("defdefdefc");
+            Dictionary.ApplyMappings(fst, sb);
+            assertEquals("ghghghde", sb.ToString());
+        }
+
+        [Test]
+        public virtual void TestSetWithCrazyWhitespaceAndBOMs()
+        {
+            assertEquals("UTF-8", Dictionary.GetDictionaryEncoding(new MemoryStream("SET\tUTF-8\n".GetBytes(Encoding.UTF8))));
+            assertEquals("UTF-8", Dictionary.GetDictionaryEncoding(new MemoryStream("SET\t UTF-8\n".GetBytes(Encoding.UTF8))));
+            assertEquals("UTF-8", Dictionary.GetDictionaryEncoding(new MemoryStream("\uFEFFSET\tUTF-8\n".GetBytes(Encoding.UTF8))));
+            assertEquals("UTF-8", Dictionary.GetDictionaryEncoding(new MemoryStream("\uFEFFSET\tUTF-8\r\n".GetBytes(Encoding.UTF8))));
+        }
+
+        [Test]
+        public virtual void TestFlagWithCrazyWhitespace()
+        {
+            assertNotNull(Dictionary.GetFlagParsingStrategy("FLAG\tUTF-8"));
+            assertNotNull(Dictionary.GetFlagParsingStrategy("FLAG    UTF-8"));
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestEscaped.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestEscaped.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestEscaped.cs
index 1a50fe4..8db5602 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestEscaped.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestEscaped.cs
@@ -1,7 +1,8 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
-{
+\ufeffusing NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,27 +19,22 @@
 	 * limitations under the License.
 	 */
 
-	using BeforeClass = org.junit.BeforeClass;
-
-	public class TestEscaped : StemmerTestBase
-	{
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-	  public static void beforeClass()
-	  {
-		init("escaped.aff", "escaped.dic");
-	  }
-
-	  public virtual void testStemming()
-	  {
-		assertStemsTo("works", "work");
-		assertStemsTo("work", "work");
-		assertStemsTo("R2/D2", "R2/D2");
-		assertStemsTo("R2/D2s", "R2/D2");
-		assertStemsTo("N/A", "N/A");
-		assertStemsTo("N/As");
-	  }
-	}
-
+    public class TestEscaped : StemmerTestBase
+    {
+        [TestFixtureSetUp]
+        public static void BeforeClass()
+        {
+            Init("escaped.aff", "escaped.dic");
+        }
+        [Test]
+        public virtual void TestStemming()
+        {
+            AssertStemsTo("works", "work");
+            AssertStemsTo("work", "work");
+            AssertStemsTo("R2/D2", "R2/D2");
+            AssertStemsTo("R2/D2s", "R2/D2");
+            AssertStemsTo("N/A", "N/A");
+            AssertStemsTo("N/As");
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFlagLong.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFlagLong.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFlagLong.cs
index c94e5fb..cb589ec 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFlagLong.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFlagLong.cs
@@ -1,7 +1,8 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
-{
+\ufeffusing NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,24 +19,19 @@
 	 * limitations under the License.
 	 */
 
-	using BeforeClass = org.junit.BeforeClass;
-
-	public class TestFlagLong : StemmerTestBase
-	{
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-	  public static void beforeClass()
-	  {
-		init("flaglong.aff", "flaglong.dic");
-	  }
-
-	  public virtual void testLongFlags()
-	  {
-		assertStemsTo("foo", "foo");
-		assertStemsTo("foos", "foo");
-		assertStemsTo("fooss");
-	  }
-	}
-
+    public class TestFlagLong : StemmerTestBase
+    {
+        [TestFixtureSetUp]
+        public static void BeforeClass()
+        {
+            Init("flaglong.aff", "flaglong.dic");
+        }
+        [Test]
+        public virtual void TestLongFlags()
+        {
+            AssertStemsTo("foo", "foo");
+            AssertStemsTo("foos", "foo");
+            AssertStemsTo("fooss");
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFlagNum.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFlagNum.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFlagNum.cs
index 4873fc7..d1b16a3 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFlagNum.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestFlagNum.cs
@@ -1,7 +1,8 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
-{
+\ufeffusing NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,24 +19,19 @@
 	 * limitations under the License.
 	 */
 
-	using BeforeClass = org.junit.BeforeClass;
-
-	public class TestFlagNum : StemmerTestBase
-	{
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-	  public static void beforeClass()
-	  {
-		init("flagnum.aff", "flagnum.dic");
-	  }
-
-	  public virtual void testNumFlags()
-	  {
-		assertStemsTo("foo", "foo");
-		assertStemsTo("foos", "foo");
-		assertStemsTo("fooss");
-	  }
-	}
-
+    public class TestFlagNum : StemmerTestBase
+    {
+        [TestFixtureSetUp]
+        public static void BeforeClass()
+        {
+            Init("flagnum.aff", "flagnum.dic");
+        }
+        [Test]
+        public virtual void TestNumFlags()
+        {
+            AssertStemsTo("foo", "foo");
+            AssertStemsTo("foos", "foo");
+            AssertStemsTo("fooss");
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
index 11bb494..e07a15b 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHomonyms.cs
@@ -1,7 +1,8 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
-{
+\ufeffusing NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,23 +19,18 @@
 	 * limitations under the License.
 	 */
 
-	using BeforeClass = org.junit.BeforeClass;
-
-	public class TestHomonyms : StemmerTestBase
-	{
-
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-	  public static void beforeClass()
-	  {
-		init("homonyms.aff", "homonyms.dic");
-	  }
-
-	  public virtual void testExamples()
-	  {
-		assertStemsTo("works", "work", "work");
-	  }
-	}
+    public class TestHomonyms : StemmerTestBase
+    {
 
+        [TestFixtureSetUp]
+        public static void BeforeClass()
+        {
+            Init("homonyms.aff", "homonyms.dic");
+        }
+        [Test]
+        public virtual void TestExamples()
+        {
+            AssertStemsTo("works", "work", "work");
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHunspellStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHunspellStemFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHunspellStemFilter.cs
index 6c3e22b..413d99b 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHunspellStemFilter.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHunspellStemFilter.cs
@@ -1,7 +1,14 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
+\ufeffusing Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System.IO;
+
+namespace Lucene.Net.Analysis.Hunspell
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,161 +25,142 @@
 	 * limitations under the License.
 	 */
 
-
-	using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
-	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
-	using IOUtils = org.apache.lucene.util.IOUtils;
-	using AfterClass = org.junit.AfterClass;
-	using BeforeClass = org.junit.BeforeClass;
-
-	public class TestHunspellStemFilter : BaseTokenStreamTestCase
-	{
-	  private static Dictionary dictionary;
-
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-	  public static void beforeClass()
-	  {
-		System.IO.Stream affixStream = typeof(TestStemmer).getResourceAsStream("simple.aff");
-		System.IO.Stream dictStream = typeof(TestStemmer).getResourceAsStream("simple.dic");
-		try
-		{
-		  dictionary = new Dictionary(affixStream, dictStream);
-		}
-		finally
-		{
-		  IOUtils.closeWhileHandlingException(affixStream, dictStream);
-		}
-	  }
-
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @AfterClass public static void afterClass()
-	  public static void afterClass()
-	  {
-		dictionary = null;
-	  }
-
-	  /// <summary>
-	  /// Simple test for KeywordAttribute </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testKeywordAttribute() throws java.io.IOException
-	  public virtual void testKeywordAttribute()
-	  {
-		MockTokenizer tokenizer = new MockTokenizer(new StringReader("lucene is awesome"));
-		tokenizer.EnableChecks = true;
-		HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary);
-		assertTokenStreamContents(filter, new string[]{"lucene", "lucen", "is", "awesome"}, new int[] {1, 0, 1, 1});
-
-		// assert with keyword marker
-		tokenizer = new MockTokenizer(new StringReader("lucene is awesome"));
-		CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList("Lucene"), true);
-		filter = new HunspellStemFilter(new SetKeywordMarkerFilter(tokenizer, set), dictionary);
-		assertTokenStreamContents(filter, new string[]{"lucene", "is", "awesome"}, new int[] {1, 1, 1});
-	  }
-
-	  /// <summary>
-	  /// simple test for longestOnly option </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testLongestOnly() throws java.io.IOException
-	  public virtual void testLongestOnly()
-	  {
-		MockTokenizer tokenizer = new MockTokenizer(new StringReader("lucene is awesome"));
-		tokenizer.EnableChecks = true;
-		HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary, true, true);
-		assertTokenStreamContents(filter, new string[]{"lucene", "is", "awesome"}, new int[] {1, 1, 1});
-	  }
-
-	  /// <summary>
-	  /// blast some random strings through the analyzer </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testRandomStrings() throws Exception
-	  public virtual void testRandomStrings()
-	  {
-		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
-		checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
-	  }
-
-	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
-	  {
-		  private readonly TestHunspellStemFilter outerInstance;
-
-		  public AnalyzerAnonymousInnerClassHelper(TestHunspellStemFilter outerInstance)
-		  {
-			  this.outerInstance = outerInstance;
-		  }
-
-		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
-		  {
-			Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-			return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, dictionary));
-		  }
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testEmptyTerm() throws java.io.IOException
-	  public virtual void testEmptyTerm()
-	  {
-		Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this);
-		checkOneTerm(a, "", "");
-	  }
-
-	  private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
-	  {
-		  private readonly TestHunspellStemFilter outerInstance;
-
-		  public AnalyzerAnonymousInnerClassHelper2(TestHunspellStemFilter outerInstance)
-		  {
-			  this.outerInstance = outerInstance;
-		  }
-
-		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
-		  {
-			Tokenizer tokenizer = new KeywordTokenizer(reader);
-			return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, dictionary));
-		  }
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testIgnoreCaseNoSideEffects() throws Exception
-	  public virtual void testIgnoreCaseNoSideEffects()
-	  {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.apache.lucene.analysis.hunspell.Dictionary d;
-		Dictionary d;
-		System.IO.Stream affixStream = typeof(TestStemmer).getResourceAsStream("simple.aff");
-		System.IO.Stream dictStream = typeof(TestStemmer).getResourceAsStream("simple.dic");
-		try
-		{
-		  d = new Dictionary(affixStream, Collections.singletonList(dictStream), true);
-		}
-		finally
-		{
-		  IOUtils.closeWhileHandlingException(affixStream, dictStream);
-		}
-		Analyzer a = new AnalyzerAnonymousInnerClassHelper3(this, d);
-		checkOneTerm(a, "NoChAnGy", "NoChAnGy");
-	  }
-
-	  private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
-	  {
-		  private readonly TestHunspellStemFilter outerInstance;
-
-		  private Dictionary d;
-
-		  public AnalyzerAnonymousInnerClassHelper3(TestHunspellStemFilter outerInstance, Dictionary d)
-		  {
-			  this.outerInstance = outerInstance;
-			  this.d = d;
-		  }
-
-		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
-		  {
-			Tokenizer tokenizer = new KeywordTokenizer(reader);
-			return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, d));
-		  }
-	  }
-	}
-
+    public class TestHunspellStemFilter : BaseTokenStreamTestCase
+    {
+        private static Dictionary dictionary;
+
+        [TestFixtureSetUp]
+        public static void BeforeClass()
+        {
+            System.IO.Stream affixStream = typeof(TestStemmer).getResourceAsStream("simple.aff");
+            System.IO.Stream dictStream = typeof(TestStemmer).getResourceAsStream("simple.dic");
+            try
+            {
+                dictionary = new Dictionary(affixStream, dictStream);
+            }
+            finally
+            {
+                IOUtils.CloseWhileHandlingException(affixStream, dictStream);
+            }
+        }
+
+        [TestFixtureTearDown]
+        public static void afterClass()
+        {
+            dictionary = null;
+        }
+
+        /// <summary>
+        /// Simple test for KeywordAttribute </summary>
+        [Test]
+        public virtual void TestKeywordAttribute()
+        {
+            MockTokenizer tokenizer = new MockTokenizer(new StringReader("lucene is awesome"));
+            tokenizer.EnableChecks = true;
+            HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary);
+            AssertTokenStreamContents(filter, new string[] { "lucene", "lucen", "is", "awesome" }, new int[] { 1, 0, 1, 1 });
+
+            // assert with keyword marker
+            tokenizer = new MockTokenizer(new StringReader("lucene is awesome"));
+            CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.AsList("Lucene"), true);
+            filter = new HunspellStemFilter(new SetKeywordMarkerFilter(tokenizer, set), dictionary);
+            AssertTokenStreamContents(filter, new string[] { "lucene", "is", "awesome" }, new int[] { 1, 1, 1 });
+        }
+
+        /// <summary>
+        /// simple test for longestOnly option </summary>
+        [Test]
+        public virtual void TestLongestOnly()
+        {
+            MockTokenizer tokenizer = new MockTokenizer(new StringReader("lucene is awesome"));
+            tokenizer.EnableChecks = true;
+            HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary, true, true);
+            AssertTokenStreamContents(filter, new string[] { "lucene", "is", "awesome" }, new int[] { 1, 1, 1 });
+        }
+
+        /// <summary>
+        /// blast some random strings through the analyzer </summary>
+        [Test]
+        public virtual void TestRandomStrings()
+        {
+            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
+            CheckRandomData(Random(), analyzer, 1000 * RANDOM_MULTIPLIER);
+        }
+
+        private class AnalyzerAnonymousInnerClassHelper : Analyzer
+        {
+            private readonly TestHunspellStemFilter outerInstance;
+
+            public AnalyzerAnonymousInnerClassHelper(TestHunspellStemFilter outerInstance)
+            {
+                this.outerInstance = outerInstance;
+            }
+
+            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, dictionary));
+            }
+        }
+
+        [Test]
+        public virtual void TestEmptyTerm()
+        {
+            Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this);
+            CheckOneTerm(a, "", "");
+        }
+
+        private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+        {
+            private readonly TestHunspellStemFilter outerInstance;
+
+            public AnalyzerAnonymousInnerClassHelper2(TestHunspellStemFilter outerInstance)
+            {
+                this.outerInstance = outerInstance;
+            }
+
+            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer tokenizer = new KeywordTokenizer(reader);
+                return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, dictionary));
+            }
+        }
+
+        [Test]
+        public virtual void TestIgnoreCaseNoSideEffects()
+        {
+            Dictionary d;
+            System.IO.Stream affixStream = typeof(TestStemmer).getResourceAsStream("simple.aff");
+            System.IO.Stream dictStream = typeof(TestStemmer).getResourceAsStream("simple.dic");
+            try
+            {
+                d = new Dictionary(affixStream, Arrays.AsList(dictStream), true);
+            }
+            finally
+            {
+                IOUtils.CloseWhileHandlingException(affixStream, dictStream);
+            }
+            Analyzer a = new AnalyzerAnonymousInnerClassHelper3(this, d);
+            CheckOneTerm(a, "NoChAnGy", "NoChAnGy");
+        }
+
+        private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+        {
+            private readonly TestHunspellStemFilter outerInstance;
+
+            private Dictionary d;
+
+            public AnalyzerAnonymousInnerClassHelper3(TestHunspellStemFilter outerInstance, Dictionary d)
+            {
+                this.outerInstance = outerInstance;
+                this.d = d;
+            }
+
+            public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer tokenizer = new KeywordTokenizer(reader);
+                return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, d));
+            }
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHunspellStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHunspellStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHunspellStemFilterFactory.cs
index 2073e7b..fbfb203 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHunspellStemFilterFactory.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestHunspellStemFilterFactory.cs
@@ -1,7 +1,10 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
-{
+\ufeffusing Lucene.Net.Analysis.Util;
+using NUnit.Framework;
+using System.IO;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,40 +21,34 @@
 	 * limitations under the License.
 	 */
 
+    /// <summary>
+    /// Simple tests to ensure the Hunspell stemmer loads from factory
+    /// </summary>
+    public class TestHunspellStemFilterFactory : BaseTokenStreamFactoryTestCase
+    {
+        [Test]
+        public virtual void TestStemming()
+        {
+            TextReader reader = new StringReader("abc");
+            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            stream = TokenFilterFactory("HunspellStem", "dictionary", "simple.dic", "affix", "simple.aff").Create(stream);
+            AssertTokenStreamContents(stream, new string[] { "ab" });
+        }
 
-	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
-
-	/// <summary>
-	/// Simple tests to ensure the Hunspell stemmer loads from factory
-	/// </summary>
-	public class TestHunspellStemFilterFactory : BaseTokenStreamFactoryTestCase
-	{
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testStemming() throws Exception
-	  public virtual void testStemming()
-	  {
-		Reader reader = new StringReader("abc");
-		TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-		stream = tokenFilterFactory("HunspellStem", "dictionary", "simple.dic", "affix", "simple.aff").create(stream);
-		assertTokenStreamContents(stream, new string[] {"ab"});
-	  }
-
-	  /// <summary>
-	  /// Test that bogus arguments result in exception </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testBogusArguments() throws Exception
-	  public virtual void testBogusArguments()
-	  {
-		try
-		{
-		  tokenFilterFactory("HunspellStem", "dictionary", "simple.dic", "bogusArg", "bogusValue");
-		  fail();
-		}
-		catch (System.ArgumentException expected)
-		{
-		  assertTrue(expected.Message.contains("Unknown parameters"));
-		}
-	  }
-	}
-
+        /// <summary>
+        /// Test that bogus arguments result in exception </summary>
+        [Test]
+        public virtual void TestBogusArguments()
+        {
+            try
+            {
+                TokenFilterFactory("HunspellStem", "dictionary", "simple.dic", "bogusArg", "bogusValue");
+                fail();
+            }
+            catch (System.ArgumentException expected)
+            {
+                assertTrue(expected.Message.Contains("Unknown parameters"));
+            }
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestIgnore.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestIgnore.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestIgnore.cs
index 77f5b84..bc96964 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestIgnore.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestIgnore.cs
@@ -1,7 +1,8 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
-{
+\ufeffusing NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,27 +19,22 @@
 	 * limitations under the License.
 	 */
 
-	using BeforeClass = org.junit.BeforeClass;
-
-	public class TestIgnore : StemmerTestBase
-	{
-
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-	  public static void beforeClass()
-	  {
-		init("ignore.aff", "ignore.dic");
-	  }
-
-	  public virtual void testExamples()
-	  {
-		assertStemsTo("drink", "drink");
-		assertStemsTo("drinkable", "drink");
-		assertStemsTo("dr'ink-able", "drink");
-		assertStemsTo("drank-able", "drank");
-		assertStemsTo("'-'-'-");
-	  }
-	}
+    public class TestIgnore : StemmerTestBase
+    {
 
+        [TestFixtureSetUp]
+        public static void BeforeClass()
+        {
+            Init("ignore.aff", "ignore.dic");
+        }
+        [Test]
+        public virtual void TestExamples()
+        {
+            AssertStemsTo("drink", "drink");
+            AssertStemsTo("drinkable", "drink");
+            AssertStemsTo("dr'ink-able", "drink");
+            AssertStemsTo("drank-able", "drank");
+            AssertStemsTo("'-'-'-");
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestMorph.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestMorph.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestMorph.cs
index 20a1d77..f371ec4 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestMorph.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestMorph.cs
@@ -1,7 +1,8 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
-{
+\ufeffusing NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,25 +19,20 @@
 	 * limitations under the License.
 	 */
 
-	using BeforeClass = org.junit.BeforeClass;
-
-	public class TestMorph : StemmerTestBase
-	{
-
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-	  public static void beforeClass()
-	  {
-		init("morph.aff", "morph.dic");
-	  }
-
-	  public virtual void testExamples()
-	  {
-		assertStemsTo("drink", "drink");
-		assertStemsTo("drinkable", "drink");
-		assertStemsTo("drinkableable");
-	  }
-	}
+    public class TestMorph : StemmerTestBase
+    {
 
+        [TestFixtureSetUp]
+        public static void BeforeClass()
+        {
+            Init("morph.aff", "morph.dic");
+        }
+        [Test]
+        public virtual void TestExamples()
+        {
+            AssertStemsTo("drink", "drink");
+            AssertStemsTo("drinkable", "drink");
+            AssertStemsTo("drinkableable");
+        }
+    }
 }
\ No newline at end of file


[19/50] [abbrv] lucenenet git commit: Removed test attribute from Queries.BooleanFilterTest.TstFilterCard() and Facet.Taxonomy.Directory.TestDirectoryTaxonomyWriter.TouchTaxo(), which are clearly not tests.

Posted by sy...@apache.org.
Removed test attribute from Queries.BooleanFilterTest.TstFilterCard() and Facet.Taxonomy.Directory.TestDirectoryTaxonomyWriter.TouchTaxo(), which are clearly not tests.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/828819df
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/828819df
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/828819df

Branch: refs/heads/analysis-work
Commit: 828819dfa674cbd90a7c6422ac8ec12b20e1dd06
Parents: 80c9e8c
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Aug 21 07:05:14 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 07:05:14 2016 +0700

----------------------------------------------------------------------
 .../Taxonomy/Directory/TestDirectoryTaxonomyWriter.cs               | 1 -
 src/Lucene.Net.Tests.Queries/BooleanFilterTest.cs                   | 1 -
 2 files changed, 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/828819df/src/Lucene.Net.Tests.Facet/Taxonomy/Directory/TestDirectoryTaxonomyWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Facet/Taxonomy/Directory/TestDirectoryTaxonomyWriter.cs b/src/Lucene.Net.Tests.Facet/Taxonomy/Directory/TestDirectoryTaxonomyWriter.cs
index 24b10f7..60e90c2 100644
--- a/src/Lucene.Net.Tests.Facet/Taxonomy/Directory/TestDirectoryTaxonomyWriter.cs
+++ b/src/Lucene.Net.Tests.Facet/Taxonomy/Directory/TestDirectoryTaxonomyWriter.cs
@@ -193,7 +193,6 @@ namespace Lucene.Net.Facet.Taxonomy.Directory
             dir.Dispose();
         }
 
-        [Test]
         private void TouchTaxo(DirectoryTaxonomyWriter taxoWriter, FacetLabel cp)
         {
             taxoWriter.AddCategory(cp);

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/828819df/src/Lucene.Net.Tests.Queries/BooleanFilterTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Queries/BooleanFilterTest.cs b/src/Lucene.Net.Tests.Queries/BooleanFilterTest.cs
index 3b5e22a..9d37765 100644
--- a/src/Lucene.Net.Tests.Queries/BooleanFilterTest.cs
+++ b/src/Lucene.Net.Tests.Queries/BooleanFilterTest.cs
@@ -139,7 +139,6 @@ namespace Lucene.Net.Tests.Queries
             }
         }
 
-        [Test]
         private void TstFilterCard(string mes, int expected, Filter filt)
         {
             DocIdSet docIdSet = filt.GetDocIdSet(reader.AtomicContext, reader.LiveDocs);


[10/50] [abbrv] lucenenet git commit: Fixed bugs in KStemmer that were causing tests TestKStemmer.TestVocabulary and TestKStemmer.TestRandomStrings to fail.

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c185dc/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData7.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData7.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData7.cs
index 4f04f68..db4b91f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData7.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemData7.cs
@@ -47,6 +47,707 @@ namespace Lucene.Net.Analysis.En
         private KStemData7()
         {
         }
-        internal static string[] data = new string[] { "rupee", "rupture", "rural", "ruritanian", "ruse", "rush", "rushes", "rushlight", "rusk", "russet", "rust", "rustic", "rusticate", "rustication", "rustle", "rustler", "rustless", "rustling", "rustproof", "rusty", "rut", "ruthless", "rutting", "rye", "sabbatarian", "sabbath", "sabbatical", "saber", "sable", "sabot", "sabotage", "saboteur", "sabra", "sabre", "sac", "saccharin", "saccharine", "sacerdotal", "sacerdotalism", "sachet", "sack", "sackbut", "sackcloth", "sacral", "sacrament", "sacramental", "sacred", "sacrifice", "sacrificial", "sacrilege", "sacrilegious", "sacristan", "sacristy", "sacroiliac", "sacrosanct", "sad", "sadden", "saddle", "saddlebag", "saddler", "saddlery", "sadducee", "sadhu", "sadism", "sadly", "sadomasochism", "safari", "safe", "safebreaker", "safeguard", "safekeeping", "safety", "saffron", "sag", "saga", "sagacious", "sagacity", "sagebrush", "sago", "sahib", "said", "sail", "sailcloth", "sailing", "sailo
 r", "sailplane", "saint", "sainted", "saintly", "saith", "sake", "saki", "salaam", "salable", "salacious", "salacity", "salad", "salamander", "salami", "salaried", "salary", "sale", "saleable", "saleroom", "sales", "salesclerk", "salesgirl", "saleslady", "salesman", "salesmanship", "salient", "saliferous", "salify", "saline", "salinometer", "saliva", "salivary", "salivate", "sallow", "sally", "salmon", "salmonella", "salon", "saloon", "salsify", "salt", "saltcellar", "saltire", "saltlick", "saltpan", "saltpeter", "saltpetre", "salts", "saltshaker", "saltwater", "salty", "salubrious", "salutary", "salutation", "salute", "salvage", "salvation", "salvationist", "salve", "salvedge", "salver", "salvia", "salvo", "samaritan", "samaritans", "samba", "same", "sameness", "samovar", "sampan", "sample", "sampler", "samurai", "sanatorium", "sanctify", "sanctimonious", "sanction", "sanctities", "sanctity", "sanctuary", "sanctum", "sanctus", "sand", "sandal", "sandalwood", "sandbag", "sandbank", 
 "sandbar", "sandblast", "sandbox", "sandboy", "sandcastle", "sander", "sandglass", "sandman", "sandpaper", "sandpiper", "sandpit", "sands", "sandshoe", "sandstone", "sandstorm", "sandwich", "sandy", "sane", "sang", "sangfroid", "sangria", "sanguinary", "sanguine", "sanitary", "sanitation", "sanitorium", "sanity", "sank", "sans", "sanskrit", "sap", "sapience", "sapient", "sapless", "sapling", "sapper", "sapphic", "sapphire", "sappy", "sapwood", "saraband", "sarabande", "sarcasm", "sarcastic", "sarcophagus", "sardine", "sardonic", "sarge", "sari", "sarky", "sarong", "sarsaparilla", "sartorial", "sash", "sashay", "sass", "sassafras", "sassy", "sat", "satan", "satanic", "satanism", "satchel", "sate", "sateen", "satellite", "satiable", "satiate", "satiety", "satin", "satinwood", "satiny", "satire", "satirical", "satirise", "satirize", "satisfaction", "satisfactory", "satisfy", "satisfying", "satrap", "satsuma", "saturate", "saturation", "saturday", "saturn", "saturnalia", "saturnine", "s
 atyr", "sauce", "saucepan", "saucer", "saucy", "sauerkraut", "sauna", "saunter", "saurian", "sausage", "sauterne", "sauternes", "savage", "savagery", "savanna", "savannah", "savant", "save", "saveloy", "saver", "saving", "savings", "savior", "saviour", "savor", "savory", "savour", "savoury", "savoy", "savvy", "saw", "sawbones", "sawbuck", "sawdust", "sawhorse", "sawmill", "sawpit", "sawyer", "saxifrage", "saxon", "saxophone", "saxophonist", "say", "saying", "scab", "scabbard", "scabby", "scabies", "scabious", "scabrous", "scads", "scaffold", "scaffolding", "scalar", "scalawag", "scald", "scalding", "scale", "scalene", "scallion", "scallop", "scallywag", "scalp", "scalpel", "scaly", "scamp", "scamper", "scampi", "scan", "scandal", "scandalise", "scandalize", "scandalmonger", "scandalous", "scandinavian", "scanner", "scansion", "scant", "scanty", "scapegoat", "scapegrace", "scapula", "scar", "scarab", "scarce", "scarcely", "scarcity", "scare", "scarecrow", "scared", "scaremonger", "sc
 arf", "scarify", "scarlet", "scarp", "scarper", "scary", "scat", "scathing", "scatology", "scatter", "scatterbrain", "scatterbrained", "scattered", "scatty", "scavenge", "scavenger", "scenario", "scenarist", "scene", "scenery", "sceneshifter", "scenic", "scent", "scepter", "sceptic", "sceptical", "scepticism", "sceptre", "schedule", "schema", "schematic", "schematize", "scheme", "scherzo", "schism", "schismatic", "schist", "schizoid", "schizophrenia", "schizophrenic", "schmaltz", "schmalz", "schnapps", "schnitzel", "schnorkel", "scholar", "scholarly", "scholarship", "scholastic", "scholasticism", "school", "schoolboy", "schoolhouse", "schooling", "schoolman", "schoolmarm", "schoolmaster", "schoolmastering", "schoolmate", "schoolwork", "schooner", "schwa", "sciatic", "sciatica", "science", "scientific", "scientist", "scientology", "scimitar", "scintilla", "scintillate", "scion", "scissor", "scissors", "sclerosis", "scoff", "scold", "scollop", "sconce", "scone", "scoop", "scoot", "sco
 oter", "scope", "scorbutic", "scorch", "scorcher", "scorching", "score", "scoreboard", "scorebook", "scorecard", "scorekeeper", "scoreless", "scorer", "scorn", "scorpio", "scorpion", "scotch", "scoundrel", "scoundrelly", "scour", "scourer", "scourge", "scout", "scoutmaster", "scow", "scowl", "scrabble", "scrag", "scraggly", "scraggy", "scram", "scramble", "scrap", "scrapbook", "scrape", "scraper", "scrapings", "scrappy", "scraps", "scratch", "scratchpad", "scratchy", "scrawl", "scrawny", "scream", "screamingly", "scree", "screech", "screed", "screen", "screening", "screenplay", "screw", "screwball", "screwdriver", "screwy", "scribble", "scribbler", "scribe", "scrimmage", "scrimp", "scrimshank", "scrimshaw", "scrip", "script", "scripted", "scriptural", "scripture", "scriptwriter", "scrivener", "scrofula", "scrofulous", "scroll", "scrollwork", "scrooge", "scrotum", "scrounge", "scrub", "scrubber", "scrubby", "scruff", "scruffy", "scrum", "scrumcap", "scrumhalf", "scrummage", "scrumpti
 ous", "scrumpy", "scrunch", "scruple", "scrupulous", "scrutineer", "scrutinise", "scrutinize", "scrutiny", "scuba", "scud", "scuff", "scuffle", "scull", "scullery", "scullion", "sculptor", "sculptural", "sculpture", "scum", "scupper", "scurf", "scurrility", "scurrilous", "scurry", "scurvy", "scut", "scutcheon", "scuttle", "scylla", "scythe", "sea", "seabed", "seabird", "seaboard", "seaborne", "seafaring", "seafood", "seafront", "seagirt", "seagoing", "seagull", "seahorse", "seakale", "seal", "sealer", "sealing", "sealskin", "sealyham", "seam", "seaman", "seamanlike", "seamanship", "seamstress", "seamy", "seaplane", "seaport", "sear", "search", "searching", "searchlight", "searing", "seascape", "seashell", "seashore", "seasick", "seaside", "season", "seasonable", "seasonal", "seasoning", "seat", "seating", "seawall", "seaward", "seawards", "seawater", "seaway", "seaweed", "seaworthy", "sec", "secateurs", "secede", "secession", "seclude", "secluded", "seclusion", "seclusive", "second"
 , "secondary", "seconds", "secrecy", "secret", "secretarial", "secretariat", "secretary", "secrete", "secretion", "secretive", "sect", "sectarian", "section", "sectional", "sectionalism", "sector", "secular", "secularise", "secularism", "secularize", "secure", "security", "sedan", "sedate", "sedation", "sedative", "sedentary", "sedge", "sediment", "sedimentary", "sedimentation", "sedition", "seditious", "seduce", "seduction", "seductive", "sedulous", "see", "seed", "seedbed", "seedcake", "seedling", "seedsman", "seedy", "seeing", "seek", "seem", "seeming", "seemingly", "seemly", "seen", "seep", "seepage", "seer", "seersucker", "seesaw", "seethe", "segment", "segmentation", "segregate", "segregated", "segregation", "seigneur", "seine", "seismic", "seismograph", "seismology", "seize", "seizure", "seldom", "select", "selection", "selective", "selector", "selenium", "self", "selfish", "selfless", "selfsame", "sell", "seller", "sellotape", "selvage", "selves", "semantic", "semantics", "s
 emaphore", "semblance", "semeiology", "semen", "semester", "semibreve", "semicircle", "semicolon", "semiconductor", "semidetached", "semifinal", "semifinalist", "seminal", "seminar", "seminarist", "seminary", "semiology", "semiprecious", "semiquaver", "semitic", "semitone", "semitropical", "semivowel", "semiweekly", "semolina", "sempstress", "sen", "senate", "senator", "senatorial", "send", "sender", "senescence", "senescent", "seneschal", "senile", "senility", "senior", "seniority", "senna", "sensation", "sensational", "sensationalism", "sense", "senseless", "senses", "sensibility", "sensible", "sensitise", "sensitive", "sensitivity", "sensitize", "sensor", "sensory", "sensual", "sensualist", "sensuality", "sensuous", "sent", "sentence", "sententious", "sentient", "sentiment", "sentimental", "sentimentalise", "sentimentalism", "sentimentality", "sentimentalize", "sentinel", "sentry", "sepal", "separable", "separate", "separation", "separatism", "separator", "sepia", "sepoy", "sepsi
 s", "september", "septet", "septic", "septicaemia", "septicemia", "septuagenarian", "septuagesima", "septuagint", "sepulcher", "sepulchral", "sepulchre", "sequel", "sequence", "sequencing", "sequent", "sequential", "sequester", "sequestrate", "sequestration", "sequin", "sequoia", "seraglio", "seraph", "seraphic", "sere", "serenade", "serendipity", "serene", "serf", "serfdom", "serge", "sergeant", "serial", "serialise", "serialize", "seriatim", "sericulture", "series", "serif", "seriocomic", "serious", "seriously", "sermon", "sermonise", "sermonize", "serous", "serpent", "serpentine", "serrated", "serried", "serum", "serval", "servant", "serve", "server", "servery", "service", "serviceable", "serviceman", "serviette", "servile", "serving", "servitor", "servitude", "servomechanism", "servomotor", "sesame", "session", "sessions", "set", "setback", "setscrew", "setsquare", "sett", "settee", "setter", "setting", "settle", "settled", "settlement", "settler", "seven", "seventeen", "seventy
 ", "sever", "several", "severally", "severance", "severity", "sew", "sewage", "sewer", "sewerage", "sewing", "sex", "sexagenarian", "sexagesima", "sexism", "sexist", "sexless", "sextant", "sextet", "sexton", "sextuplet", "sexual", "sexuality", "sexy", "sforzando", "sgt", "shabby", "shack", "shackle", "shad", "shade", "shades", "shading", "shadow", "shadowbox", "shadowy", "shady", "shaft", "shag", "shagged", "shaggy", "shagreen", "shah", "shake", "shakedown", "shaker", "shakes", "shako", "shaky", "shale", "shall", "shallop", "shallot", "shallow", "shallows", "shalom", "shalt", "sham", "shaman", "shamble", "shambles", "shame", "shamefaced", "shameful", "shameless", "shammy", "shampoo", "shamrock", "shandy", "shanghai", "shank", "shantung", "shanty", "shantytown", "shape", "shaped", "shapely", "shard", "share", "sharecropper", "shareholder", "shares", "shark", "sharkskin", "sharp", "sharpen", "sharpener", "sharper", "sharpshooter", "shatter", "shave", "shaver", "shaving", "shawl", "sha
 y", "she", "sheaf", "shear", "shears", "sheath", "sheathe", "sheathing", "shebang", "shebeen", "shed", "sheen", "sheep", "sheepdip", "sheepdog", "sheepfold", "sheepish", "sheepskin", "sheer", "sheet", "sheeting", "sheik", "sheikdom", "sheikh", "sheikhdom", "sheila", "shekels", "shelduck", "shelf", "shell", "shellac", "shellacking", "shellfish", "shellshock", "shelter", "sheltered", "shelve", "shelves", "shelving", "shenanigan", "shepherd", "shepherdess", "sheraton", "sherbet", "sherd", "sheriff", "sherpa", "sherry", "shew", "shh", "shibboleth", "shield", "shift", "shiftless", "shifty", "shilling", "shimmer", "shin", "shinbone", "shindig", "shindy", "shine", "shiner", "shingle", "shingles", "shining", "shinny", "shinto", "shiny", "ship", "shipboard", "shipbroker", "shipbuilding", "shipmate", "shipment", "shipper", "shipping", "shipshape", "shipwreck", "shipwright", "shipyard", "shire", "shires", "shirk", "shirring", "shirt", "shirtfront", "shirting", "shirtsleeve", "shirttail", "shir
 twaist", "shirtwaister", "shirty", "shit", "shits", "shitty", "shiver", "shivers", "shivery", "shoal", "shock", "shocker", "shockheaded", "shocking", "shockproof", "shod", "shoddy", "shoe", "shoeblack", "shoehorn", "shoelace", "shoemaker", "shoeshine", "shoestring", "shone", "shoo", "shook", "shoot", "shop", "shopkeeper", "shoplift", "shopsoiled", "shopworn", "shore", "shorn", "short", "shortage", "shortbread", "shortcake", "shortcoming", "shorten", "shortening", "shortfall", "shorthand", "shorthanded", "shorthorn", "shortie", "shortly", "shorts", "shortsighted", "shorty", "shot", "shotgun", "should", "shoulder", "shouldst", "shout", "shouting", "shove", "shovel", "shovelboard", "show", "showboat", "showcase", "showdown", "shower", "showery", "showgirl", "showing", "showman", "showmanship", "shown", "showpiece", "showplace", "showroom", "showy", "shrank", "shrapnel", "shred", "shredder", "shrew", "shrewd", "shrewish", "shriek", "shrift", "shrike", "shrill", "shrimp", "shrine", "shri
 nk", "shrinkage", "shrive", "shrivel", "shroud", "shrub", "shrubbery", "shrug", "shuck", "shucks", "shudder", "shuffle", "shuffleboard", "shufty", "shun", "shunt", "shunter", "shush", "shut", "shutdown", "shutter", "shuttle", "shuttlecock", "shy", "shyster", "sibilant", "sibling", "sibyl", "sibylline", "sic", "sick", "sickbay", "sickbed", "sicken", "sickening", "sickle", "sickly", "sickness", "sickroom", "side", "sidearm", "sideboard", "sideboards", "sidecar", "sidekick", "sidelight", "sideline", "sidelong", "sidereal", "sidesaddle", "sideshow", "sideslip", "sidesman", "sidesplitting", "sidestep", "sidestroke", "sideswipe", "sidetrack", "sidewalk", "sideward", "sidewards", "sideways", "siding", "sidle", "siege", "sienna", "sierra", "siesta", "sieve", "sift", "sifter", "sigh", "sight", "sighted", "sightless", "sightly", "sightscreen", "sightsee", "sightseer", "sign", "signal", "signaler", "signalise", "signalize", "signaller", "signally", "signalman", "signatory", "signature", "signe
 r", "signet", "significance", "significant", "signification", "signify", "signor", "signora", "signorina", "signpost", "signposted", "silage", "silence", "silencer", "silent", "silhouette", "silica", "silicate", "silicon", "silicone", "silicosis", "silk", "silken", "silkworm", "silky", "sill", "sillabub", "silly", "silo", "silt", "silvan", "silver", "silverfish", "silverside", "silversmith", "silverware", "silvery", "simian", "similar", "similarity", "similarly", "simile", "similitude", "simmer", "simony", "simper", "simple", "simpleton", "simplicity", "simplify", "simply", "simulacrum", "simulate", "simulated", "simulation", "simulator", "simultaneous", "sin", "since", "sincere", "sincerely", "sincerity", "sinecure", "sinew", "sinewy", "sinful", "sing", "singe", "singhalese", "singing", "single", "singleness", "singles", "singlestick", "singlet", "singleton", "singly", "singsong", "singular", "singularly", "sinhalese", "sinister", "sink", "sinker", "sinless", "sinner", "sinology", 
 "sinuous", "sinus", "sip", "siphon", "sir", "sire", "siren", "sirloin", "sirocco", "sirrah", "sis", "sisal", "sissy", "sister", "sisterhood", "sisterly", "sit", "sitar", "site", "sitter", "sitting", "situated", "situation", "six", "sixpence", "sixteen", "sixty", "sizable", "size", "sizeable", "sizzle", "sizzler", "skate", "skateboard", "skedaddle", "skeet", "skein", "skeleton", "skeptic", "skeptical", "skepticism", "sketch", "sketchpad", "sketchy", "skew", "skewbald", "skewer", "ski", "skibob", "skid", "skidlid", "skidpan", "skiff", "skiffle", "skilful", "skill", "skilled", "skillet", "skillful", "skim", "skimmer", "skimp", "skimpy", "skin", "skinflint", "skinful", "skinhead", "skinny", "skint", "skip", "skipper", "skirl", "skirmish", "skirt", "skit", "skitter", "skittish", "skittle", "skittles", "skive", "skivvy", "skua", "skulduggery", "skulk", "skull", "skullcap", "skullduggery", "skunk", "sky", "skydiving", "skyhook", "skyjack", "skylark", "skylight", "skyline", "skyrocket", "sk
 yscraper", "skywriting", "slab", "slack", "slacken", "slacker", "slacks", "slag", "slagheap", "slain", "slake", "slalom", "slam", "slander", "slanderous", "slang", "slangy", "slant", "slantwise", "slap", "slapdash", "slaphappy", "slapstick", "slash", "slat", "slate", "slattern", "slaty", "slaughter", "slaughterhouse", "slave", "slaver", "slavery", "slavic", "slavish", "slay", "sleazy", "sled", "sledge", "sledgehammer", "sleek", "sleep", "sleeper", "sleepless", "sleepwalker", "sleepy", "sleepyhead", "sleet", "sleeve", "sleigh", "slender", "slenderise", "slenderize", "slept", "sleuth", "slew", "slewed", "slice", "slick", "slicker", "slide", "slight", "slightly", "slim", "slimy", "sling", "slingshot", "slink", "slip", "slipcover", "slipknot", "slipover", "slipper", "slippery", "slippy", "slips", "slipshod", "slipstream", "slipway", "slit", "slither", "slithery", "sliver", "slivovitz", "slob", "slobber", "sloe", "slog", "slogan", "sloop", "slop", "slope", "sloppy", "slosh", "sloshed", "
 slot", "sloth", "slothful", "slouch", "slough", "sloven", "slovenly", "slow", "slowcoach", "slowworm", "sludge", "slue", "slug", "sluggard", "sluggish", "sluice", "sluiceway", "slum", "slumber", "slumberous", "slummy", "slump", "slung", "slunk", "slur", "slurp", "slurry", "slush", "slut", "sly", "smack", "smacker", "small", "smallholder", "smallholding", "smallpox", "smalls", "smarmy", "smart", "smarten", "smash", "smashed", "smasher", "smashing", "smattering", "smear", "smell", "smelly", "smelt", "smile", "smirch", "smirk", "smite", "smith", "smithereens", "smithy", "smitten", "smock", "smocking", "smog", "smoke", "smoker", "smokescreen", "smokestack", "smoking", "smoky", "smolder", "smooch", "smooth", "smoothie", "smoothy", "smorgasbord", "smote", "smother", "smoulder", "smudge", "smug", "smuggle", "smut", "smutty", "snack", "snaffle", "snag", "snail", "snake", "snakebite", "snaky", "snap", "snapdragon", "snapper", "snappish", "snappy", "snapshot", "snare", "snarl", "snatch", "sna
 zzy", "sneak", "sneaker", "sneaking", "sneaky", "sneer", "sneeze", "snick", "snicker", "snide", "sniff", "sniffle", "sniffles", "sniffy", "snifter", "snigger", "snip", "snippet", "snips", "snitch", "snivel", "snob", "snobbery", "snobbish", "snog", "snood", "snook", "snooker", "snoop", "snooper", "snoot", "snooty", "snooze", "snore", "snorkel", "snort", "snorter", "snot", "snotty", "snout", "snow", "snowball", "snowberry", "snowbound", "snowdrift", "snowdrop", "snowfall", "snowfield", "snowflake", "snowline", "snowman", "snowplough", "snowplow", "snowshoe", "snowstorm", "snowy", "snr", "snub", "snuff", "snuffer", "snuffle", "snug", "snuggle", "soak", "soaked", "soaking", "soap", "soapbox", "soapstone", "soapsuds", "soapy", "soar", "sob", "sober", "sobriety", "sobriquet", "soccer", "sociable", "social", "socialise", "socialism", "socialist", "socialite", "socialize", "society", "sociology", "sock", "socket", "sod", "soda", "sodden", "sodium", "sodomite", "sodomy", "soever", "sofa", "s
 oft", "softball", "soften", "softhearted", "softie", "software", "softwood", "softy", "soggy", "soigne", "soignee", "soil", "sojourn", "sol", "solace", "solar", "solarium", "sold", "solder", "soldier", "soldierly", "soldiery", "sole", "solecism", "solely", "solemn", "solemnise", "solemnity", "solemnize", "solicit", "solicitor", "solicitous", "solicitude", "solid", "solidarity", "solidify", "solidity", "solidus", "soliloquise", "soliloquize", "soliloquy", "solipsism", "solitaire", "solitary", "solitude", "solo", "soloist", "solstice", "soluble", "solution", "solve", "solvency", "solvent", "somber", "sombre", "sombrero", "some", "somebody", "someday", "somehow", "somersault", "something", "sometime", "sometimes", "someway", "somewhat", "somewhere", "somnambulism", "somnolent", "son", "sonar", "sonata", "song", "songbird", "songbook", "songster", "sonic", "sonnet", "sonny", "sonority", "sonorous", "sonsy", "soon", "soot", "soothe", "soothsayer", "sop", "sophism", "sophisticate", "sophi
 sticated", "sophistication", "sophistry", "sophomore", "soporific", "sopping", "soppy", "soprano", "sorbet", "sorcerer", "sorcery", "sordid", "sore", "sorehead", "sorely", "sorghum", "sorority", "sorrel", "sorrow", "sorry", "sort", "sortie", "sos", "sot", "sottish", "sou", "soubrette", "soubriquet", "sough", "sought", "soul", "soulful", "soulless", "sound", "soundings", "soundproof", "soundtrack", "soup", "sour", "source", "sourdough", "sourpuss", "sousaphone", "souse", "soused", "south", "southbound", "southeast", "southeaster", "southeasterly", "southeastern", "southeastward", "southeastwards", "southerly", "southern", "southerner", "southernmost", "southpaw", "southward", "southwards", "southwest", "southwester", "southwesterly", "southwestern", "southwestward", "southwestwards", "souvenir", "sovereign", "sovereignty", "soviet", "sow", "sox", "soy", "soybean", "sozzled", "spa", "space", "spacecraft", "spaceship", "spacesuit", "spacing", "spacious", "spade", "spadework", "spaghett
 i", "spake", "spam", "span", "spangle", "spaniel", "spank", "spanking", "spanner", "spar", "spare", "spareribs", "sparing", "spark", "sparkle", "sparkler", "sparks", "sparrow", "sparse", "spartan", "spasm", "spasmodic", "spastic", "spat", "spatchcock", "spate", "spatial", "spatter", "spatula", "spavin", "spawn", "spay", "speak", "speakeasy", "speaker", "speakership", "spear", "spearhead", "spearmint", "spec", "special", "specialise", "specialised", "specialist", "speciality", "specialize", "specialized", "specially", "specie", "species", "specific", "specifically", "specification", "specifics", "specify", "specimen", "specious", "speck", "speckle", "spectacle", "spectacled", "spectacles", "spectacular", "spectator", "specter", "spectral", "spectre", "spectroscope", "spectrum", "speculate", "speculation", "speculative", "speech", "speechify", "speechless", "speed", "speedboat", "speeding", "speedometer", "speedway", "speedwell", "speedy", "spelaeology", "speleology", "spell", "spellb
 ind", "spelling", "spend", "spender", "spendthrift", "spent", "sperm", "spermaceti", "spermatozoa", "spew", "sphagnum", "sphere", "spherical", "spheroid", "sphincter", "sphinx", "spice", "spicy", "spider", "spidery", "spiel", "spigot", "spike", "spikenard", "spiky", "spill", "spillover", "spillway", "spin", "spinach", "spinal", "spindle", "spindly", "spine", "spineless", "spinet", "spinnaker", "spinner", "spinney", "spinster", "spiny", "spiral", "spire", "spirit", "spirited", "spiritless", "spirits", "spiritual", "spiritualise", "spiritualism", "spirituality", "spiritualize", "spirituous", "spirt", "spit", "spite", "spitfire", "spittle", "spittoon", "spiv", "splash", "splashy", "splat", "splatter", "splay", "splayfoot", "spleen", "splendid", "splendiferous", "splendor", "splendour", "splenetic", "splice", "splicer", "splint", "splinter", "split", "splits", "splitting", "splotch", "splurge", "splutter", "spoil", "spoilage", "spoils", "spoilsport", "spoke", "spoken", "spokeshave", "sp
 okesman", "spoliation", "spondee", "sponge", "spongy", "sponsor", "spontaneous", "spoof", "spook", "spooky", "spool", "spoon", "spoonerism", "spoonful", "spoor", "sporadic", "spore", "sporran", "sport", "sporting", "sportive", "sports", "sportsman", "sportsmanlike", "sportsmanship", "sporty", "spot", "spotless", "spotlight", "spotted", "spotter", "spotty", "spouse", "spout", "sprain", "sprang", "sprat", "sprawl", "spray", "sprayer", "spread", "spree", "sprig", "sprigged", "sprightly", "spring", "springboard", "springbok", "springtime", "springy", "sprinkle", "sprinkler", "sprinkling", "sprint", "sprite", "sprocket", "sprout", "spruce", "sprung", "spry", "spud", "spume", "spun", "spunk", "spur", "spurious", "spurn", "spurt", "sputter", "sputum", "spy", "spyglass", "squab", "squabble", "squad", "squadron", "squalid", "squall", "squalor", "squander", "square", "squash", "squashy", "squat", "squatter", "squaw", "squawk", "squeak", "squeaky", "squeal", "squeamish", "squeegee", "squeeze",
  "squeezer", "squelch", "squib", "squid", "squidgy", "squiffy", "squiggle", "squint", "squirarchy", "squire", "squirearchy", "squirm", "squirrel", "squirt", "squirter", "sri", "srn", "ssh", "stab", "stabbing", "stabilise", "stabiliser", "stability", "stabilize", "stabilizer", "stable", "stabling", "staccato", "stack", "stadium", "staff", "stag", "stage", "stagecoach", "stager", "stagestruck", "stagger", "staggering", "staggers", "staging", "stagnant", "stagnate", "stagy", "staid", "stain", "stainless", "stair", "staircase", "stairs", "stairwell", "stake", "stakeholder", "stakes", "stalactite", "stalagmite", "stale", "stalemate", "stalk", "stall", "stallholder", "stallion", "stalls", "stalwart", "stamen", "stamina", "stammer", "stamp", "stampede", "stance", "stanch", "stanchion", "stand", "standard", "standardise", "standardize", "standby", "standing", "standoffish", "standpipe", "standpoint", "standstill", "stank", "stanza", "staple", "stapler", "star", "starboard", "starch", "starc
 hy", "stardom", "stardust", "stare", "starfish", "stargazer", "stargazing", "staring", "stark", "starkers", "starlet", "starlight", "starling", "starlit", "starry", "stars", "start", "starter", "starters", "startle", "starvation", "starve", "starveling", "stash", "state", "statecraft", "statehood", "stateless", "stately", "statement", "stateroom", "states", "stateside", "statesman", "static", "statics", "station", "stationary", "stationer", "stationery", "stationmaster", "statistic", "statistician", "statistics", "statuary", "statue", "statuesque", "statuette", "stature", "status", "statute", "statutory", "staunch", "stave", "staves", "stay", "stayer", "stays", "std", "stead", "steadfast", "steady", "steak", "steal", "stealth", "stealthy", "steam", "steamboat", "steamer", "steamroller", "steamship", "steed", "steel", "steelworker", "steelworks", "steely", "steelyard", "steenbok", "steep", "steepen", "steeple", "steeplechase", "steeplejack", "steer", "steerage", "steerageway", "steer
 sman", "stein", "steinbok", "stele", "stellar", "stem", "stench", "stencil", "stenographer", "stenography", "stentorian", "step", "stepbrother", "stepchild", "stepladder", "stepparent", "steps", "stepsister", "stereo", "stereoscope", "stereoscopic", "stereotype", "sterile", "sterilise", "sterility", "sterilize", "sterling", "stern", "sternum", "steroid", "stertorous", "stet", "stethoscope", "stetson", "stevedore", "stew", "steward", "stewardess", "stewardship", "stewed", "stick", "sticker", "stickleback", "stickler", "stickpin", "sticks", "sticky", "stiff", "stiffen", "stiffener", "stiffening", "stifle", "stigma", "stigmata", "stigmatise", "stigmatize", "stile", "stiletto", "still", "stillbirth", "stillborn", "stillroom", "stilly", "stilt", "stilted", "stilton", "stimulant", "stimulate", "stimulus", "sting", "stinger", "stingo", "stingray", "stingy", "stink", "stinking", "stint", "stipend", "stipendiary", "stipple", "stipulate", "stipulation", "stir", "stirrer", "stirring", "stirrup
 ", "stitch", "stoat", "stock", "stockade", "stockbreeder", "stockbroker", "stockcar", "stockfish", "stockholder", "stockily", "stockinet", "stockinette", "stocking", "stockist", "stockjobber", "stockman", "stockpile", "stockpot", "stockroom", "stocks", "stocktaking", "stocky", "stockyard", "stodge", "stodgy", "stoic", "stoical", "stoicism", "stoke", "stokehold", "stoker", "stole", "stolen", "stolid", "stomach", "stomachache", "stomachful", "stomp", "stone", "stonebreaker", "stonecutter", "stoned", "stoneless", "stonemason", "stonewall", "stoneware", "stonework", "stony", "stood", "stooge", "stool", "stoolpigeon", "stoop", "stop", "stopcock", "stopgap", "stopover", "stoppage", "stopper", "stopping", "stopwatch", "storage", "store", "storehouse", "storekeeper", "storeroom", "stores", "storey", "storied", "stork", "storm", "stormbound", "stormy", "story", "storybook", "storyteller", "stoup", "stout", "stouthearted", "stove", "stovepipe", "stow", "stowage", "stowaway", "straddle", "stra
 divarius", "strafe", "straggle", "straggly", "straight", "straightaway", "straightedge", "straighten", "straightforward", "straightway", "strain", "strained", "strainer", "strait", "straitened", "straitjacket", "straitlaced", "straits", "strand", "stranded", "strange", "stranger", "strangle", "stranglehold", "strangulate", "strangulation", "strap", "straphanging", "strapless", "strapping", "strata", "stratagem", "strategic", "strategist", "strategy", "stratification", "stratify", "stratosphere", "stratum", "straw", "strawberry", "strawboard", "stray", "streak", "streaker", "streaky", "stream", "streamer", "streamline", "streamlined", "street", "streetcar", "streetwalker", "strength", "strengthen", "strenuous", "streptococcus", "streptomycin", "stress", "stretch", "stretcher", "stretchy", "strew", "strewth", "striated", "striation", "stricken", "strict", "stricture", "stride", "stridency", "strident", "stridulate", "strife", "strike", "strikebound", "strikebreaker", "strikebreaking",
  "striker", "striking", "string", "stringency", "stringent", "strings", "stringy", "strip", "stripe", "striped", "stripling", "stripper", "striptease", "stripy", "strive", "strode", "stroke", "stroll", "stroller", "strolling", "strong", "strongarm", "strongbox", "stronghold", "strontium", "strop", "strophe", "stroppy", "strove", "struck", "structural", "structure", "strudel", "struggle", "strum", "strumpet", "strung", "strut", "strychnine", "stub", "stubble", "stubborn", "stubby", "stucco", "stuck", "stud", "studbook", "student", "studied", "studio", "studious", "study", "stuff", "stuffing", "stuffy", "stultify", "stumble", "stump", "stumper", "stumpy", "stun", "stung", "stunk", "stunner", "stunning", "stunt", "stupefaction", "stupefy", "stupendous", "stupid", "stupidity", "stupor", "sturdy", "sturgeon", "stutter", "sty", "stye", "stygian", "style", "stylise", "stylish", "stylist", "stylistic", "stylistics", "stylize", "stylus", "stymie", "styptic", "suasion", "suave", "sub", "subal
 tern", "subatomic", "subcommittee", "subconscious", "subcontinent", "subcontract", "subcontractor", "subcutaneous", "subdivide", "subdue", "subdued", "subedit", "subeditor", "subheading", "subhuman", "subject", "subjection", "subjective", "subjoin", "subjugate", "subjunctive", "sublease", "sublet", "sublieutenant", "sublimate", "sublime", "subliminal", "submarine", "submariner", "submerge", "submergence", "submersible", "submission", "submissive", "submit", "subnormal", "suborbital", "subordinate", "suborn", "subplot", "subpoena", "subscribe", "subscriber", "subscription", "subsequent", "subservience", "subservient", "subside", "subsidence", "subsidiary", "subsidise", "subsidize", "subsidy", "subsist", "subsistence", "subsoil", "subsonic", "substance", "substandard", "substantial", "substantially", "substantiate", "substantival", "substantive", "substation", "substitute", "substratum", "substructure", "subsume", "subtenant", "subtend", "subterfuge", "subterranean", "subtitle", "subt
 itles", "subtle", "subtlety", "subtopia", "subtract", "subtraction", "subtropical", "suburb", "suburban", "suburbanite", "suburbia", "suburbs", "subvention", "subversive", "subvert", "subway", "succeed", "success", "successful", "succession", "successive", "successor", "succinct", "succor", "succour", "succubus", "succulence", "succulent", "succumb", "such", "suchlike", "suck", "sucker", "suckle", "suckling", "sucrose", "suction", "sudden", "suds", "sue", "suet", "suffer", "sufferable", "sufferance", "sufferer", "suffering", "suffice", "sufficiency", "sufficient", "suffix", "suffocate", "suffragan", "suffrage", "suffragette", "suffuse", "sugar", "sugarcane", "sugarcoated", "sugarloaf", "sugary", "suggest", "suggestible", "suggestion", "suggestive", "suicidal", "suicide", "suit", "suitability", "suitable", "suitcase", "suiting", "suitor", "sulfate", "sulfide", "sulfur", "sulfuret", "sulfurous", "sulk", "sulks", "sulky", "sullen", "sully", "sulphate", "sulphide", "sulphur", "sulphuret
 ", "sulphurous", "sultan", "sultana", "sultanate", "sultry", "sum", "sumac", "sumach", "summarise", "summarize", "summary", "summat", "summation", "summer", "summerhouse", "summertime", "summery", "summit", "summon", "summons", "sump", "sumptuary", "sumptuous", "sun", "sunbaked", "sunbathe", "sunbeam", "sunblind", "sunbonnet", "sunburn", "sunburnt", "sundae", "sunday", "sundeck", "sunder", "sundew", "sundial", "sundown", "sundowner", "sundrenched", "sundries", "sundry", "sunfish", "sunflower", "sung", "sunglasses", "sunk", "sunken", "sunlamp", "sunless", "sunlight", "sunlit", "sunny", "sunray", "sunrise", "sunroof", "sunset", "sunshade", "sunshine", "sunspot", "sunstroke", "suntan", "suntrap", "sup", "super", "superabundance", "superabundant", "superannuate", "superannuated", "superannuation", "superb", "supercharged", "supercharger", "supercilious", "superconductivity", "superduper", "superego", "superficial", "superficies", "superfine", "superfluity", "superfluous", "superhuman", 
 "superimpose", "superintend", "superintendent", "superior", "superlative", "superlatively", "superman", "supermarket", "supernal", "supernatural", "supernova", "supernumerary", "superscription", "supersede", "supersession", "supersonic", "superstar", "superstition", "superstitious", "superstructure", "supertax", "supervene", "supervise", "supervisory", "supine", "supper", "supplant", "supple", "supplement", "supplementary", "suppliant", "supplicant", "supplicate", "supplier", "supplies", "supply", "support", "supportable", "supporter", "supportive", "suppose", "supposed", "supposedly", "supposing", "supposition", "suppository", "suppress", "suppression", "suppressive", "suppressor", "suppurate", "supranational", "supremacist", "supremacy", "supreme", "surcharge", "surcoat", "surd", "sure", "surefire", "surefooted", "surely", "surety", "surf", "surface", "surfboard", "surfboat", "surfeit", "surfer", "surge", "surgeon", "surgery", "surgical", "surly", "surmise", "surmount", "surname",
  "surpass", "surpassing", "surplice", "surplus", "surprise", "surprising", "surreal", "surrealism", "surrealist", "surrealistic", "surrender", "surreptitious", "surrey", "surrogate", "surround", "surrounding", "surroundings", "surtax", "surveillance", "survey", "surveyor", "survival", "survive", "survivor", "susceptibilities", "susceptibility", "susceptible", "suspect", "suspend", "suspender", "suspenders", "suspense", "suspension", "suspicion", "suspicious", "sustain", "sustenance", "suttee", "suture", "suzerain", "suzerainty", "svelte", "swab", "swaddle", "swag", "swagger", "swain", "swallow", "swallowtailed", "swam", "swami", "swamp", "swampy", "swan", "swank", "swanky", "swansdown", "swansong", "swap", "sward", "swarf", "swarm", "swarthy", "swashbuckler", "swashbuckling", "swastika", "swat", "swatch", "swath", "swathe", "swatter", "sway", "swayback", "swear", "swearword", "sweat", "sweatband", "sweated", "sweater", "sweatshirt", "sweatshop", "sweaty", "swede", "sweep", "sweeper"
 , "sweeping", "sweepings", "sweepstake", "sweepstakes", "sweet", "sweetbread", "sweetbriar", "sweetbrier", "sweeten", "sweetener", "sweetening", "sweetheart", "sweetie", "sweetish", "sweetmeat", "sweets", "swell", "swelling", "swelter", "sweltering", "swept", "swerve", "swift", "swig", "swill", "swim", "swimming", "swimmingly", "swindle", "swine", "swineherd", "swing", "swingeing", "swinger", "swinging", "swinish", "swipe", "swirl", "swish", "switch", "switchback", "switchblade", "switchboard", "switchgear", "switchman", "swivel", "swiz", "swizzle", "swollen", "swoon", "swoop", "swop", "sword", "swordfish", "swordplay", "swordsman", "swordsmanship", "swordstick", "swore", "sworn", "swot", "swum", "swung", "sybarite", "sybaritic", "sycamore", "sycophant", "sycophantic", "sylabub", "syllabary", "syllabic", "syllabify", "syllable", "syllabub", "syllabus", "syllogism", "syllogistic", "sylph", "sylphlike", "sylvan", "symbiosis", "symbol", "symbolic", "symbolise", "symbolism", "symbolist"
 , "symbolize", "symmetrical", "symmetry", "sympathetic", "sympathies", "sympathise", "sympathize", "sympathy", "symphonic", "symphony", "symposium", "symptom", "symptomatic", "synagogue", "sync", "synch", "synchonise", "synchromesh", "synchronize", "synchrotron", "syncopate", "syncope", "syndic", "syndicalism", "syndicate", "syndrome", "synod", "synonym", "synonymous", "synopsis", "synoptic", "syntactic", "syntax", "synthesis", "synthesise", "synthesiser", "synthesize", "synthesizer", "synthetic", "syphilis", "syphilitic", "syphon", "syringe", "syrup", "syrupy", "system", "systematic", "systematise", "systematize", "systemic", "tab", "tabard", "tabasco", "tabby", "tabernacle", "table", "tableau", "tablecloth", "tableland", "tablemat", "tablespoon", "tablespoonful", "tablet", "tableware", "tabloid", "taboo", "tabor", "tabular", "tabulate", "tabulator", "tacit", "taciturn", "tack", "tackiness", "tackle", "tacky", "tact", "tactic", "tactical", "tactician", "tactics", "tactile", "tactua
 l", "tadpole", "taffeta", "taffrail", "taffy", "tag", "tail", "tailback", "tailboard", "tailcoat", "taillight", "tailor", "tailpiece", "tails", "tailspin", "tailwind", "taint", "take", "takeaway", "takeoff", "takeover", "taking", "takings", "talc", "tale", "talebearer", "talent", "talented", "talisman", "talk", "talkative", "talker", "talkie", "talks", "tall", "tallboy", "tallow", "tally", "tallyho", "tallyman", "talmud", "talon", "tamale", "tamarind", "tamarisk", "tambour", "tambourine", "tame", "tammany", "tamp", "tamper", "tampon", "tan", "tandem", "tang", "tangent", "tangential", "tangerine", "tangible", "tangle", "tango", "tank", "tankard", "tanker", "tanner", "tannery", "tannin", "tanning", "tannoy", "tansy", "tantalise", "tantalize", "tantalus", "tantamount", "tantrum", "taoism", "tap", "tape", "taper", "tapestry", "tapeworm", "tapioca", "tapir", "tappet", "taproom", "taproot", "taps", "tar", "tarantella", "tarantula", "tarboosh", "tardy", "target", "tariff", "tarmac", "tarn"
 , "tarnish", "taro", "tarot", "tarpaulin", "tarragon", "tarry", "tarsal", "tarsus", "tart", "tartan", "tartar", "task", "taskmaster", "tassel", "taste", "tasteful", "tasteless", "taster", "tasty", "tat", "tatas", "tatter", "tattered", "tatters", "tatting", "tattle", "tattoo", "tattooist", "tatty", "taught", "taunt", "taurus", "taut", "tautological", "tautology", "tavern", "tawdry", "tawny", "tawse", "tax", "taxation", "taxi", "taxidermist", "taxidermy", "taximeter", "taxonomy", "tea", "teabag", "teacake", "teach", "teacher", "teaching", "teacup", "teacupful", "teagarden", "teahouse", "teak", "teakettle", "teal", "tealeaf", "team", "teamster", "teamwork", "teapot", "tear", "tearaway", "teardrop", "tearful", "teargas", "tearjerker", "tearless", "tearoom", "tease", "teasel", "teaser", "teaspoon", "teaspoonful", "teat", "teatime", "teazle", "tech", "technical", "technicality", "technician", "technique", "technocracy", "technocrat", "technological", "technologist", "technology", "techy",
  "tedious", "tedium", "tee", "teem", "teeming", "teenage", "teenager", "teens", "teenybopper", "teeter", "teeth", "teethe", "teetotal", "teetotaler", "teetotaller", "teflon", "tegument", "tele", "telecast", "telecommunications", "telegram", "telegraph", "telegrapher", "telegraphese", "telegraphic", "telemarketing", "telemeter", "telemetry", "teleology", "telepathic", "telepathist", "telepathy", "telephone", "telephonist", "telephony", "telephotograph", "telephotography", "teleprinter", "teleprompter", "telescope", "telescopic", "televise", "television", "televisual", "telex", "telfer", "tell", "teller", "telling", "telltale", "telly", "telpher", "telstar", "temerity", "temp", "temper", "tempera", "temperament", "temperamental", "temperance", "temperate", "temperature", "tempest", "tempestuous", "template", "temple", "templet", "tempo", "temporal", "temporary", "temporise", "temporize", "tempt", "temptation", "ten", "tenable", "tenacious", "tenacity", "tenancy", "tenant", "tenantry",
  "tench", "tend", "tendency", "tendentious", "tender", "tenderfoot", "tenderhearted", "tenderise", "tenderize", "tenderloin", "tendon", "tendril", "tenement", "tenet", "tenner", "tennis", "tenon" };
+        internal static string[] data = new string[] {
+            "rupee","rupture","rural","ruritanian","ruse",
+            "rush","rushes","rushlight","rusk","russet",
+            "rust","rustic","rusticate","rustication","rustle",
+            "rustler","rustless","rustling","rustproof","rusty",
+            "rut","ruthless","rutting","rye","sabbatarian",
+            "sabbath","sabbatical","saber","sable","sabot",
+            "sabotage","saboteur","sabra","sabre","sac",
+            "saccharin","saccharine","sacerdotal","sacerdotalism","sachet",
+            "sack","sackbut","sackcloth","sacral","sacrament",
+            "sacramental","sacred","sacrifice","sacrificial","sacrilege",
+            "sacrilegious","sacristan","sacristy","sacroiliac","sacrosanct",
+            "sad","sadden","saddle","saddlebag","saddler",
+            "saddlery","sadducee","sadhu","sadism","sadly",
+            "sadomasochism","safari","safe","safebreaker","safeguard",
+            "safekeeping","safety","saffron","sag","saga",
+            "sagacious","sagacity","sagebrush","sago","sahib",
+            "said","sail","sailcloth","sailing","sailor",
+            "sailplane","saint","sainted","saintly","saith",
+            "sake","saki","salaam","salable","salacious",
+            "salacity","salad","salamander","salami","salaried",
+            "salary","sale","saleable","saleroom","sales",
+            "salesclerk","salesgirl","saleslady","salesman","salesmanship",
+            "salient","saliferous","salify","saline","salinometer",
+            "saliva","salivary","salivate","sallow","sally",
+            "salmon","salmonella","salon","saloon","salsify",
+            "salt","saltcellar","saltire","saltlick","saltpan",
+            "saltpeter","saltpetre","salts","saltshaker","saltwater",
+            "salty","salubrious","salutary","salutation","salute",
+            "salvage","salvation","salvationist","salve","salvedge",
+            "salver","salvia","salvo","samaritan","samaritans",
+            "samba","same","sameness","samovar","sampan",
+            "sample","sampler","samurai","sanatorium","sanctify",
+            "sanctimonious","sanction","sanctities","sanctity","sanctuary",
+            "sanctum","sanctus","sand","sandal","sandalwood",
+            "sandbag","sandbank","sandbar","sandblast","sandbox",
+            "sandboy","sandcastle","sander","sandglass","sandman",
+            "sandpaper","sandpiper","sandpit","sands","sandshoe",
+            "sandstone","sandstorm","sandwich","sandy","sane",
+            "sang","sangfroid","sangria","sanguinary","sanguine",
+            "sanitary","sanitation","sanitorium","sanity","sank",
+            "sans","sanskrit","sap","sapience","sapient",
+            "sapless","sapling","sapper","sapphic","sapphire",
+            "sappy","sapwood","saraband","sarabande","sarcasm",
+            "sarcastic","sarcophagus","sardine","sardonic","sarge",
+            "sari","sarky","sarong","sarsaparilla","sartorial",
+            "sash","sashay","sass","sassafras","sassy",
+            "sat","satan","satanic","satanism","satchel",
+            "sate","sateen","satellite","satiable","satiate",
+            "satiety","satin","satinwood","satiny","satire",
+            "satirical","satirise","satirize","satisfaction","satisfactory",
+            "satisfy","satisfying","satrap","satsuma","saturate",
+            "saturation","saturday","saturn","saturnalia","saturnine",
+            "satyr","sauce","saucepan","saucer","saucy",
+            "sauerkraut","sauna","saunter","saurian","sausage",
+            "sauterne","sauternes","savage","savagery","savanna",
+            "savannah","savant","save","saveloy","saver",
+            "saving","savings","savior","saviour","savor",
+            "savory","savour","savoury","savoy","savvy",
+            "saw","sawbones","sawbuck","sawdust","sawhorse",
+            "sawmill","sawpit","sawyer","saxifrage","saxon",
+            "saxophone","saxophonist","say","saying","scab",
+            "scabbard","scabby","scabies","scabious","scabrous",
+            "scads","scaffold","scaffolding","scalar","scalawag",
+            "scald","scalding","scale","scalene","scallion",
+            "scallop","scallywag","scalp","scalpel","scaly",
+            "scamp","scamper","scampi","scan","scandal",
+            "scandalise","scandalize","scandalmonger","scandalous","scandinavian",
+            "scanner","scansion","scant","scanty","scapegoat",
+            "scapegrace","scapula","scar","scarab","scarce",
+            "scarcely","scarcity","scare","scarecrow","scared",
+            "scaremonger","scarf","scarify","scarlet","scarp",
+            "scarper","scary","scat","scathing","scatology",
+            "scatter","scatterbrain","scatterbrained","scattered","scatty",
+            "scavenge","scavenger","scenario","scenarist","scene",
+            "scenery","sceneshifter","scenic","scent","scepter",
+            "sceptic","sceptical","scepticism","sceptre","schedule",
+            "schema","schematic","schematize","scheme","scherzo",
+            "schism","schismatic","schist","schizoid","schizophrenia",
+            "schizophrenic","schmaltz","schmalz","schnapps","schnitzel",
+            "schnorkel","scholar","scholarly","scholarship","scholastic",
+            "scholasticism","school","schoolboy","schoolhouse","schooling",
+            "schoolman","schoolmarm","schoolmaster","schoolmastering","schoolmate",
+            "schoolwork","schooner","schwa","sciatic","sciatica",
+            "science","scientific","scientist","scientology","scimitar",
+            "scintilla","scintillate","scion","scissor","scissors",
+            "sclerosis","scoff","scold","scollop","sconce",
+            "scone","scoop","scoot","scooter","scope",
+            "scorbutic","scorch","scorcher","scorching","score",
+            "scoreboard","scorebook","scorecard","scorekeeper","scoreless",
+            "scorer","scorn","scorpio","scorpion","scotch",
+            "scoundrel","scoundrelly","scour","scourer","scourge",
+            "scout","scoutmaster","scow","scowl","scrabble",
+            "scrag","scraggly","scraggy","scram","scramble",
+            "scrap","scrapbook","scrape","scraper","scrapings",
+            "scrappy","scraps","scratch","scratchpad","scratchy",
+            "scrawl","scrawny","scream","screamingly","scree",
+            "screech","screed","screen","screening","screenplay",
+            "screw","screwball","screwdriver","screwy","scribble",
+            "scribbler","scribe","scrimmage","scrimp","scrimshank",
+            "scrimshaw","scrip","script","scripted","scriptural",
+            "scripture","scriptwriter","scrivener","scrofula","scrofulous",
+            "scroll","scrollwork","scrooge","scrotum","scrounge",
+            "scrub","scrubber","scrubby","scruff","scruffy",
+            "scrum","scrumcap","scrumhalf","scrummage","scrumptious",
+            "scrumpy","scrunch","scruple","scrupulous","scrutineer",
+            "scrutinise","scrutinize","scrutiny","scuba","scud",
+            "scuff","scuffle","scull","scullery","scullion",
+            "sculptor","sculptural","sculpture","scum","scupper",
+            "scurf","scurrility","scurrilous","scurry","scurvy",
+            "scut","scutcheon","scuttle","scylla","scythe",
+            "sea","seabed","seabird","seaboard","seaborne",
+            "seafaring","seafood","seafront","seagirt","seagoing",
+            "seagull","seahorse","seakale","seal","sealer",
+            "sealing","sealskin","sealyham","seam","seaman",
+            "seamanlike","seamanship","seamstress","seamy","seaplane",
+            "seaport","sear","search","searching","searchlight",
+            "searing","seascape","seashell","seashore","seasick",
+            "seaside","season","seasonable","seasonal","seasoning",
+            "seat","seating","seawall","seaward","seawards",
+            "seawater","seaway","seaweed","seaworthy","sec",
+            "secateurs","secede","secession","seclude","secluded",
+            "seclusion","seclusive","second","secondary","seconds",
+            "secrecy","secret","secretarial","secretariat","secretary",
+            "secrete","secretion","secretive","sect","sectarian",
+            "section","sectional","sectionalism","sector","secular",
+            "secularise","secularism","secularize","secure","security",
+            "sedan","sedate","sedation","sedative","sedentary",
+            "sedge","sediment","sedimentary","sedimentation","sedition",
+            "seditious","seduce","seduction","seductive","sedulous",
+            "see","seed","seedbed","seedcake","seedling",
+            "seedsman","seedy","seeing","seek","seem",
+            "seeming","seemingly","seemly","seen","seep",
+            "seepage","seer","seersucker","seesaw","seethe",
+            "segment","segmentation","segregate","segregated","segregation",
+            "seigneur","seine","seismic","seismograph","seismology",
+            "seize","seizure","seldom","select","selection",
+            "selective","selector","selenium","self","selfish",
+            "selfless","selfsame","sell","seller","sellotape",
+            "selvage","selves","semantic","semantics","semaphore",
+            "semblance","semeiology","semen","semester","semibreve",
+            "semicircle","semicolon","semiconductor","semidetached","semifinal",
+            "semifinalist","seminal","seminar","seminarist","seminary",
+            "semiology","semiprecious","semiquaver","semitic","semitone",
+            "semitropical","semivowel","semiweekly","semolina","sempstress",
+            "sen","senate","senator","senatorial","send",
+            "sender","senescence","senescent","seneschal","senile",
+            "senility","senior","seniority","senna","sensation",
+            "sensational","sensationalism","sense","senseless","senses",
+            "sensibility","sensible","sensitise","sensitive","sensitivity",
+            "sensitize","sensor","sensory","sensual","sensualist",
+            "sensuality","sensuous","sent","sentence","sententious",
+            "sentient","sentiment","sentimental","sentimentalise","sentimentalism",
+            "sentimentality","sentimentalize","sentinel","sentry","sepal",
+            "separable","separate","separation","separatism","separator",
+            "sepia","sepoy","sepsis","september","septet",
+            "septic","septicaemia","septicemia","septuagenarian","septuagesima",
+            "septuagint","sepulcher","sepulchral","sepulchre","sequel",
+            "sequence","sequencing","sequent","sequential","sequester",
+            "sequestrate","sequestration","sequin","sequoia","seraglio",
+            "seraph","seraphic","sere","serenade","serendipity",
+            "serene","serf","serfdom","serge","sergeant",
+            "serial","serialise","serialize","seriatim","sericulture",
+            "series","serif","seriocomic","serious","seriously",
+            "sermon","sermonise","sermonize","serous","serpent",
+            "serpentine","serrated","serried","serum","serval",
+            "servant","serve","server","servery","service",
+            "serviceable","serviceman","serviette","servile","serving",
+            "servitor","servitude","servomechanism","servomotor","sesame",
+            "session","sessions","set","setback","setscrew",
+            "setsquare","sett","settee","setter","setting",
+            "settle","settled","settlement","settler","seven",
+            "seventeen","seventy","sever","several","severally",
+            "severance","severity","sew","sewage","sewer",
+            "sewerage","sewing","sex","sexagenarian","sexagesima",
+            "sexism","sexist","sexless","sextant","sextet",
+            "sexton","sextuplet","sexual","sexuality","sexy",
+            "sforzando","sgt","shabby","shack","shackle",
+            "shad","shade","shades","shading","shadow",
+            "shadowbox","shadowy","shady","shaft","shag",
+            "shagged","shaggy","shagreen","shah","shake",
+            "shakedown","shaker","shakes","shako","shaky",
+            "shale","shall","shallop","shallot","shallow",
+            "shallows","shalom","shalt","sham","shaman",
+            "shamble","shambles","shame","shamefaced","shameful",
+            "shameless","shammy","shampoo","shamrock","shandy",
+            "shanghai","shank","shantung","shanty","shantytown",
+            "shape","shaped","shapely","shard","share",
+            "sharecropper","shareholder","shares","shark","sharkskin",
+            "sharp","sharpen","sharpener","sharper","sharpshooter",
+            "shatter","shave","shaver","shaving","shawl",
+            "shay","she","sheaf","shear","shears",
+            "sheath","sheathe","sheathing","shebang","shebeen",
+            "shed","sheen","sheep","sheepdip","sheepdog",
+            "sheepfold","sheepish","sheepskin","sheer","sheet",
+            "sheeting","sheik","sheikdom","sheikh","sheikhdom",
+            "sheila","shekels","shelduck","shelf","shell",
+            "shellac","shellacking","shellfish","shellshock","shelter",
+            "sheltered","shelve","shelves","shelving","shenanigan",
+            "shepherd","shepherdess","sheraton","sherbet","sherd",
+            "sheriff","sherpa","sherry","shew","shh",
+            "shibboleth","shield","shift","shiftless","shifty",
+            "shilling","shimmer","shin","shinbone","shindig",
+            "shindy","shine","shiner","shingle","shingles",
+            "shining","shinny","shinto","shiny","ship",
+            "shipboard","shipbroker","shipbuilding","shipmate","shipment",
+            "shipper","shipping","shipshape","shipwreck","shipwright",
+            "shipyard","shire","shires","shirk","shirring",
+            "shirt","shirtfront","shirting","shirtsleeve","shirttail",
+            "shirtwaist","shirtwaister","shirty","shit","shits",
+            "shitty","shiver","shivers","shivery","shoal",
+            "shock","shocker","shockheaded","shocking","shockproof",
+            "shod","shoddy","shoe","shoeblack","shoehorn",
+            "shoelace","shoemaker","shoeshine","shoestring","shone",
+            "shoo","shook","shoot","shop","shopkeeper",
+            "shoplift","shopsoiled","shopworn","shore","shorn",
+            "short","shortage","shortbread","shortcake","shortcoming",
+            "shorten","shortening","shortfall","shorthand","shorthanded",
+            "shorthorn","shortie","shortly","shorts","shortsighted",
+            "shorty","shot","shotgun","should","shoulder",
+            "shouldst","shout","shouting","shove","shovel",
+            "shovelboard","show","showboat","showcase","showdown",
+            "shower","showery","showgirl","showing","showman",
+            "showmanship","shown","showpiece","showplace","showroom",
+            "showy","shrank","shrapnel","shred","shredder",
+            "shrew","shrewd","shrewish","shriek","shrift",
+            "shrike","shrill","shrimp","shrine","shrink",
+            "shrinkage","shrive","shrivel","shroud","shrub",
+            "shrubbery","shrug","shuck","shucks","shudder",
+            "shuffle","shuffleboard","shufty","shun","shunt",
+            "shunter","shush","shut","shutdown","shutter",
+            "shuttle","shuttlecock","shy","shyster","sibilant",
+            "sibling","sibyl","sibylline","sic","sick",
+            "sickbay","sickbed","sicken","sickening","sickle",
+            "sickly","sickness","sickroom","side","sidearm",
+            "sideboard","sideboards","sidecar","sidekick","sidelight",
+            "sideline","sidelong","sidereal","sidesaddle","sideshow",
+            "sideslip","sidesman","sidesplitting","sidestep","sidestroke",
+            "sideswipe","sidetrack","sidewalk","sideward","sidewards",
+            "sideways","siding","sidle","siege","sienna",
+            "sierra","siesta","sieve","sift","sifter",
+            "sigh","sight","sighted","sightless","sightly",
+            "sightscreen","sightsee","sightseer","sign","signal",
+            "signaler","signalise","signalize","signaller","signally",
+            "signalman","signatory","signature","signer","signet",
+            "significance","significant","signification","signify","signor",
+            "signora","signorina","signpost","signposted","silage",
+            "silence","silencer","silent","silhouette","silica",
+            "silicate","silicon","silicone","silicosis","silk",
+            "silken","silkworm","silky","sill","sillabub",
+            "silly","silo","silt","silvan","silver",
+            "silverfish","silverside","silversmith","silverware","silvery",
+            "simian","similar","similarity","similarly","simile",
+            "similitude","simmer","simony","simper","simple",
+            "simpleton","simplicity","simplify","simply","simulacrum",
+            "simulate","simulated","simulation","simulator","simultaneous",
+            "sin","since","sincere","sincerely","sincerity",
+            "sinecure","sinew","sinewy","sinful","sing",
+            "singe","singhalese","singing","single","singleness",
+            "singles","singlestick","singlet","singleton","singly",
+            "singsong","singular","singularly","sinhalese","sinister",
+            "sink","sinker","sinless","sinner","sinology",
+            "sinuous","sinus","sip","siphon","sir",
+            "sire","siren","sirloin","sirocco","sirrah",
+            "sis","sisal","sissy","sister","sisterhood",
+            "sisterly","sit","sitar","site","sitter",
+            "sitting","situated","situation","six","sixpence",
+            "sixteen","sixty","sizable","size","sizeable",
+            "sizzle","sizzler","skate","skateboard","skedaddle",
+            "skeet","skein","skeleton","skeptic","skeptical",
+            "skepticism","sketch","sketchpad","sketchy","skew",
+            "skewbald","skewer","ski","skibob","skid",
+            "skidlid","skidpan","skiff","skiffle","skilful",
+            "skill","skilled","skillet","skillful","skim",
+            "skimmer","skimp","skimpy","skin","skinflint",
+            "skinful","skinhead","skinny","skint","skip",
+            "skipper","skirl","skirmish","skirt","skit",
+            "skitter","skittish","skittle","skittles","skive",
+            "skivvy","skua","skulduggery","skulk","skull",
+            "skullcap","skullduggery","skunk","sky","skydiving",
+            "skyhook","skyjack","skylark","skylight","skyline",
+            "skyrocket","skyscraper","skywriting","slab","slack",
+            "slacken","slacker","slacks","slag","slagheap",
+            "slain","slake","slalom","slam","slander",
+            "slanderous","slang","slangy","slant","slantwise",
+            "slap","slapdash","slaphappy","slapstick","slash",
+            "slat","slate","slattern","slaty","slaughter",
+            "slaughterhouse","slave","slaver","slavery","slavic",
+            "slavish","slay","sleazy","sled","sledge",
+            "sledgehammer","sleek","sleep","sleeper","sleepless",
+            "sleepwalker","sleepy","sleepyhead","sleet","sleeve",
+            "sleigh","slender","slenderise","slenderize","slept",
+            "sleuth","slew","slewed","slice","slick",
+            "slicker","slide","slight","slightly","slim",
+            "slimy","sling","slingshot","slink","slip",
+            "slipcover","slipknot","slipover","slipper","slippery",
+            "slippy","slips","slipshod","slipstream","slipway",
+            "slit","slither","slithery","sliver","slivovitz",
+            "slob","slobber","sloe","slog","slogan",
+            "sloop","slop","slope","sloppy","slosh",
+            "sloshed","slot","sloth","slothful","slouch",
+            "slough","sloven","slovenly","slow","slowcoach",
+            "slowworm","sludge","slue","slug","sluggard",
+            "sluggish","sluice","sluiceway","slum","slumber",
+            "slumberous","slummy","slump","slung","slunk",
+            "slur","slurp","slurry","slush","slut",
+            "sly","smack","smacker","small","smallholder",
+            "smallholding","smallpox","smalls","smarmy","smart",
+            "smarten","smash","smashed","smasher","smashing",
+            "smattering","smear","smell","smelly","smelt",
+            "smile","smirch","smirk","smite","smith",
+            "smithereens","smithy","smitten","smock","smocking",
+            "smog","smoke","smoker","smokescreen","smokestack",
+            "smoking","smoky","smolder","smooch","smooth",
+            "smoothie","smoothy","smorgasbord","smote","smother",
+            "smoulder","smudge","smug","smuggle","smut",
+            "smutty","snack","snaffle","snag","snail",
+            "snake","snakebite","snaky","snap","snapdragon",
+            "snapper","snappish","snappy","snapshot","snare",
+            "snarl","snatch","snazzy","sneak","sneaker",
+            "sneaking","sneaky","sneer","sneeze","snick",
+            "snicker","snide","sniff","sniffle","sniffles",
+            "sniffy","snifter","snigger","snip","snippet",
+            "snips","snitch","snivel","snob","snobbery",
+            "snobbish","snog","snood","snook","snooker",
+            "snoop","snooper","snoot","snooty","snooze",
+            "snore","snorkel","snort","snorter","snot",
+            "snotty","snout","snow","snowball","snowberry",
+            "snowbound","snowdrift","snowdrop","snowfall","snowfield",
+            "snowflake","snowline","snowman","snowplough","snowplow",
+            "snowshoe","snowstorm","snowy","snr","snub",
+            "snuff","snuffer","snuffle","snug","snuggle",
+            "soak","soaked","soaking","soap","soapbox",
+            "soapstone","soapsuds","soapy","soar","sob",
+            "sober","sobriety","sobriquet","soccer","sociable",
+            "social","socialise","socialism","socialist","socialite",
+            "socialize","society","sociology","sock","socket",
+            "sod","soda","sodden","sodium","sodomite",
+            "sodomy","soever","sofa","soft","softball",
+            "soften","softhearted","softie","software","softwood",
+            "softy","soggy","soigne","soignee","soil",
+            "sojourn","sol","solace","solar","solarium",
+            "sold","solder","soldier","soldierly","soldiery",
+            "sole","solecism","solely","solemn","solemnise",
+            "solemnity","solemnize","solicit","solicitor","solicitous",
+            "solicitude","solid","solidarity","solidify","solidity",
+            "solidus","soliloquise","soliloquize","soliloquy","solipsism",
+            "solitaire","solitary","solitude","solo","soloist",
+            "solstice","soluble","solution","solve","solvency",
+            "solvent","somber","sombre","sombrero","some",
+            "somebody","someday","somehow","somersault","something",
+            "sometime","sometimes","someway","somewhat","somewhere",
+            "somnambulism","somnolent","son","sonar","sonata",
+            "song","songbird","songbook","songster","sonic",
+            "sonnet","sonny","sonority","sonorous","sonsy",
+            "soon","soot","soothe","soothsayer","sop",
+            "sophism","sophisticate","sophisticated","sophistication","sophistry",
+            "sophomore","soporific","sopping","soppy","soprano",
+            "sorbet","sorcerer","sorcery","sordid","sore",
+            "sorehead","sorely","sorghum","sorority","sorrel",
+            "sorrow","sorry","sort","sortie","sos",
+            "sot","sottish","sou","soubrette","soubriquet",
+            "sough","sought","soul","soulful","soulless",
+            "sound","soundings","soundproof","soundtrack","soup",
+            "sour","source","sourdough","sourpuss","sousaphone",
+            "souse","soused","south","southbound","southeast",
+            "southeaster","southeasterly","southeastern","southeastward","southeastwards",
+            "southerly","southern","southerner","southernmost","southpaw",
+            "southward","southwards","southwest","southwester","southwesterly",
+            "southwestern","southwestward","southwestwards","souvenir","sovereign",
+            "sovereignty","soviet","sow","sox","soy",
+            "soybean","sozzled","spa","space","spacecraft",
+            "spaceship","spacesuit","spacing","spacious","spade",
+            "spadework","spaghetti","spake","spam","span",
+            "spangle","spaniel","spank","spanking","spanner",
+            "spar","spare","spareribs","sparing","spark",
+            "sparkle","sparkler","sparks","sparrow","sparse",
+            "spartan","spasm","spasmodic","spastic","spat",
+            "spatchcock","spate","spatial","spatter","spatula",
+            "spavin","spawn","spay","speak","speakeasy",
+            "speaker","speakership","spear","spearhead","spearmint",
+            "spec","special","specialise","specialised","specialist",
+            "speciality","specialize","specialized","specially","specie",
+            "species","specific","specifically","specification","specifics",
+            "specify","specimen","specious","speck","speckle",
+            "spectacle","spectacled","spectacles","spectacular","spectator",
+            "specter","spectral","spectre","spectroscope","spectrum",
+            "speculate","speculation","speculative","speech","speechify",
+            "speechless","speed","speedboat","speeding","speedometer",
+            "speedway","speedwell","speedy","spelaeology","speleology",
+            "spell","spellbind","spelling","spend","spender",
+            "spendthrift","spent","sperm","spermaceti","spermatozoa",
+            "spew","sphagnum","sphere","spherical","spheroid",
+            "sphincter","sphinx","spice","spicy","spider",
+            "spidery","spiel","spigot","spike","spikenard",
+            "spiky","spill","spillover","spillway","spin",
+            "spinach","spinal","spindle","spindly","spine",
+            "spineless","spinet","spinnaker","spinner","spinney",
+            "spinster","spiny","spiral","spire","spirit",
+            "spirited","spiritless","spirits","spiritual","spiritualise",
+            "spiritualism","spirituality","spiritualize","spirituous","spirt",
+            "spit","spite","spitfire","spittle","spittoon",
+            "spiv","splash","splashy","splat","splatter",
+            "splay","splayfoot","spleen","splendid","splendiferous",
+            "splendor","splendour","splenetic","splice","splicer",
+            "splint","splinter","split","splits","splitting",
+            "splotch","splurge","splutter","spoil","spoilage",
+            "spoils","spoilsport","spoke","spoken","spokeshave",
+            "spokesman","spoliation","spondee","sponge","spongy",
+            "sponsor","spontaneous","spoof","spook","spooky",
+            "spool","spoon","spoonerism","spoonful","spoor",
+            "sporadic","spore","sporran","sport","sporting",
+            "sportive","sports","sportsman","sportsmanlike","sportsmanship",
+            "sporty","spot","spotless","spotlight","spotted",
+            "spotter","spotty","spouse","spout","sprain",
+            "sprang","sprat","sprawl","spray","sprayer",
+            "spread","spree","sprig","sprigged","sprightly",
+            "spring","springboard","springbok","springtime","springy",
+            "sprinkle","sprinkler","sprinkling","sprint","sprite",
+            "sprocket","sprout","spruce","sprung","spry",
+            "spud","spume","spun","spunk","spur",
+            "spurious","spurn","spurt","sputter","sputum",
+            "spy","spyglass","squab","squabble","squad",
+            "squadron","squalid","squall","squalor","squander",
+            "square","squash","squashy","squat","squatter",
+            "squaw","squawk","squeak","squeaky","squeal",
+            "squeamish","squeegee","squeeze","squeezer","squelch",
+            "squib","squid","squidgy","squiffy","squiggle",
+            "squint","squirarchy","squire","squirearchy","squirm",
+            "squirrel","squirt","squirter","sri","srn",
+            "ssh","stab","stabbing","stabilise","stabiliser",
+            "stability","stabilize","stabilizer","stable","stabling",
+            "staccato","stack","stadium","staff","stag",
+            "stage","stagecoach","stager","stagestruck","stagger",
+            "staggering","staggers","staging","stagnant","stagnate",
+            "stagy","staid","stain","stainless","stair",
+            "staircase","stairs","stairwell","stake","stakeholder",
+            "stakes","stalactite","stalagmite","stale","stalemate",
+            "stalk","stall","stallholder","stallion","stalls",
+            "stalwart","stamen","stamina","stammer","stamp",
+            "stampede","stance","stanch","stanchion","stand",
+            "standard","standardise","standardize","standby","standing",
+            "standoffish","standpipe","standpoint","standstill","stank",
+            "stanza","staple","stapler","star","starboard",
+            "starch","starchy","stardom","stardust","stare",
+            "starfish","stargazer","stargazing","staring","stark",
+            "starkers","starlet","starlight","starling","starlit",
+            "starry","stars","start","starter","starters",
+            "startle","starvation","starve","starveling","stash",
+            "state","statecraft","statehood","stateless","stately",
+            "statement","stateroom","states","stateside","statesman",
+            "static","statics","station","stationary","stationer",
+            "stationery","stationmaster","statistic","statistician","statistics",
+            "statuary","statue","statuesque","statuette","stature",
+            "status","statute","statutory","staunch","stave",
+            "staves","stay","stayer","stays","std",
+            "stead","steadfast","steady","steak","steal",
+            "stealth","stealthy","steam","steamboat","steamer",
+            "steamroller","steamship","steed","steel","steelworker",
+            "steelworks","steely","steelyard","steenbok","steep",
+            "steepen","steeple","steeplechase","steeplejack","steer",
+            "steerage","steerageway","steersman","stein","steinbok",
+            "stele","stellar","stem","stench","stencil",
+            "stenographer","stenography","stentorian","step","stepbrother",
+            "stepchild","stepladder","stepparent","steps","stepsister",
+            "stereo","stereoscope","stereoscopic","stereotype","sterile",
+            "sterilise","sterility","sterilize","sterling","stern",
+            "sternum","steroid","stertorous","stet","stethoscope",
+            "stetson","stevedore","stew","steward","stewardess",
+            "stewardship","stewed","stick","sticker","stickleback",
+            "stickler","stickpin","sticks","sticky","stiff",
+            "stiffen","stiffener","stiffening","stifle","stigma",
+            "stigmata","stigmatise","stigmatize","stile","stiletto",
+            "still","stillbirth","stillborn","stillroom","stilly",
+            "stilt","stilted","stilton","stimulant","stimulate",
+            "stimulus","sting","stinger","stingo","stingray",
+            "stingy","stink","stinking","stint","stipend",
+            "stipendiary","stipple","stipulate","stipulation","stir",
+            "stirrer","stirring","stirrup","stitch","stoat",
+            "stock","stockade","stockbreeder","stockbroker","stockcar",
+            "stockfish","stockholder","stockily","stockinet","stockinette",
+            "stocking","stockist","stockjobber","stockman","stockpile",
+            "stockpot","stockroom","stocks","stocktaking","stocky",
+            "stockyard","stodge","stodgy","stoic","stoical",
+            "stoicism","stoke","stokehold","stoker","stole",
+            "stolen","stolid","stomach","stomachache","stomachful",
+            "stomp","stone","stonebreaker","stonecutter","stoned",
+            "stoneless","stonemason","stonewall","stoneware","stonework",
+            "stony","stood","stooge","stool","stoolpigeon",
+            "stoop","stop","stopcock","stopgap","stopover",
+            "stoppage","stopper","stopping","stopwatch","storage",
+            "store","storehouse","storekeeper","storeroom","stores",
+            "storey","storied","stork","storm","stormbound",
+            "stormy","story","storybook","storyteller","stoup",
+            "stout","stouthearted","stove","stovepipe","stow",
+            "stowage","stowaway","straddle","stradivarius","strafe",
+            "straggle","straggly","straight","straightaway","straightedge",
+            "straighten","straightforward","straightway","strain","strained",
+            "strainer","strait","straitened","straitjacket","straitlaced",
+            "straits","strand","stranded","strange","stranger",
+            "strangle","stranglehold","strangulate","strangulation","strap",
+            "straphanging","strapless","strapping","strata","stratagem",
+            "strategic","strategist","strategy","stratification","stratify",
+            "stratosphere","stratum","straw","strawberry","strawboard",
+            "stray","streak","streaker","streaky","stream",
+            "streamer","streamline","streamlined","street","streetcar",
+            "streetwalker","strength","strengthen","strenuous","streptococcus",
+            "streptomycin","stress","stretch","stretcher","stretchy",
+            "strew","strewth","striated","striation","stricken",
+            "strict","stricture","stride","stridency","strident",
+            "stridulate","strife","strike","strikebound","strikebreaker",
+            "strikebreaking","striker","striking","string","stringency",
+            "stringent","strings","stringy","strip","stripe",
+            "striped","stripling","stripper","striptease","stripy",
+            "strive","strode","stroke","stroll","stroller",
+            "strolling","strong","strongarm","strongbox","stronghold",
+            "strontium","strop","strophe","stroppy","strove",
+            "struck","structural","structure","strudel","struggle",
+            "strum","strumpet","strung","strut","strychnine",
+            "stub","stubble","stubborn","stubby","stucco",
+            "stuck","stud","studbook","student","studied",
+            "studio","studious","study","stuff","stuffing",
+            "stuffy","stultify","stumble","stump","stumper",
+            "stumpy","stun","stung","stunk","stunner",
+            "stunning","stunt","stupefaction","stupefy","stupendous",
+            "stupid","stupidity","stupor","sturdy","sturgeon",
+            "stutter","sty","stye","stygian","style",
+            "stylise","stylish","stylist","stylistic","stylistics",
+            "stylize","stylus","stymie","styptic","suasion",
+            "suave","sub","subaltern","subatomic","subcommittee",
+            "subconscious","subcontinent","subcontract","subcontractor","subcutaneous",
+            "subdivide","subdue","subdued","subedit","subeditor",
+            "subheading","subhuman","subject","subjection","subjective",
+            "subjoin","subjugate","subjunctive","sublease","sublet",
+            "sublieutenant","sublimate","sublime","subliminal","submarine",
+            "submariner","submerge","submergence","submersible","submission",
+            "submissive","submit","subnormal","suborbital","subordinate",
+            "suborn","subplot","subpoena","subscribe","subscriber",
+            "subscription","subsequent","subservience","subservient","subside",
+            "subsidence","subsidiary","subsidise","subsidize","subsidy",
+            "subsist","subsistence","subsoil","subsonic","substance",
+            "substandard","substantial","substantially","substantiate","substantival",
+            "substantive","substation","substitute","substratum","substructure",
+            "subsume","subtenant","subtend","subterfuge","subterranean",
+            "subtitle","subtitles","subtle","subtlety","subtopia",
+            "subtract","subtraction","subtropical","suburb","suburban",
+            "suburbanite","suburbia","suburbs","subvention","subversive",
+            "subvert","subway","succeed","success","successful",
+            "succession","successive","successor","succinct","succor",
+            "succour","succubus","succulence","succulent","succumb",
+            "such","suchlike","suck","sucker","suckle",
+            "suckling","sucrose","suction","sudden","suds",
+            "sue","suet","suffer","sufferable","sufferance",
+            "sufferer","suffering","suffice","sufficiency","sufficient",
+            "suffix","suffocate","suffragan","suffrage","suffragette",
+            "suffuse","sugar","sugarcane","sugarcoated","sugarloaf",
+            "sugary","suggest","suggestible","suggestion","suggestive",
+            "suicidal","suicide","suit","suitability","suitable",
+            "suitcase","suiting","suitor","sulfate","sulfide",
+            "sulfur","sulfuret","sulfurous","sulk","sulks",
+            "sulky","sullen","sully","sulphate","sulphide",
+            "sulphur","sulphuret","sulphurous","sultan","sultana",
+            "sultanate","sultry","sum","sumac","sumach",
+            "summarise","summarize","summary","summat","summation",
+            "summer","summerhouse","summertime","summery","summit",
+            "summon","summons","sump","sumptuary","sumptuous",
+            "sun","sunbaked","sunbathe","sunbeam","sunblind",
+            "sunbonnet","sunburn","sunburnt","sundae","sunday",
+            "sundeck","sunder","sundew","sundial","sundown",
+            "sundowner","sundrenched","sundries","sundry","sunfish",
+            "sunflower","sung","sunglasses","sunk","sunken",
+            "sunlamp","sunless","sunlight","sunlit","sunny",
+            "sunray","sunrise","sunroof","sunset","sunshade",
+            "sunshine","sunspot","sunstroke","suntan","suntrap",
+            "sup","super","superabundance","superabundant","superannuate",
+            "superannuated","superannuation","superb","supercharged","supercharger",
+            "supercilious","superconductivity","superduper","superego","superficial",
+            "superficies","superfine","superfluity","superfluous","superhuman",
+            "superimpose","superintend","superintendent","superior","superlative",
+            "superlatively","superman","supermarket","supernal","supernatural",
+            "supernova","supernumerary","superscription","supersede","supersession",
+            "supersonic","superstar","superstition","superstitious","superstructure",
+            "supertax","supervene","supervise","supervisory","supine",
+            "supper","supplant","supple","supplement","supplementary",
+            "suppliant","supplicant","supplicate","supplier","supplies",
+            "supply","support","supportable","supporter","supportive",
+            "suppose","supposed","supposedly","supposing","supposition",
+            "suppository","suppress","suppression","suppressive","suppressor",
+            "suppurate","supranational","supremacist","supremacy","supreme",
+            "surcharge","surcoat","surd","sure","surefire",
+            "surefooted","surely","surety","surf","surface",
+            "surfboard","surfboat","surfeit","surfer","surge",
+            "surgeon","surgery","surgical","surly","surmise",
+            "surmount","surname","surpass","surpassing","surplice",
+            "surplus","surprise","surprising","surreal","surrealism",
+            "surrealist","surrealistic","surrender","surreptitious","surrey",
+            "surrogate","surround","surrounding","surroundings","surtax",
+            "surveillance","survey","surveyor","survival","survive",
+            "survivor","susceptibilities","susceptibility","susceptible","suspect",
+            "suspend","suspender","suspenders","suspense","suspension",
+            "suspicion","suspicious","sustain","sustenance","suttee",
+            "suture","suzerain","suzerainty","svelte","swab",
+            "swaddle","swag","swagger","swain","swallow",
+            "swallowtailed","swam","swami","swamp","swampy",
+            "swan","swank","swanky","swansdown","swansong",
+            "swap","sward","swarf","swarm","swarthy",
+            "swashbuckler","swashbuckling","swastika","swat","swatch",
+            "swath","swathe","swatter","sway","swayback",
+            "swear","swearword","sweat","sweatband","sweated",
+            "sweater","sweatshirt","sweatshop","sweaty","swede",
+            "sweep","sweeper","sweeping","sweepings","sweepstake",
+            "sweepstakes","sweet","sweetbread","sweetbriar","sweetbrier",
+            "sweeten","sweetener","sweetening","sweetheart","sweetie",
+            "sweetish","sweetmeat","sweets","swell","swelling",
+            "swelter","sweltering","swept","swerve","swift",
+            "swig","swill","swim","swimming","swimmingly",
+            "swindle","swine","swineherd","swing","swingeing",
+            "swinger","swinging","swinish","swipe","swirl",
+            "swish","switch","switchback","switchblade","switchboard",
+            "switchgear","switchman","swivel","swiz","swizzle",
+            "swollen","swoon","swoop","swop","sword",
+            "swordfish","swordplay","swordsman","swordsmanship","swordstick",
+            "swore","sworn","swot","swum","swung",
+            "sybarite","sybaritic","sycamore","sycophant","sycophantic",
+            "sylabub","syllabary","syllabic","syllabify","syllable",
+            "syllabub","syllabus","syllogism","syllogistic","sylph",
+            "sylphlike","sylvan","symbiosis","symbol","symbolic",
+            "symbolise","symbolism","symbolist","symbolize","symmetrical",
+            "symmetry","sympathetic","sympathies","sympathise","sympathize",
+            "sympathy","symphonic","symphony","symposium","symptom",
+            "symptomatic","synagogue","sync","synch","synchonise",
+            "synchromesh","synchronize","synchrotron","syncopate","syncope",
+            "syndic","syndicalism","syndicate","syndrome","synod",
+            "synonym","synonymous","synopsis","synoptic","syntactic",
+            "syntax","synthesis","synthesise","synthesiser","synthesize",
+            "synthesizer","synthetic","syphilis","syphilitic","syphon",
+            "syringe","syrup","syrupy","system","systematic",
+            "systematise","systematize","systemic","tab","tabard",
+            "tabasco","tabby","tabernacle","table","tableau",
+            "tablecloth","tableland","tablemat","tablespoon","tablespoonful",
+            "tablet","tableware","tabloid","taboo","tabor",
+            "tabular","tabulate","tabulator","tacit","taciturn",
+            "tack","tackiness","tackle","tacky","tact",
+            "tactic","tactical","tactician","tactics","tactile",
+            "tactual","tadpole","taffeta","taffrail","taffy",
+            "tag","tail","tailback","tailboard","tailcoat",
+            "taillight","tailor","tailpiece","tails","tailspin",
+            "tailwind","taint","take","takeaway","takeoff",
+            "takeover","taking","takings","talc","tale",
+            "talebearer","talent","talented","talisman","talk",
+            "talkative","talker","talkie","talks","tall",
+            "tallboy","tallow","tally","tallyho","tallyman",
+            "talmud","talon","tamale","tamarind","tamarisk",
+            "tambour","tambourine","tame","tammany","tamp",
+            "tamper","tampon","tan","tandem","tang",
+            "tangent","tangential","tangerine","tangible","tangle",
+            "tango","tank","tankard","tanker","tanner",
+            "tannery","tannin","tanning","tannoy","tansy",
+            "tantalise","tantalize","tantalus","tantamount","tantrum",
+            "taoism","tap","tape","taper","tapestry",
+            "tapeworm","tapioca","tapir","tappet","taproom",
+            "taproot","taps","tar","tarantella","tarantula",
+            "tarboosh","tardy","target","tariff","tarmac",
+            "tarn","tarnish","taro","tarot","tarpaulin",
+            "tarragon","tarry","tarsal","tarsus","tart",
+            "tartan","tartar","task","taskmaster","tassel",
+            "taste","tasteful","tasteless","taster","tasty",
+            "tat","tatas","tatter","tattered","tatters",
+            "tatting","tattle","tattoo","tattooist","tatty",
+            "taught","taunt","taurus","taut","tautological",
+            "tautology","tavern","tawdry","tawny","tawse",
+            "tax","taxation","taxi","taxidermist","taxidermy",
+            "taximeter","taxonomy","tea","teabag","teacake",
+            "teach","teacher","teaching","teacup","teacupful",
+            "teagarden","teahouse","teak","teakettle","teal",
+            "tealeaf","team","teamster","teamwork","teapot",
+            "tear","tearaway","teardrop","tearful","teargas",
+            "tearjerker","tearless","tearoom","tease","teasel",
+            "teaser","teaspoon","teaspoonful","teat","teatime",
+            "teazle","tech","technical","technicality","technician",
+            "technique","technocracy","technocrat","technological","technologist",
+            "technology","techy","tedious","tedium","tee",
+            "teem","teeming","teenage","teenager","teens",
+            "teenybopper","teeter","teeth","teethe","teetotal",
+            "teetotaler","teetotaller","teflon","tegument","tele",
+            "telecast","telecommunications","telegram","telegraph","telegrapher",
+            "telegraphese","telegraphic","telemarketing","telemeter","telemetry",
+            "teleology","telepathic","telepathist","telepathy","telephone",
+            "telephonist","telephony","telephotograph","telephotography","teleprinter",
+            "teleprompter","telescope","telescopic","televise","television",
+            "televisual","telex","telfer","tell","teller",
+            "telling","telltale","telly","telpher","telstar",
+            "temerity","temp","temper","tempera","temperament",
+            "temperamental","temperance","temperate","temperature","tempest",
+            "tempestuous","template","temple","templet","tempo",
+            "temporal","temporary","temporise","temporize","tempt",
+            "temptation","ten","tenable","tenacious","tenacity",
+            "tenancy","tenant","tenantry","tench","tend",
+            "tendency","tendentious","tender","tenderfoot","tenderhearted",
+            "tenderise","tenderize","tenderloin","tendon","tendril",
+            "tenement","tenet","tenner","tennis","tenon",
+        };
     }
 }
\ No newline at end of file


[20/50] [abbrv] lucenenet git commit: Implemented missing Lucene41WithOrds class which was causing some random codec tests to fail sporadically.

Posted by sy...@apache.org.
Implemented missing Lucene41WithOrds class which was causing some random codec tests to fail sporadically.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/406f88a6
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/406f88a6
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/406f88a6

Branch: refs/heads/analysis-work
Commit: 406f88a6b778c3942707341902b1d6f5e263ce0f
Parents: 828819d
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Aug 21 07:21:46 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 07:21:46 2016 +0700

----------------------------------------------------------------------
 .../Codecs/Lucene41Ords/Lucene41WithOrds.cs     | 126 +++++++++++++++++++
 .../Lucene.Net.TestFramework.csproj             |   1 +
 2 files changed, 127 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/406f88a6/src/Lucene.Net.TestFramework/Codecs/Lucene41Ords/Lucene41WithOrds.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Codecs/Lucene41Ords/Lucene41WithOrds.cs b/src/Lucene.Net.TestFramework/Codecs/Lucene41Ords/Lucene41WithOrds.cs
new file mode 100644
index 0000000..37f8243
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Codecs/Lucene41Ords/Lucene41WithOrds.cs
@@ -0,0 +1,126 @@
+\ufeffusing Lucene.Net.Codecs.BlockTerms;
+using Lucene.Net.Codecs.Lucene41;
+using Lucene.Net.Index;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Codecs.Lucene41Ords
+{
+    public sealed class Lucene41WithOrds : PostingsFormat
+    {
+        public Lucene41WithOrds()
+            : base("Lucene41WithOrds")
+        {
+        }
+
+        public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
+        {
+            PostingsWriterBase docs = new Lucene41PostingsWriter(state);
+
+            // TODO: should we make the terms index more easily
+            // pluggable?  Ie so that this codec would record which
+            // index impl was used, and switch on loading?
+            // Or... you must make a new Codec for this?
+            TermsIndexWriterBase indexWriter;
+            bool success = false;
+            try
+            {
+                indexWriter = new FixedGapTermsIndexWriter(state);
+                success = true;
+            }
+            finally
+            {
+                if (!success)
+                {
+                    docs.Dispose();
+                }
+            }
+
+            success = false;
+            try
+            {
+                // Must use BlockTermsWriter (not BlockTree) because
+                // BlockTree doens't support ords (yet)...
+                FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs);
+                success = true;
+                return ret;
+            }
+            finally
+            {
+                if (!success)
+                {
+                    try
+                    {
+                        docs.Dispose();
+                    }
+                    finally
+                    {
+                        indexWriter.Dispose();
+                    }
+                }
+            }
+        }
+
+        public override FieldsProducer FieldsProducer(SegmentReadState state)
+        {
+            PostingsReaderBase postings = new Lucene41PostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, state.SegmentSuffix);
+            TermsIndexReaderBase indexReader;
+
+            bool success = false;
+            try
+            {
+                indexReader = new FixedGapTermsIndexReader(state.Directory,
+                                                           state.FieldInfos,
+                                                           state.SegmentInfo.Name,
+                                                           state.TermsIndexDivisor,
+                                                           BytesRef.UTF8SortedAsUnicodeComparer,
+                                                           state.SegmentSuffix, state.Context);
+                success = true;
+            }
+            finally
+            {
+                if (!success)
+                {
+                    postings.Dispose();
+                }
+            }
+
+            success = false;
+            try
+            {
+                FieldsProducer ret = new BlockTermsReader(indexReader,
+                                                          state.Directory,
+                                                          state.FieldInfos,
+                                                          state.SegmentInfo,
+                                                          postings,
+                                                          state.Context,
+                                                          state.SegmentSuffix);
+                success = true;
+                return ret;
+            }
+            finally
+            {
+                if (!success)
+                {
+                    try
+                    {
+                        postings.Dispose();
+                    }
+                    finally
+                    {
+                        indexReader.Dispose();
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Extension of freq postings file
+        /// </summary>
+        static readonly string FREQ_EXTENSION = "frq";
+
+        /// <summary>
+        /// Extension of prox postings file
+        /// </summary>
+        static readonly string PROX_EXTENSION = "prx";
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/406f88a6/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj b/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
index 972bcce..f62716e 100644
--- a/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
+++ b/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
@@ -227,6 +227,7 @@
     <Compile Include="Codecs\lucene40\Lucene40SkipListWriter.cs">
       <SubType>Code</SubType>
     </Compile>
+    <Compile Include="Codecs\Lucene41Ords\Lucene41WithOrds.cs" />
     <Compile Include="Codecs\lucene41\Lucene41RWCodec.cs">
       <SubType>Code</SubType>
     </Compile>


[31/50] [abbrv] lucenenet git commit: Fixed bugs in the Join.TestJoinUtil that were causing the TestSingleValueRandomJoin() test to fail.

Posted by sy...@apache.org.
Fixed bugs in the Join.TestJoinUtil that were causing the TestSingleValueRandomJoin() test to fail.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/5b9b2fed
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/5b9b2fed
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/5b9b2fed

Branch: refs/heads/analysis-work
Commit: 5b9b2fedcd24523cb548054903a64e724e379e07
Parents: 5502415
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Aug 21 10:22:11 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 10:22:11 2016 +0700

----------------------------------------------------------------------
 src/Lucene.Net.Tests.Join/TestJoinUtil.cs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5b9b2fed/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Join/TestJoinUtil.cs b/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
index 1d2da10..4e25017 100644
--- a/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
+++ b/src/Lucene.Net.Tests.Join/TestJoinUtil.cs
@@ -892,7 +892,7 @@ namespace Lucene.Net.Tests.Join
             private readonly IDictionary<int, JoinScore> _docToJoinScore;
 
             private SortedSetDocValues docTermOrds;
-            private readonly BytesRef scratch;
+            private readonly BytesRef scratch = new BytesRef();
             private int docBase;
 
             public CollectorAnonymousInnerClassHelper5(TestJoinUtil testJoinUtil, IndexIterationContext context, 
@@ -958,7 +958,7 @@ namespace Lucene.Net.Tests.Join
 
             private BinaryDocValues terms;
             private int docBase;
-            private readonly BytesRef spare;
+            private readonly BytesRef spare = new BytesRef();
 
             public CollectorAnonymousInnerClassHelper6(TestJoinUtil testJoinUtil, 
                 IndexIterationContext context, string toField, 
@@ -974,7 +974,7 @@ namespace Lucene.Net.Tests.Join
             public override void Collect(int doc)
             {
                 terms.Get(doc, spare);
-                JoinScore joinScore = JoinValueToJoinScores[spare];
+                JoinScore joinScore = JoinValueToJoinScores.ContainsKey(spare) ? JoinValueToJoinScores[spare] : null;
                 if (joinScore == null)
                 {
                     return;


[05/50] [abbrv] lucenenet git commit: Ported Analysis.Hunspell + tests

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
index bf59a70..298f481 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
@@ -1,9 +1,10 @@
-\ufeffusing System.Collections.Generic;
+\ufeffusing Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using System.Collections.Generic;
 
-namespace org.apache.lucene.analysis.hunspell
+namespace Lucene.Net.Analysis.Hunspell
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -20,152 +21,149 @@ namespace org.apache.lucene.analysis.hunspell
 	 * limitations under the License.
 	 */
 
-
-	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-	using CharsRef = org.apache.lucene.util.CharsRef;
-
-	/// <summary>
-	/// TokenFilter that uses hunspell affix rules and words to stem tokens.  Since hunspell supports a word having multiple
-	/// stems, this filter can emit multiple tokens for each consumed token
-	/// 
-	/// <para>
-	/// Note: This filter is aware of the <seealso cref="KeywordAttribute"/>. To prevent
-	/// certain terms from being passed to the stemmer
-	/// <seealso cref="KeywordAttribute#isKeyword()"/> should be set to <code>true</code>
-	/// in a previous <seealso cref="TokenStream"/>.
-	/// 
-	/// Note: For including the original term as well as the stemmed version, see
-	/// <seealso cref="org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory"/>
-	/// </para>
-	/// 
-	/// @lucene.experimental
-	/// </summary>
-	public sealed class HunspellStemFilter : TokenFilter
-	{
-
-	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-	  private readonly PositionIncrementAttribute posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
-	  private readonly KeywordAttribute keywordAtt = addAttribute(typeof(KeywordAttribute));
-	  private readonly Stemmer stemmer;
-
-	  private IList<CharsRef> buffer;
-	  private State savedState;
-
-	  private readonly bool dedup;
-	  private readonly bool longestOnly;
-
-	  /// <summary>
-	  /// Create a <seealso cref="HunspellStemFilter"/> outputting all possible stems. </summary>
-	  ///  <seealso cref= #HunspellStemFilter(TokenStream, Dictionary, boolean)  </seealso>
-	  public HunspellStemFilter(TokenStream input, Dictionary dictionary) : this(input, dictionary, true)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Create a <seealso cref="HunspellStemFilter"/> outputting all possible stems. </summary>
-	  ///  <seealso cref= #HunspellStemFilter(TokenStream, Dictionary, boolean, boolean)  </seealso>
-	  public HunspellStemFilter(TokenStream input, Dictionary dictionary, bool dedup) : this(input, dictionary, dedup, false)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Creates a new HunspellStemFilter that will stem tokens from the given TokenStream using affix rules in the provided
-	  /// Dictionary
-	  /// </summary>
-	  /// <param name="input"> TokenStream whose tokens will be stemmed </param>
-	  /// <param name="dictionary"> HunspellDictionary containing the affix rules and words that will be used to stem the tokens </param>
-	  /// <param name="longestOnly"> true if only the longest term should be output. </param>
-	  public HunspellStemFilter(TokenStream input, Dictionary dictionary, bool dedup, bool longestOnly) : base(input)
-	  {
-		this.dedup = dedup && longestOnly == false; // don't waste time deduping if longestOnly is set
-		this.stemmer = new Stemmer(dictionary);
-		this.longestOnly = longestOnly;
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
-	  public override bool incrementToken()
-	  {
-		if (buffer != null && buffer.Count > 0)
-		{
-		  CharsRef nextStem = buffer.Remove(0);
-		  restoreState(savedState);
-		  posIncAtt.PositionIncrement = 0;
-		  termAtt.setEmpty().append(nextStem);
-		  return true;
-		}
-
-		if (!input.incrementToken())
-		{
-		  return false;
-		}
-
-		if (keywordAtt.Keyword)
-		{
-		  return true;
-		}
-
-		buffer = dedup ? stemmer.uniqueStems(termAtt.buffer(), termAtt.length()) : stemmer.stem(termAtt.buffer(), termAtt.length());
-
-		if (buffer.Count == 0) // we do not know this word, return it unchanged
-		{
-		  return true;
-		}
-
-		if (longestOnly && buffer.Count > 1)
-		{
-		  buffer.Sort(lengthComparator);
-		}
-
-		CharsRef stem = buffer.Remove(0);
-		termAtt.setEmpty().append(stem);
-
-		if (longestOnly)
-		{
-		  buffer.Clear();
-		}
-		else
-		{
-		  if (buffer.Count > 0)
-		  {
-			savedState = captureState();
-		  }
-		}
-
-		return true;
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
-	  public override void reset()
-	  {
-		base.reset();
-		buffer = null;
-	  }
-
-	  internal static readonly IComparer<CharsRef> lengthComparator = new ComparatorAnonymousInnerClassHelper();
-
-	  private class ComparatorAnonymousInnerClassHelper : IComparer<CharsRef>
-	  {
-		  public ComparatorAnonymousInnerClassHelper()
-		  {
-		  }
-
-		  public virtual int Compare(CharsRef o1, CharsRef o2)
-		  {
-			if (o2.length == o1.length)
-			{
-			  // tie break on text
-			  return o2.compareTo(o1);
-			}
-			else
-			{
-			  return o2.length < o1.length ? - 1 : 1;
-			}
-		  }
-	  }
-	}
-
+    /// <summary>
+    /// TokenFilter that uses hunspell affix rules and words to stem tokens.  Since hunspell supports a word having multiple
+    /// stems, this filter can emit multiple tokens for each consumed token
+    /// 
+    /// <para>
+    /// Note: This filter is aware of the <seealso cref="KeywordAttribute"/>. To prevent
+    /// certain terms from being passed to the stemmer
+    /// <seealso cref="KeywordAttribute#isKeyword()"/> should be set to <code>true</code>
+    /// in a previous <seealso cref="TokenStream"/>.
+    /// 
+    /// Note: For including the original term as well as the stemmed version, see
+    /// <seealso cref="org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory"/>
+    /// </para>
+    /// 
+    /// @lucene.experimental
+    /// </summary>
+    public sealed class HunspellStemFilter : TokenFilter
+    {
+
+        private readonly ICharTermAttribute termAtt;
+        private readonly IPositionIncrementAttribute posIncAtt;
+        private readonly IKeywordAttribute keywordAtt;
+        private readonly Stemmer stemmer;
+
+        private List<CharsRef> buffer;
+        private State savedState;
+
+        private readonly bool dedup;
+        private readonly bool longestOnly;
+
+        /// <summary>
+        /// Create a <seealso cref="HunspellStemFilter"/> outputting all possible stems. </summary>
+        ///  <seealso cref= #HunspellStemFilter(TokenStream, Dictionary, boolean)  </seealso>
+        public HunspellStemFilter(TokenStream input, Dictionary dictionary)
+              : this(input, dictionary, true)
+        {
+        }
+
+        /// <summary>
+        /// Create a <seealso cref="HunspellStemFilter"/> outputting all possible stems. </summary>
+        ///  <seealso cref= #HunspellStemFilter(TokenStream, Dictionary, boolean, boolean)  </seealso>
+        public HunspellStemFilter(TokenStream input, Dictionary dictionary, bool dedup)
+              : this(input, dictionary, dedup, false)
+        {
+        }
+
+        /// <summary>
+        /// Creates a new HunspellStemFilter that will stem tokens from the given TokenStream using affix rules in the provided
+        /// Dictionary
+        /// </summary>
+        /// <param name="input"> TokenStream whose tokens will be stemmed </param>
+        /// <param name="dictionary"> HunspellDictionary containing the affix rules and words that will be used to stem the tokens </param>
+        /// <param name="longestOnly"> true if only the longest term should be output. </param>
+        public HunspellStemFilter(TokenStream input, Dictionary dictionary, bool dedup, bool longestOnly) :
+              base(input)
+        {
+            this.dedup = dedup && longestOnly == false; // don't waste time deduping if longestOnly is set
+            this.stemmer = new Stemmer(dictionary);
+            this.longestOnly = longestOnly;
+            termAtt = AddAttribute<ICharTermAttribute>();
+            posIncAtt = AddAttribute<IPositionIncrementAttribute>();
+            keywordAtt = AddAttribute<IKeywordAttribute>();
+        }
+
+        public override bool IncrementToken()
+        {
+            if (buffer != null && buffer.Count > 0)
+            {
+                CharsRef nextStem = buffer[0];
+                buffer.RemoveAt(0);
+                RestoreState(savedState);
+                posIncAtt.PositionIncrement = 0;
+                termAtt.SetEmpty().Append(nextStem);
+                return true;
+            }
+
+            if (!input.IncrementToken())
+            {
+                return false;
+            }
+
+            if (keywordAtt.Keyword)
+            {
+                return true;
+            }
+
+            buffer = new List<CharsRef>(dedup ? stemmer.UniqueStems(termAtt.Buffer(), termAtt.Length) : stemmer.Stem(termAtt.Buffer(), termAtt.Length));
+
+            if (buffer.Count == 0) // we do not know this word, return it unchanged
+            {
+                return true;
+            }
+
+            if (longestOnly && buffer.Count > 1)
+            {
+                buffer.Sort(lengthComparator);
+            }
+
+            CharsRef stem = buffer[0];
+            buffer.RemoveAt(0);
+            termAtt.SetEmpty().Append(stem);
+
+            if (longestOnly)
+            {
+                buffer.Clear();
+            }
+            else
+            {
+                if (buffer.Count > 0)
+                {
+                    savedState = CaptureState();
+                }
+            }
+
+            return true;
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            buffer = null;
+        }
+
+        internal static readonly IComparer<CharsRef> lengthComparator = new ComparatorAnonymousInnerClassHelper();
+
+        private class ComparatorAnonymousInnerClassHelper : IComparer<CharsRef>
+        {
+            public ComparatorAnonymousInnerClassHelper()
+            {
+            }
+
+            public virtual int Compare(CharsRef o1, CharsRef o2)
+            {
+                if (o2.Length == o1.Length)
+                {
+                    // tie break on text
+                    return o2.CompareTo(o1);
+                }
+                else
+                {
+                    return o2.Length < o1.Length ? -1 : 1;
+                }
+            }
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs
index c9888fd..4d720f4 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs
@@ -1,9 +1,12 @@
-\ufeffusing System.Collections.Generic;
+\ufeffusing Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.IO;
 
-namespace org.apache.lucene.analysis.hunspell
+namespace Lucene.Net.Analysis.Hunspell
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -20,97 +23,88 @@ namespace org.apache.lucene.analysis.hunspell
 	 * limitations under the License.
 	 */
 
+    /// <summary>
+    /// TokenFilterFactory that creates instances of <seealso cref="HunspellStemFilter"/>.
+    /// Example config for British English:
+    /// <pre class="prettyprint">
+    /// &lt;filter class=&quot;solr.HunspellStemFilterFactory&quot;
+    ///         dictionary=&quot;en_GB.dic,my_custom.dic&quot;
+    ///         affix=&quot;en_GB.aff&quot; 
+    ///         ignoreCase=&quot;false&quot;
+    ///         longestOnly=&quot;false&quot; /&gt;</pre>
+    /// Both parameters dictionary and affix are mandatory.
+    /// Dictionaries for many languages are available through the OpenOffice project.
+    /// 
+    /// See <a href="http://wiki.apache.org/solr/Hunspell">http://wiki.apache.org/solr/Hunspell</a>
+    /// @lucene.experimental
+    /// </summary>
+    public class HunspellStemFilterFactory : TokenFilterFactory, IResourceLoaderAware
+    {
+        private const string PARAM_DICTIONARY = "dictionary";
+        private const string PARAM_AFFIX = "affix";
+        private const string PARAM_RECURSION_CAP = "recursionCap";
+        private const string PARAM_IGNORE_CASE = "ignoreCase";
+        private const string PARAM_LONGEST_ONLY = "longestOnly";
 
-	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
-	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
-	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
-	using IOUtils = org.apache.lucene.util.IOUtils;
-
-	/// <summary>
-	/// TokenFilterFactory that creates instances of <seealso cref="HunspellStemFilter"/>.
-	/// Example config for British English:
-	/// <pre class="prettyprint">
-	/// &lt;filter class=&quot;solr.HunspellStemFilterFactory&quot;
-	///         dictionary=&quot;en_GB.dic,my_custom.dic&quot;
-	///         affix=&quot;en_GB.aff&quot; 
-	///         ignoreCase=&quot;false&quot;
-	///         longestOnly=&quot;false&quot; /&gt;</pre>
-	/// Both parameters dictionary and affix are mandatory.
-	/// Dictionaries for many languages are available through the OpenOffice project.
-	/// 
-	/// See <a href="http://wiki.apache.org/solr/Hunspell">http://wiki.apache.org/solr/Hunspell</a>
-	/// @lucene.experimental
-	/// </summary>
-	public class HunspellStemFilterFactory : TokenFilterFactory, ResourceLoaderAware
-	{
-	  private const string PARAM_DICTIONARY = "dictionary";
-	  private const string PARAM_AFFIX = "affix";
-	  private const string PARAM_RECURSION_CAP = "recursionCap";
-	  private const string PARAM_IGNORE_CASE = "ignoreCase";
-	  private const string PARAM_LONGEST_ONLY = "longestOnly";
-
-	  private readonly string dictionaryFiles;
-	  private readonly string affixFile;
-	  private readonly bool ignoreCase;
-	  private readonly bool longestOnly;
-	  private Dictionary dictionary;
-
-	  /// <summary>
-	  /// Creates a new HunspellStemFilterFactory </summary>
-	  public HunspellStemFilterFactory(IDictionary<string, string> args) : base(args)
-	  {
-		dictionaryFiles = require(args, PARAM_DICTIONARY);
-		affixFile = get(args, PARAM_AFFIX);
-		ignoreCase = getBoolean(args, PARAM_IGNORE_CASE, false);
-		longestOnly = getBoolean(args, PARAM_LONGEST_ONLY, false);
-		// this isnt necessary: we properly load all dictionaries.
-		// but recognize and ignore for back compat
-		getBoolean(args, "strictAffixParsing", true);
-		// this isn't necessary: multi-stage stripping is fixed and 
-		// flags like COMPLEXPREFIXES in the data itself control this.
-		// but recognize and ignore for back compat
-		getInt(args, "recursionCap", 0);
-		if (args.Count > 0)
-		{
-		  throw new System.ArgumentException("Unknown parameters: " + args);
-		}
-	  }
+        private readonly string dictionaryFiles;
+        private readonly string affixFile;
+        private readonly bool ignoreCase;
+        private readonly bool longestOnly;
+        private Dictionary dictionary;
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
-	  public virtual void inform(ResourceLoader loader)
-	  {
-		string[] dicts = dictionaryFiles.Split(",", true);
+        /// <summary>
+        /// Creates a new HunspellStemFilterFactory </summary>
+        public HunspellStemFilterFactory(IDictionary<string, string> args) : base(args)
+        {
+            dictionaryFiles = Require(args, PARAM_DICTIONARY);
+            affixFile = Get(args, PARAM_AFFIX);
+            ignoreCase = GetBoolean(args, PARAM_IGNORE_CASE, false);
+            longestOnly = GetBoolean(args, PARAM_LONGEST_ONLY, false);
+            // this isnt necessary: we properly load all dictionaries.
+            // but recognize and ignore for back compat
+            GetBoolean(args, "strictAffixParsing", true);
+            // this isn't necessary: multi-stage stripping is fixed and 
+            // flags like COMPLEXPREFIXES in the data itself control this.
+            // but recognize and ignore for back compat
+            GetInt(args, "recursionCap", 0);
+            if (args.Count > 0)
+            {
+                throw new System.ArgumentException("Unknown parameters: " + args);
+            }
+        }
 
-		InputStream affix = null;
-		IList<InputStream> dictionaries = new List<InputStream>();
+        public virtual void Inform(IResourceLoader loader)
+        {
+            string[] dicts = dictionaryFiles.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
 
-		try
-		{
-		  dictionaries = new List<>();
-		  foreach (string file in dicts)
-		  {
-			dictionaries.Add(loader.openResource(file));
-		  }
-		  affix = loader.openResource(affixFile);
+            Stream affix = null;
+            IList<Stream> dictionaries = new List<Stream>();
 
-		  this.dictionary = new Dictionary(affix, dictionaries, ignoreCase);
-		}
-		catch (ParseException e)
-		{
-		  throw new IOException("Unable to load hunspell data! [dictionary=" + dictionaries + ",affix=" + affixFile + "]", e);
-		}
-		finally
-		{
-		  IOUtils.closeWhileHandlingException(affix);
-		  IOUtils.closeWhileHandlingException(dictionaries);
-		}
-	  }
+            try
+            {
+                dictionaries = new List<Stream>();
+                foreach (string file in dicts)
+                {
+                    dictionaries.Add(loader.OpenResource(file));
+                }
+                affix = loader.OpenResource(affixFile);
 
-	  public override TokenStream create(TokenStream tokenStream)
-	  {
-		return new HunspellStemFilter(tokenStream, dictionary, true, longestOnly);
-	  }
-	}
+                this.dictionary = new Dictionary(affix, dictionaries, ignoreCase);
+            }
+            catch (Exception e)
+            {
+                throw new IOException("Unable to load hunspell data! [dictionary=" + dictionaries + ",affix=" + affixFile + "]", e);
+            }
+            finally
+            {
+                IOUtils.CloseWhileHandlingException(affix);
+                IOUtils.CloseWhileHandlingException(dictionaries);
+            }
+        }
 
+        public override TokenStream Create(TokenStream tokenStream)
+        {
+            return new HunspellStemFilter(tokenStream, dictionary, true, longestOnly);
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
index 87872c9..597d6ec 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
@@ -1,7 +1,9 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
-{
+\ufeffusing System;
+using System.Text;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,30 +20,111 @@
 	 * limitations under the License.
 	 */
 
+    // LUCENENET NOTE: This class was refactored from its Java counterpart.
+
+    // many hunspell dictionaries use this encoding, yet java/.NET does not have it?!?!
+    internal sealed class ISO8859_14Encoding : Encoding
+    {
+        private static readonly Decoder decoder = new ISO8859_14Decoder();
+        public override Decoder GetDecoder()
+        {
+            return new ISO8859_14Decoder();
+        }
+
+        public override string EncodingName
+        {
+            get
+            {
+                return "iso-8859-14";
+            }
+        }
+
+        public override int CodePage
+        {
+            get
+            {
+                return 28604;
+            }
+        }
+
+        public override int GetCharCount(byte[] bytes, int index, int count)
+        {
+            return decoder.GetCharCount(bytes, index, count);
+        }
+
+        public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
+        {
+            return decoder.GetChars(bytes, byteIndex, byteCount, chars, charIndex);
+        }
+
+        public override int GetMaxCharCount(int byteCount)
+        {
+            return byteCount;
+        }
+
+
+        #region Encoding Not Implemented
+        public override int GetByteCount(char[] chars, int index, int count)
+        {
+            throw new NotImplementedException();
+        }
+
+        public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
+        {
+            throw new NotImplementedException();
+        }
+
+        public override int GetMaxByteCount(int charCount)
+        {
+            throw new NotImplementedException();
+        }
+
+        #endregion
+    }
 
-	// many hunspell dictionaries use this encoding, yet java does not have it?!?!
-	internal sealed class ISO8859_14Decoder : CharsetDecoder
-	{
+    internal sealed class ISO8859_14Decoder : Decoder
+    {
+        internal static readonly char[] TABLE = new char[]
+        {
+            (char)0x00A0, (char)0x1E02, (char)0x1E03, (char)0x00A3, (char)0x010A, (char)0x010B, (char)0x1E0A, (char)0x00A7,
+            (char)0x1E80, (char)0x00A9, (char)0x1E82, (char)0x1E0B, (char)0x1EF2, (char)0x00AD, (char)0x00AE, (char)0x0178,
+            (char)0x1E1E, (char)0x1E1F, (char)0x0120, (char)0x0121, (char)0x1E40, (char)0x1E41, (char)0x00B6, (char)0x1E56,
+            (char)0x1E81, (char)0x1E57, (char)0x1E83, (char)0x1E60, (char)0x1EF3, (char)0x1E84, (char)0x1E85, (char)0x1E61,
+            (char)0x00C0, (char)0x00C1, (char)0x00C2, (char)0x00C3, (char)0x00C4, (char)0x00C5, (char)0x00C6, (char)0x00C7,
+            (char)0x00C8, (char)0x00C9, (char)0x00CA, (char)0x00CB, (char)0x00CC, (char)0x00CD, (char)0x00CE, (char)0x00CF,
+            (char)0x0174, (char)0x00D1, (char)0x00D2, (char)0x00D3, (char)0x00D4, (char)0x00D5, (char)0x00D6, (char)0x1E6A,
+            (char)0x00D8, (char)0x00D9, (char)0x00DA, (char)0x00DB, (char)0x00DC, (char)0x00DD, (char)0x0176, (char)0x00DF,
+            (char)0x00E0, (char)0x00E1, (char)0x00E2, (char)0x00E3, (char)0x00E4, (char)0x00E5, (char)0x00E6, (char)0x00E7,
+            (char)0x00E8, (char)0x00E9, (char)0x00EA, (char)0x00EB, (char)0x00EC, (char)0x00ED, (char)0x00EE, (char)0x00EF,
+            (char)0x0175, (char)0x00F1, (char)0x00F2, (char)0x00F3, (char)0x00F4, (char)0x00F5, (char)0x00F6, (char)0x1E6B,
+            (char)0x00F8, (char)0x00F9, (char)0x00FA, (char)0x00FB, (char)0x00FC, (char)0x00FD, (char)0x0177, (char)0x00FF
+        };
 
-	  internal static readonly char[] TABLE = new char[] {0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7, 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178, 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56, 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61, 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF, 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF};
+        public override int GetCharCount(byte[] bytes, int index, int count)
+        {
+            return count;
+        }
 
-	  internal ISO8859_14Decoder() : base(StandardCharsets.ISO_88591, 1f, 1f); / / fake with similar properties
-	  {
-	  }
+        public override int GetChars(byte[] bytesIn, int byteIndex, int byteCount, char[] charsOut, int charIndex)
+        {
+            int writeCount = 0;
+            int charPointer = charIndex;
 
-	  protected internal override CoderResult decodeLoop(ByteBuffer @in, CharBuffer @out)
-	  {
-		while (@in.hasRemaining() && @out.hasRemaining())
-		{
-		  char ch = (char)(@in.get() & 0xff);
-		  if (ch >= 0xA0)
-		  {
-			ch = TABLE[ch - 0xA0];
-		  }
-		  @out.put(ch);
-		}
-		return @in.hasRemaining() ? CoderResult.OVERFLOW : CoderResult.UNDERFLOW;
-	  }
-	}
+            for (int i = byteIndex; i <= (byteIndex + byteCount); i++)
+            {
+                // Decode the value
+                char ch = (char)(bytesIn[i] & 0xff);
+                if (ch >= 0xA0)
+                {
+                    ch = TABLE[ch - 0xA0];
+                }
+                // write the value to the correct buffer slot
+                charsOut[charPointer] = ch;
+                writeCount++;
+                charPointer++;
+            }
 
+            return writeCount;
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
index 70b4a94..8e0070d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
@@ -1,12 +1,17 @@
-\ufeffusing System;
-using System.Diagnostics;
+\ufeffusing Lucene.Net.Analysis.Util;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Automaton;
+using System;
 using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
 using System.Text;
 
-namespace org.apache.lucene.analysis.hunspell
+namespace Lucene.Net.Analysis.Hunspell
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -23,453 +28,438 @@ namespace org.apache.lucene.analysis.hunspell
 	 * limitations under the License.
 	 */
 
-
-	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
-	using ByteArrayDataInput = org.apache.lucene.store.ByteArrayDataInput;
-	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
-	using BytesRef = org.apache.lucene.util.BytesRef;
-	using CharsRef = org.apache.lucene.util.CharsRef;
-	using IntsRef = org.apache.lucene.util.IntsRef;
-	using Version = org.apache.lucene.util.Version;
-	using CharacterRunAutomaton = org.apache.lucene.util.automaton.CharacterRunAutomaton;
-
-	/// <summary>
-	/// Stemmer uses the affix rules declared in the Dictionary to generate one or more stems for a word.  It
-	/// conforms to the algorithm in the original hunspell algorithm, including recursive suffix stripping.
-	/// </summary>
-	internal sealed class Stemmer
-	{
-	  private readonly Dictionary dictionary;
-	  private readonly BytesRef scratch = new BytesRef();
-	  private readonly StringBuilder segment = new StringBuilder();
-	  private readonly ByteArrayDataInput affixReader;
-
-	  // used for normalization
-	  private readonly StringBuilder scratchSegment = new StringBuilder();
-	  private char[] scratchBuffer = new char[32];
-
-	  /// <summary>
-	  /// Constructs a new Stemmer which will use the provided Dictionary to create its stems.
-	  /// </summary>
-	  /// <param name="dictionary"> Dictionary that will be used to create the stems </param>
-	  public Stemmer(Dictionary dictionary)
-	  {
-		this.dictionary = dictionary;
-		this.affixReader = new ByteArrayDataInput(dictionary.affixData);
-	  }
-
-	  /// <summary>
-	  /// Find the stem(s) of the provided word.
-	  /// </summary>
-	  /// <param name="word"> Word to find the stems for </param>
-	  /// <returns> List of stems for the word </returns>
-	  public IList<CharsRef> stem(string word)
-	  {
-		return stem(word.ToCharArray(), word.Length);
-	  }
-
-	  /// <summary>
-	  /// Find the stem(s) of the provided word
-	  /// </summary>
-	  /// <param name="word"> Word to find the stems for </param>
-	  /// <returns> List of stems for the word </returns>
-	  public IList<CharsRef> stem(char[] word, int length)
-	  {
-
-		if (dictionary.needsInputCleaning)
-		{
-		  scratchSegment.Length = 0;
-		  scratchSegment.Append(word, 0, length);
-		  CharSequence cleaned = dictionary.cleanInput(scratchSegment, segment);
-		  scratchBuffer = ArrayUtil.grow(scratchBuffer, cleaned.length());
-		  length = segment.Length;
-		  segment.getChars(0, length, scratchBuffer, 0);
-		  word = scratchBuffer;
-		}
-
-		IList<CharsRef> stems = new List<CharsRef>();
-		IntsRef forms = dictionary.lookupWord(word, 0, length);
-		if (forms != null)
-		{
-		  // TODO: some forms should not be added, e.g. ONLYINCOMPOUND
-		  // just because it exists, does not make it valid...
-		  for (int i = 0; i < forms.length; i++)
-		  {
-			stems.Add(newStem(word, length));
-		  }
-		}
-		stems.AddRange(stem(word, length, -1, -1, -1, 0, true, true, false, false));
-		return stems;
-	  }
-
-	  /// <summary>
-	  /// Find the unique stem(s) of the provided word
-	  /// </summary>
-	  /// <param name="word"> Word to find the stems for </param>
-	  /// <returns> List of stems for the word </returns>
-	  public IList<CharsRef> uniqueStems(char[] word, int length)
-	  {
-		IList<CharsRef> stems = stem(word, length);
-		if (stems.Count < 2)
-		{
-		  return stems;
-		}
-		CharArraySet terms = new CharArraySet(Version.LUCENE_CURRENT, 8, dictionary.ignoreCase);
-		IList<CharsRef> deduped = new List<CharsRef>();
-		foreach (CharsRef s in stems)
-		{
-		  if (!terms.contains(s))
-		  {
-			deduped.Add(s);
-			terms.add(s);
-		  }
-		}
-		return deduped;
-	  }
-
-	  private CharsRef newStem(char[] buffer, int length)
-	  {
-		if (dictionary.needsOutputCleaning)
-		{
-		  scratchSegment.Length = 0;
-		  scratchSegment.Append(buffer, 0, length);
-		  try
-		  {
-			Dictionary.applyMappings(dictionary.oconv, scratchSegment);
-		  }
-		  catch (IOException bogus)
-		  {
-			throw new Exception(bogus);
-		  }
-		  char[] cleaned = new char[scratchSegment.Length];
-		  scratchSegment.getChars(0, cleaned.Length, cleaned, 0);
-		  return new CharsRef(cleaned, 0, cleaned.Length);
-		}
-		else
-		{
-		  return new CharsRef(buffer, 0, length);
-		}
-	  }
-
-	  // ================================================= Helper Methods ================================================
-
-	  /// <summary>
-	  /// Generates a list of stems for the provided word
-	  /// </summary>
-	  /// <param name="word"> Word to generate the stems for </param>
-	  /// <param name="previous"> previous affix that was removed (so we dont remove same one twice) </param>
-	  /// <param name="prevFlag"> Flag from a previous stemming step that need to be cross-checked with any affixes in this recursive step </param>
-	  /// <param name="prefixFlag"> flag of the most inner removed prefix, so that when removing a suffix, its also checked against the word </param>
-	  /// <param name="recursionDepth"> current recursiondepth </param>
-	  /// <param name="doPrefix"> true if we should remove prefixes </param>
-	  /// <param name="doSuffix"> true if we should remove suffixes </param>
-	  /// <param name="previousWasPrefix"> true if the previous removal was a prefix:
-	  ///        if we are removing a suffix, and it has no continuation requirements, its ok.
-	  ///        but two prefixes (COMPLEXPREFIXES) or two suffixes must have continuation requirements to recurse. </param>
-	  /// <param name="circumfix"> true if the previous prefix removal was signed as a circumfix
-	  ///        this means inner most suffix must also contain circumfix flag. </param>
-	  /// <returns> List of stems, or empty list if no stems are found </returns>
-	  private IList<CharsRef> stem(char[] word, int length, int previous, int prevFlag, int prefixFlag, int recursionDepth, bool doPrefix, bool doSuffix, bool previousWasPrefix, bool circumfix)
-	  {
-
-		// TODO: allow this stuff to be reused by tokenfilter
-		IList<CharsRef> stems = new List<CharsRef>();
-
-		if (doPrefix && dictionary.prefixes != null)
-		{
-		  for (int i = length - 1; i >= 0; i--)
-		  {
-			IntsRef prefixes = dictionary.lookupPrefix(word, 0, i);
-			if (prefixes == null)
-			{
-			  continue;
-			}
-
-			for (int j = 0; j < prefixes.length; j++)
-			{
-			  int prefix = prefixes.ints[prefixes.offset + j];
-			  if (prefix == previous)
-			  {
-				continue;
-			  }
-			  affixReader.Position = 8 * prefix;
-			  char flag = (char)(affixReader.readShort() & 0xffff);
-			  char stripOrd = (char)(affixReader.readShort() & 0xffff);
-			  int condition = (char)(affixReader.readShort() & 0xffff);
-			  bool crossProduct = (condition & 1) == 1;
-			  condition = (int)((uint)condition >> 1);
-			  char append = (char)(affixReader.readShort() & 0xffff);
-
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final boolean compatible;
-			  bool compatible;
-			  if (recursionDepth == 0)
-			  {
-				compatible = true;
-			  }
-			  else if (crossProduct)
-			  {
-				// cross check incoming continuation class (flag of previous affix) against list.
-				dictionary.flagLookup.get(append, scratch);
-				char[] appendFlags = Dictionary.decodeFlags(scratch);
-				Debug.Assert(prevFlag >= 0);
-				compatible = hasCrossCheckedFlag((char)prevFlag, appendFlags, false);
-			  }
-			  else
-			  {
-				compatible = false;
-			  }
-
-			  if (compatible)
-			  {
-				int deAffixedStart = i;
-				int deAffixedLength = length - deAffixedStart;
-
-				int stripStart = dictionary.stripOffsets[stripOrd];
-				int stripEnd = dictionary.stripOffsets[stripOrd + 1];
-				int stripLength = stripEnd - stripStart;
-
-				if (!checkCondition(condition, dictionary.stripData, stripStart, stripLength, word, deAffixedStart, deAffixedLength))
-				{
-				  continue;
-				}
-
-				char[] strippedWord = new char[stripLength + deAffixedLength];
-				Array.Copy(dictionary.stripData, stripStart, strippedWord, 0, stripLength);
-				Array.Copy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength);
-
-				IList<CharsRef> stemList = applyAffix(strippedWord, strippedWord.Length, prefix, -1, recursionDepth, true, circumfix);
-
-				stems.AddRange(stemList);
-			  }
-			}
-		  }
-		}
-
-		if (doSuffix && dictionary.suffixes != null)
-		{
-		  for (int i = 0; i < length; i++)
-		  {
-			IntsRef suffixes = dictionary.lookupSuffix(word, i, length - i);
-			if (suffixes == null)
-			{
-			  continue;
-			}
-
-			for (int j = 0; j < suffixes.length; j++)
-			{
-			  int suffix = suffixes.ints[suffixes.offset + j];
-			  if (suffix == previous)
-			  {
-				continue;
-			  }
-			  affixReader.Position = 8 * suffix;
-			  char flag = (char)(affixReader.readShort() & 0xffff);
-			  char stripOrd = (char)(affixReader.readShort() & 0xffff);
-			  int condition = (char)(affixReader.readShort() & 0xffff);
-			  bool crossProduct = (condition & 1) == 1;
-			  condition = (int)((uint)condition >> 1);
-			  char append = (char)(affixReader.readShort() & 0xffff);
-
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final boolean compatible;
-			  bool compatible;
-			  if (recursionDepth == 0)
-			  {
-				compatible = true;
-			  }
-			  else if (crossProduct)
-			  {
-				// cross check incoming continuation class (flag of previous affix) against list.
-				dictionary.flagLookup.get(append, scratch);
-				char[] appendFlags = Dictionary.decodeFlags(scratch);
-				Debug.Assert(prevFlag >= 0);
-				compatible = hasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
-			  }
-			  else
-			  {
-				compatible = false;
-			  }
-
-			  if (compatible)
-			  {
-				int appendLength = length - i;
-				int deAffixedLength = length - appendLength;
-
-				int stripStart = dictionary.stripOffsets[stripOrd];
-				int stripEnd = dictionary.stripOffsets[stripOrd + 1];
-				int stripLength = stripEnd - stripStart;
-
-				if (!checkCondition(condition, word, 0, deAffixedLength, dictionary.stripData, stripStart, stripLength))
-				{
-				  continue;
-				}
-
-				char[] strippedWord = new char[stripLength + deAffixedLength];
-				Array.Copy(word, 0, strippedWord, 0, deAffixedLength);
-				Array.Copy(dictionary.stripData, stripStart, strippedWord, deAffixedLength, stripLength);
-
-				IList<CharsRef> stemList = applyAffix(strippedWord, strippedWord.Length, suffix, prefixFlag, recursionDepth, false, circumfix);
-
-				stems.AddRange(stemList);
-			  }
-			}
-		  }
-		}
-
-		return stems;
-	  }
-
-	  /// <summary>
-	  /// checks condition of the concatenation of two strings </summary>
-	  // note: this is pretty stupid, we really should subtract strip from the condition up front and just check the stem
-	  // but this is a little bit more complicated.
-	  private bool checkCondition(int condition, char[] c1, int c1off, int c1len, char[] c2, int c2off, int c2len)
-	  {
-		if (condition != 0)
-		{
-		  CharacterRunAutomaton pattern = dictionary.patterns[condition];
-		  int state = pattern.InitialState;
-		  for (int i = c1off; i < c1off + c1len; i++)
-		  {
-			state = pattern.step(state, c1[i]);
-			if (state == -1)
-			{
-			  return false;
-			}
-		  }
-		  for (int i = c2off; i < c2off + c2len; i++)
-		  {
-			state = pattern.step(state, c2[i]);
-			if (state == -1)
-			{
-			  return false;
-			}
-		  }
-		  return pattern.isAccept(state);
-		}
-		return true;
-	  }
-
-	  /// <summary>
-	  /// Applies the affix rule to the given word, producing a list of stems if any are found
-	  /// </summary>
-	  /// <param name="strippedWord"> Word the affix has been removed and the strip added </param>
-	  /// <param name="length"> valid length of stripped word </param>
-	  /// <param name="affix"> HunspellAffix representing the affix rule itself </param>
-	  /// <param name="prefixFlag"> when we already stripped a prefix, we cant simply recurse and check the suffix, unless both are compatible
-	  ///                   so we must check dictionary form against both to add it as a stem! </param>
-	  /// <param name="recursionDepth"> current recursion depth </param>
-	  /// <param name="prefix"> true if we are removing a prefix (false if its a suffix) </param>
-	  /// <returns> List of stems for the word, or an empty list if none are found </returns>
-	  internal IList<CharsRef> applyAffix(char[] strippedWord, int length, int affix, int prefixFlag, int recursionDepth, bool prefix, bool circumfix)
-	  {
-		// TODO: just pass this in from before, no need to decode it twice
-		affixReader.Position = 8 * affix;
-		char flag = (char)(affixReader.readShort() & 0xffff);
-		affixReader.skipBytes(2); // strip
-		int condition = (char)(affixReader.readShort() & 0xffff);
-		bool crossProduct = (condition & 1) == 1;
-		condition = (int)((uint)condition >> 1);
-		char append = (char)(affixReader.readShort() & 0xffff);
-
-		IList<CharsRef> stems = new List<CharsRef>();
-
-		IntsRef forms = dictionary.lookupWord(strippedWord, 0, length);
-		if (forms != null)
-		{
-		  for (int i = 0; i < forms.length; i++)
-		  {
-			dictionary.flagLookup.get(forms.ints[forms.offset + i], scratch);
-			char[] wordFlags = Dictionary.decodeFlags(scratch);
-			if (Dictionary.hasFlag(wordFlags, flag))
-			{
-			  // confusing: in this one exception, we already chained the first prefix against the second,
-			  // so it doesnt need to be checked against the word
-			  bool chainedPrefix = dictionary.complexPrefixes && recursionDepth == 1 && prefix;
-			  if (chainedPrefix == false && prefixFlag >= 0 && !Dictionary.hasFlag(wordFlags, (char)prefixFlag))
-			  {
-				// see if we can chain prefix thru the suffix continuation class (only if it has any!)
-				dictionary.flagLookup.get(append, scratch);
-				char[] appendFlags = Dictionary.decodeFlags(scratch);
-				if (!hasCrossCheckedFlag((char)prefixFlag, appendFlags, false))
-				{
-				  continue;
-				}
-			  }
-
-			  // if circumfix was previously set by a prefix, we must check this suffix,
-			  // to ensure it has it, and vice versa
-			  if (dictionary.circumfix != -1)
-			  {
-				dictionary.flagLookup.get(append, scratch);
-				char[] appendFlags = Dictionary.decodeFlags(scratch);
-				bool suffixCircumfix = Dictionary.hasFlag(appendFlags, (char)dictionary.circumfix);
-				if (circumfix != suffixCircumfix)
-				{
-				  continue;
-				}
-			  }
-			  stems.Add(newStem(strippedWord, length));
-			}
-		  }
-		}
-
-		// if a circumfix flag is defined in the dictionary, and we are a prefix, we need to check if we have that flag
-		if (dictionary.circumfix != -1 && !circumfix && prefix)
-		{
-		  dictionary.flagLookup.get(append, scratch);
-		  char[] appendFlags = Dictionary.decodeFlags(scratch);
-		  circumfix = Dictionary.hasFlag(appendFlags, (char)dictionary.circumfix);
-		}
-
-		if (crossProduct)
-		{
-		  if (recursionDepth == 0)
-		  {
-			if (prefix)
-			{
-			  // we took away the first prefix.
-			  // COMPLEXPREFIXES = true:  combine with a second prefix and another suffix 
-			  // COMPLEXPREFIXES = false: combine with a suffix
-			  stems.AddRange(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes && dictionary.twoStageAffix, true, true, circumfix));
-			}
-			else if (dictionary.complexPrefixes == false && dictionary.twoStageAffix)
-			{
-			  // we took away a suffix.
-			  // COMPLEXPREFIXES = true: we don't recurse! only one suffix allowed
-			  // COMPLEXPREFIXES = false: combine with another suffix
-			  stems.AddRange(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix));
-			}
-		  }
-		  else if (recursionDepth == 1)
-		  {
-			if (prefix && dictionary.complexPrefixes)
-			{
-			  // we took away the second prefix: go look for another suffix
-			  stems.AddRange(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true, circumfix));
-			}
-			else if (prefix == false && dictionary.complexPrefixes == false && dictionary.twoStageAffix)
-			{
-			  // we took away a prefix, then a suffix: go look for another suffix
-			  stems.AddRange(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix));
-			}
-		  }
-		}
-
-		return stems;
-	  }
-
-	  /// <summary>
-	  /// Checks if the given flag cross checks with the given array of flags
-	  /// </summary>
-	  /// <param name="flag"> Flag to cross check with the array of flags </param>
-	  /// <param name="flags"> Array of flags to cross check against.  Can be {@code null} </param>
-	  /// <returns> {@code true} if the flag is found in the array or the array is {@code null}, {@code false} otherwise </returns>
-	  private bool hasCrossCheckedFlag(char flag, char[] flags, bool matchEmpty)
-	  {
-		return (flags.Length == 0 && matchEmpty) || Arrays.binarySearch(flags, flag) >= 0;
-	  }
-	}
-
+    /// <summary>
+    /// Stemmer uses the affix rules declared in the Dictionary to generate one or more stems for a word.  It
+    /// conforms to the algorithm in the original hunspell algorithm, including recursive suffix stripping.
+    /// </summary>
+    internal sealed class Stemmer
+    {
+        private readonly Dictionary dictionary;
+        private readonly BytesRef scratch = new BytesRef();
+        private readonly StringBuilder segment = new StringBuilder();
+        private readonly ByteArrayDataInput affixReader;
+
+        // used for normalization
+        private readonly StringBuilder scratchSegment = new StringBuilder();
+        private char[] scratchBuffer = new char[32];
+
+        /// <summary>
+        /// Constructs a new Stemmer which will use the provided Dictionary to create its stems.
+        /// </summary>
+        /// <param name="dictionary"> Dictionary that will be used to create the stems </param>
+        public Stemmer(Dictionary dictionary)
+        {
+            this.dictionary = dictionary;
+            this.affixReader = new ByteArrayDataInput(dictionary.affixData);
+        }
+
+        /// <summary>
+        /// Find the stem(s) of the provided word.
+        /// </summary>
+        /// <param name="word"> Word to find the stems for </param>
+        /// <returns> List of stems for the word </returns>
+        public IList<CharsRef> Stem(string word)
+        {
+            return Stem(word.ToCharArray(), word.Length);
+        }
+
+        /// <summary>
+        /// Find the stem(s) of the provided word
+        /// </summary>
+        /// <param name="word"> Word to find the stems for </param>
+        /// <returns> List of stems for the word </returns>
+        public IList<CharsRef> Stem(char[] word, int length)
+        {
+
+            if (dictionary.needsInputCleaning)
+            {
+                scratchSegment.Length = 0;
+                scratchSegment.Append(word, 0, length);
+                string cleaned = dictionary.CleanInput(scratchSegment.ToString(), segment);
+                scratchBuffer = ArrayUtil.Grow(scratchBuffer, cleaned.Length);
+                length = segment.Length;
+                segment.CopyTo(0, scratchBuffer, 0, length);
+                word = scratchBuffer;
+            }
+
+            List<CharsRef> stems = new List<CharsRef>();
+            IntsRef forms = dictionary.LookupWord(word, 0, length);
+            if (forms != null)
+            {
+                // TODO: some forms should not be added, e.g. ONLYINCOMPOUND
+                // just because it exists, does not make it valid...
+                for (int i = 0; i < forms.Length; i++)
+                {
+                    stems.Add(NewStem(word, length));
+                }
+            }
+            stems.AddRange(Stem(word, length, -1, -1, -1, 0, true, true, false, false));
+            return stems;
+        }
+
+        /// <summary>
+        /// Find the unique stem(s) of the provided word
+        /// </summary>
+        /// <param name="word"> Word to find the stems for </param>
+        /// <returns> List of stems for the word </returns>
+        public IList<CharsRef> UniqueStems(char[] word, int length)
+        {
+            IList<CharsRef> stems = Stem(word, length);
+            if (stems.Count < 2)
+            {
+                return stems;
+            }
+            CharArraySet terms = new CharArraySet(LuceneVersion.LUCENE_CURRENT, 8, dictionary.ignoreCase);
+            IList<CharsRef> deduped = new List<CharsRef>();
+            foreach (CharsRef s in stems)
+            {
+                if (!terms.Contains(s))
+                {
+                    deduped.Add(s);
+                    terms.Add(s);
+                }
+            }
+            return deduped;
+        }
+
+        private CharsRef NewStem(char[] buffer, int length)
+        {
+            if (dictionary.needsOutputCleaning)
+            {
+                scratchSegment.Length = 0;
+                scratchSegment.Append(buffer, 0, length);
+                try
+                {
+                    Dictionary.ApplyMappings(dictionary.oconv, scratchSegment);
+                }
+                catch (IOException bogus)
+                {
+                    throw new Exception(bogus.Message, bogus);
+                }
+                char[] cleaned = new char[scratchSegment.Length];
+                scratchSegment.CopyTo(0, cleaned, 0, cleaned.Length);
+                return new CharsRef(cleaned, 0, cleaned.Length);
+            }
+            else
+            {
+                return new CharsRef(buffer, 0, length);
+            }
+        }
+
+        // ================================================= Helper Methods ================================================
+
+        /// <summary>
+        /// Generates a list of stems for the provided word
+        /// </summary>
+        /// <param name="word"> Word to generate the stems for </param>
+        /// <param name="previous"> previous affix that was removed (so we dont remove same one twice) </param>
+        /// <param name="prevFlag"> Flag from a previous stemming step that need to be cross-checked with any affixes in this recursive step </param>
+        /// <param name="prefixFlag"> flag of the most inner removed prefix, so that when removing a suffix, its also checked against the word </param>
+        /// <param name="recursionDepth"> current recursiondepth </param>
+        /// <param name="doPrefix"> true if we should remove prefixes </param>
+        /// <param name="doSuffix"> true if we should remove suffixes </param>
+        /// <param name="previousWasPrefix"> true if the previous removal was a prefix:
+        ///        if we are removing a suffix, and it has no continuation requirements, its ok.
+        ///        but two prefixes (COMPLEXPREFIXES) or two suffixes must have continuation requirements to recurse. </param>
+        /// <param name="circumfix"> true if the previous prefix removal was signed as a circumfix
+        ///        this means inner most suffix must also contain circumfix flag. </param>
+        /// <returns> List of stems, or empty list if no stems are found </returns>
+        private IList<CharsRef> Stem(char[] word, int length, int previous, int prevFlag, int prefixFlag, int recursionDepth, bool doPrefix, bool doSuffix, bool previousWasPrefix, bool circumfix)
+        {
+
+            // TODO: allow this stuff to be reused by tokenfilter
+            List<CharsRef> stems = new List<CharsRef>();
+
+            if (doPrefix && dictionary.prefixes != null)
+            {
+                for (int i = length - 1; i >= 0; i--)
+                {
+                    IntsRef prefixes = dictionary.LookupPrefix(word, 0, i);
+                    if (prefixes == null)
+                    {
+                        continue;
+                    }
+
+                    for (int j = 0; j < prefixes.Length; j++)
+                    {
+                        int prefix = prefixes.Ints[prefixes.Offset + j];
+                        if (prefix == previous)
+                        {
+                            continue;
+                        }
+                        affixReader.Position = 8 * prefix;
+                        char flag = (char)(affixReader.ReadShort() & 0xffff);
+                        char stripOrd = (char)(affixReader.ReadShort() & 0xffff);
+                        int condition = (char)(affixReader.ReadShort() & 0xffff);
+                        bool crossProduct = (condition & 1) == 1;
+                        condition = (int)((uint)condition >> 1);
+                        char append = (char)(affixReader.ReadShort() & 0xffff);
+
+                        bool compatible;
+                        if (recursionDepth == 0)
+                        {
+                            compatible = true;
+                        }
+                        else if (crossProduct)
+                        {
+                            // cross check incoming continuation class (flag of previous affix) against list.
+                            dictionary.flagLookup.Get(append, scratch);
+                            char[] appendFlags = Dictionary.DecodeFlags(scratch);
+                            Debug.Assert(prevFlag >= 0);
+                            compatible = HasCrossCheckedFlag((char)prevFlag, appendFlags, false);
+                        }
+                        else
+                        {
+                            compatible = false;
+                        }
+
+                        if (compatible)
+                        {
+                            int deAffixedStart = i;
+                            int deAffixedLength = length - deAffixedStart;
+
+                            int stripStart = dictionary.stripOffsets[stripOrd];
+                            int stripEnd = dictionary.stripOffsets[stripOrd + 1];
+                            int stripLength = stripEnd - stripStart;
+
+                            if (!CheckCondition(condition, dictionary.stripData, stripStart, stripLength, word, deAffixedStart, deAffixedLength))
+                            {
+                                continue;
+                            }
+
+                            char[] strippedWord = new char[stripLength + deAffixedLength];
+                            Array.Copy(dictionary.stripData, stripStart, strippedWord, 0, stripLength);
+                            Array.Copy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength);
+
+                            IList<CharsRef> stemList = ApplyAffix(strippedWord, strippedWord.Length, prefix, -1, recursionDepth, true, circumfix);
+
+                            stems.AddRange(stemList);
+                        }
+                    }
+                }
+            }
+
+            if (doSuffix && dictionary.suffixes != null)
+            {
+                for (int i = 0; i < length; i++)
+                {
+                    IntsRef suffixes = dictionary.LookupSuffix(word, i, length - i);
+                    if (suffixes == null)
+                    {
+                        continue;
+                    }
+
+                    for (int j = 0; j < suffixes.Length; j++)
+                    {
+                        int suffix = suffixes.Ints[suffixes.Offset + j];
+                        if (suffix == previous)
+                        {
+                            continue;
+                        }
+                        affixReader.Position = 8 * suffix;
+                        char flag = (char)(affixReader.ReadShort() & 0xffff);
+                        char stripOrd = (char)(affixReader.ReadShort() & 0xffff);
+                        int condition = (char)(affixReader.ReadShort() & 0xffff);
+                        bool crossProduct = (condition & 1) == 1;
+                        condition = (int)((uint)condition >> 1);
+                        char append = (char)(affixReader.ReadShort() & 0xffff);
+
+                        bool compatible;
+                        if (recursionDepth == 0)
+                        {
+                            compatible = true;
+                        }
+                        else if (crossProduct)
+                        {
+                            // cross check incoming continuation class (flag of previous affix) against list.
+                            dictionary.flagLookup.Get(append, scratch);
+                            char[] appendFlags = Dictionary.DecodeFlags(scratch);
+                            Debug.Assert(prevFlag >= 0);
+                            compatible = HasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
+                        }
+                        else
+                        {
+                            compatible = false;
+                        }
+
+                        if (compatible)
+                        {
+                            int appendLength = length - i;
+                            int deAffixedLength = length - appendLength;
+
+                            int stripStart = dictionary.stripOffsets[stripOrd];
+                            int stripEnd = dictionary.stripOffsets[stripOrd + 1];
+                            int stripLength = stripEnd - stripStart;
+
+                            if (!CheckCondition(condition, word, 0, deAffixedLength, dictionary.stripData, stripStart, stripLength))
+                            {
+                                continue;
+                            }
+
+                            char[] strippedWord = new char[stripLength + deAffixedLength];
+                            Array.Copy(word, 0, strippedWord, 0, deAffixedLength);
+                            Array.Copy(dictionary.stripData, stripStart, strippedWord, deAffixedLength, stripLength);
+
+                            IList<CharsRef> stemList = ApplyAffix(strippedWord, strippedWord.Length, suffix, prefixFlag, recursionDepth, false, circumfix);
+
+                            stems.AddRange(stemList);
+                        }
+                    }
+                }
+            }
+
+            return stems;
+        }
+
+        /// <summary>
+        /// checks condition of the concatenation of two strings </summary>
+        // note: this is pretty stupid, we really should subtract strip from the condition up front and just check the stem
+        // but this is a little bit more complicated.
+        private bool CheckCondition(int condition, char[] c1, int c1off, int c1len, char[] c2, int c2off, int c2len)
+        {
+            if (condition != 0)
+            {
+                CharacterRunAutomaton pattern = dictionary.patterns[condition];
+                int state = pattern.InitialState;
+                for (int i = c1off; i < c1off + c1len; i++)
+                {
+                    state = pattern.Step(state, c1[i]);
+                    if (state == -1)
+                    {
+                        return false;
+                    }
+                }
+                for (int i = c2off; i < c2off + c2len; i++)
+                {
+                    state = pattern.Step(state, c2[i]);
+                    if (state == -1)
+                    {
+                        return false;
+                    }
+                }
+                return pattern.IsAccept(state);
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// Applies the affix rule to the given word, producing a list of stems if any are found
+        /// </summary>
+        /// <param name="strippedWord"> Word the affix has been removed and the strip added </param>
+        /// <param name="length"> valid length of stripped word </param>
+        /// <param name="affix"> HunspellAffix representing the affix rule itself </param>
+        /// <param name="prefixFlag"> when we already stripped a prefix, we cant simply recurse and check the suffix, unless both are compatible
+        ///                   so we must check dictionary form against both to add it as a stem! </param>
+        /// <param name="recursionDepth"> current recursion depth </param>
+        /// <param name="prefix"> true if we are removing a prefix (false if its a suffix) </param>
+        /// <returns> List of stems for the word, or an empty list if none are found </returns>
+        internal IList<CharsRef> ApplyAffix(char[] strippedWord, int length, int affix, int prefixFlag, int recursionDepth, bool prefix, bool circumfix)
+        {
+            // TODO: just pass this in from before, no need to decode it twice
+            affixReader.Position = 8 * affix;
+            char flag = (char)(affixReader.ReadShort() & 0xffff);
+            affixReader.SkipBytes(2); // strip
+            int condition = (char)(affixReader.ReadShort() & 0xffff);
+            bool crossProduct = (condition & 1) == 1;
+            condition = (int)((uint)condition >> 1);
+            char append = (char)(affixReader.ReadShort() & 0xffff);
+
+            List<CharsRef> stems = new List<CharsRef>();
+
+            IntsRef forms = dictionary.LookupWord(strippedWord, 0, length);
+            if (forms != null)
+            {
+                for (int i = 0; i < forms.Length; i++)
+                {
+                    dictionary.flagLookup.Get(forms.Ints[forms.Offset + i], scratch);
+                    char[] wordFlags = Dictionary.DecodeFlags(scratch);
+                    if (Dictionary.HasFlag(wordFlags, flag))
+                    {
+                        // confusing: in this one exception, we already chained the first prefix against the second,
+                        // so it doesnt need to be checked against the word
+                        bool chainedPrefix = dictionary.complexPrefixes && recursionDepth == 1 && prefix;
+                        if (chainedPrefix == false && prefixFlag >= 0 && !Dictionary.HasFlag(wordFlags, (char)prefixFlag))
+                        {
+                            // see if we can chain prefix thru the suffix continuation class (only if it has any!)
+                            dictionary.flagLookup.Get(append, scratch);
+                            char[] appendFlags = Dictionary.DecodeFlags(scratch);
+                            if (!HasCrossCheckedFlag((char)prefixFlag, appendFlags, false))
+                            {
+                                continue;
+                            }
+                        }
+
+                        // if circumfix was previously set by a prefix, we must check this suffix,
+                        // to ensure it has it, and vice versa
+                        if (dictionary.circumfix != -1)
+                        {
+                            dictionary.flagLookup.Get(append, scratch);
+                            char[] appendFlags = Dictionary.DecodeFlags(scratch);
+                            bool suffixCircumfix = Dictionary.HasFlag(appendFlags, (char)dictionary.circumfix);
+                            if (circumfix != suffixCircumfix)
+                            {
+                                continue;
+                            }
+                        }
+                        stems.Add(NewStem(strippedWord, length));
+                    }
+                }
+            }
+
+            // if a circumfix flag is defined in the dictionary, and we are a prefix, we need to check if we have that flag
+            if (dictionary.circumfix != -1 && !circumfix && prefix)
+            {
+                dictionary.flagLookup.Get(append, scratch);
+                char[] appendFlags = Dictionary.DecodeFlags(scratch);
+                circumfix = Dictionary.HasFlag(appendFlags, (char)dictionary.circumfix);
+            }
+
+            if (crossProduct)
+            {
+                if (recursionDepth == 0)
+                {
+                    if (prefix)
+                    {
+                        // we took away the first prefix.
+                        // COMPLEXPREFIXES = true:  combine with a second prefix and another suffix 
+                        // COMPLEXPREFIXES = false: combine with a suffix
+                        stems.AddRange(Stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes && dictionary.twoStageAffix, true, true, circumfix));
+                    }
+                    else if (dictionary.complexPrefixes == false && dictionary.twoStageAffix)
+                    {
+                        // we took away a suffix.
+                        // COMPLEXPREFIXES = true: we don't recurse! only one suffix allowed
+                        // COMPLEXPREFIXES = false: combine with another suffix
+                        stems.AddRange(Stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix));
+                    }
+                }
+                else if (recursionDepth == 1)
+                {
+                    if (prefix && dictionary.complexPrefixes)
+                    {
+                        // we took away the second prefix: go look for another suffix
+                        stems.AddRange(Stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true, circumfix));
+                    }
+                    else if (prefix == false && dictionary.complexPrefixes == false && dictionary.twoStageAffix)
+                    {
+                        // we took away a prefix, then a suffix: go look for another suffix
+                        stems.AddRange(Stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix));
+                    }
+                }
+            }
+
+            return stems;
+        }
+
+        /// <summary>
+        /// Checks if the given flag cross checks with the given array of flags
+        /// </summary>
+        /// <param name="flag"> Flag to cross check with the array of flags </param>
+        /// <param name="flags"> Array of flags to cross check against.  Can be {@code null} </param>
+        /// <returns> {@code true} if the flag is found in the array or the array is {@code null}, {@code false} otherwise </returns>
+        private bool HasCrossCheckedFlag(char flag, char[] flags, bool matchEmpty)
+        {
+            return (flags.Length == 0 && matchEmpty) || Arrays.BinarySearch(flags, flag) >= 0;
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
index 1c6d7fc..a74ed0b 100644
--- a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
+++ b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
@@ -191,6 +191,11 @@
     <Compile Include="Analysis\Hi\HindiStemFilter.cs" />
     <Compile Include="Analysis\Hi\HindiStemFilterFactory.cs" />
     <Compile Include="Analysis\Hi\HindiStemmer.cs" />
+    <Compile Include="Analysis\Hunspell\Dictionary.cs" />
+    <Compile Include="Analysis\Hunspell\HunspellStemFilter.cs" />
+    <Compile Include="Analysis\Hunspell\HunspellStemFilterFactory.cs" />
+    <Compile Include="Analysis\Hunspell\ISO8859_14Decoder.cs" />
+    <Compile Include="Analysis\Hunspell\Stemmer.cs" />
     <Compile Include="Analysis\Hu\HungarianAnalyzer.cs" />
     <Compile Include="Analysis\Hu\HungarianLightStemFilter.cs" />
     <Compile Include="Analysis\Hu\HungarianLightStemFilterFactory.cs" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Core/Support/IDictionaryExtensions.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/IDictionaryExtensions.cs b/src/Lucene.Net.Core/Support/IDictionaryExtensions.cs
index 277dd3a..b27c855 100644
--- a/src/Lucene.Net.Core/Support/IDictionaryExtensions.cs
+++ b/src/Lucene.Net.Core/Support/IDictionaryExtensions.cs
@@ -27,7 +27,7 @@ namespace Lucene.Net.Support
             if (dict == null)
                 return default(TValue);
 
-            var oldValue = dict[key];
+            var oldValue = dict.ContainsKey(key) ? dict[key] : default(TValue);
             dict[key] = value;
             return oldValue;
         }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/StemmerTestBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/StemmerTestBase.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/StemmerTestBase.cs
index a1e0353..2c691a7 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/StemmerTestBase.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/StemmerTestBase.cs
@@ -1,9 +1,13 @@
-\ufeffusing System.Collections.Generic;
+\ufeffusing Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
 
-namespace org.apache.lucene.analysis.hunspell
+namespace Lucene.Net.Analysis.Hunspell
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -20,76 +24,68 @@ namespace org.apache.lucene.analysis.hunspell
 	 * limitations under the License.
 	 */
 
+    /// <summary>
+    /// base class for hunspell stemmer tests </summary>
+    public abstract class StemmerTestBase : LuceneTestCase
+    {
+        private static Stemmer stemmer;
 
-	using CharsRef = org.apache.lucene.util.CharsRef;
-	using IOUtils = org.apache.lucene.util.IOUtils;
-	using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
-
-	/// <summary>
-	/// base class for hunspell stemmer tests </summary>
-	internal abstract class StemmerTestBase : LuceneTestCase
-	{
-	  private static Stemmer stemmer;
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: static void init(String affix, String dictionary) throws java.io.IOException, java.text.ParseException
-	  internal static void init(string affix, string dictionary)
-	  {
-		init(false, affix, dictionary);
-	  }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: static void init(boolean ignoreCase, String affix, String... dictionaries) throws java.io.IOException, java.text.ParseException
-	  internal static void init(bool ignoreCase, string affix, params string[] dictionaries)
-	  {
-		if (dictionaries.Length == 0)
-		{
-		  throw new System.ArgumentException("there must be at least one dictionary");
-		}
+        internal static void Init(string affix, string dictionary)
+        {
+            Init(false, affix, dictionary);
+        }
 
-		System.IO.Stream affixStream = typeof(StemmerTestBase).getResourceAsStream(affix);
-		if (affixStream == null)
-		{
-		  throw new FileNotFoundException("file not found: " + affix);
-		}
+        internal static void Init(bool ignoreCase, string affix, params string[] dictionaries)
+        {
+            if (dictionaries.Length == 0)
+            {
+                throw new System.ArgumentException("there must be at least one dictionary");
+            }
 
-		System.IO.Stream[] dictStreams = new System.IO.Stream[dictionaries.Length];
-		for (int i = 0; i < dictionaries.Length; i++)
-		{
-		  dictStreams[i] = typeof(StemmerTestBase).getResourceAsStream(dictionaries[i]);
-		  if (dictStreams[i] == null)
-		  {
-			throw new FileNotFoundException("file not found: " + dictStreams[i]);
-		  }
-		}
+            System.IO.Stream affixStream = typeof(StemmerTestBase).getResourceAsStream(affix);
+            if (affixStream == null)
+            {
+                throw new FileNotFoundException("file not found: " + affix);
+            }
 
-		try
-		{
-		  Dictionary dictionary = new Dictionary(affixStream, Arrays.asList(dictStreams), ignoreCase);
-		  stemmer = new Stemmer(dictionary);
-		}
-		finally
-		{
-		  IOUtils.closeWhileHandlingException(affixStream);
-		  IOUtils.closeWhileHandlingException(dictStreams);
-		}
-	  }
+            System.IO.Stream[] dictStreams = new System.IO.Stream[dictionaries.Length];
+            for (int i = 0; i < dictionaries.Length; i++)
+            {
+                dictStreams[i] = typeof(StemmerTestBase).getResourceAsStream(dictionaries[i]);
+                if (dictStreams[i] == null)
+                {
+                    throw new FileNotFoundException("file not found: " + dictStreams[i]);
+                }
+            }
 
-	  internal static void assertStemsTo(string s, params string[] expected)
-	  {
-		assertNotNull(stemmer);
-		Arrays.sort(expected);
+            try
+            {
+                Dictionary dictionary = new Dictionary(affixStream, Arrays.AsList(dictStreams), ignoreCase);
+                stemmer = new Stemmer(dictionary);
+            }
+            finally
+            {
+                IOUtils.CloseWhileHandlingException(affixStream);
+                IOUtils.CloseWhileHandlingException(null, dictStreams);
+            }
+        }
 
-		IList<CharsRef> stems = stemmer.stem(s);
-		string[] actual = new string[stems.Count];
-		for (int i = 0; i < actual.Length; i++)
-		{
-		  actual[i] = stems[i].ToString();
-		}
-		Arrays.sort(actual);
+        internal static void AssertStemsTo(string s, params string[] expected)
+        {
+            assertNotNull(stemmer);
+            Array.Sort(expected);
 
-		assertArrayEquals("expected=" + Arrays.ToString(expected) + ",actual=" + Arrays.ToString(actual), expected, actual);
-	  }
-	}
+            IList<CharsRef> stems = stemmer.Stem(s);
+            string[] actual = new string[stems.Count];
+            for (int i = 0; i < actual.Length; i++)
+            {
+                actual[i] = stems[i].ToString();
+            }
+            Array.Sort(actual);
 
+            // LUCENENET: Originally, the code was as follows, but it failed to properly compare the arrays.
+            //assertArrayEquals("expected=" + Arrays.ToString(expected) + ",actual=" + Arrays.ToString(actual), expected, actual);
+            Assert.AreEqual(expected, actual, "expected=" + Arrays.ToString(expected) + ",actual=" + Arrays.ToString(actual));
+        }
+    }
 }
\ No newline at end of file


[40/50] [abbrv] lucenenet git commit: Ported Analysis.Compound namespace + tests

Posted by sy...@apache.org.
Ported Analysis.Compound namespace + tests


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/87c1d606
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/87c1d606
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/87c1d606

Branch: refs/heads/analysis-work
Commit: 87c1d6068e578fde46351ec59f9e2696175b7839
Parents: a153b02
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Mon Aug 15 10:02:08 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 23:22:50 2016 +0700

----------------------------------------------------------------------
 .../Compound/CompoundWordTokenFilterBase.cs     |   14 +-
 .../DictionaryCompoundWordTokenFilter.cs        |    4 +-
 .../DictionaryCompoundWordTokenFilterFactory.cs |   22 +-
 .../HyphenationCompoundWordTokenFilter.cs       |   87 +-
 ...HyphenationCompoundWordTokenFilterFactory.cs |  179 +-
 .../Analysis/Compound/hyphenation/ByteVector.cs |  301 ++--
 .../Analysis/Compound/hyphenation/CharVector.cs |  324 ++--
 .../Analysis/Compound/hyphenation/Hyphen.cs     |  126 +-
 .../Compound/hyphenation/Hyphenation.cs         |   94 +-
 .../Compound/hyphenation/HyphenationTree.cs     | 1084 ++++++------
 .../Compound/hyphenation/PatternConsumer.cs     |   56 +-
 .../Compound/hyphenation/PatternParser.cs       |  927 +++++-----
 .../Compound/hyphenation/TernaryTree.cs         | 1578 +++++++++---------
 .../Compound/hyphenation/hyphenation.dtd        |   68 +
 .../Lucene.Net.Analysis.Common.csproj           |   16 +
 .../Tokenattributes/ICharTermAttribute.cs       |    9 +
 .../Compound/TestCompoundWordTokenFilter.cs     |  766 +++++----
 ...tDictionaryCompoundWordTokenFilterFactory.cs |   79 +-
 ...HyphenationCompoundWordTokenFilterFactory.cs |  106 +-
 .../Analysis/Compound/compoundDictionary.txt    |   19 +
 .../Analysis/Compound/da_UTF8.xml               | 1208 ++++++++++++++
 .../Analysis/Compound/da_compoundDictionary.txt |   19 +
 .../Lucene.Net.Tests.Analysis.Common.csproj     |    9 +-
 23 files changed, 4292 insertions(+), 2803 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
index c6bc4cd..4731b79 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
@@ -1,13 +1,12 @@
-\ufeffusing System.Collections.Generic;
-using System.Diagnostics;
-using Lucene.Net.Analysis.Tokenattributes;
+\ufeffusing Lucene.Net.Analysis.Tokenattributes;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Support;
 using Lucene.Net.Util;
+using System.Collections.Generic;
+using System.Diagnostics;
 
 namespace Lucene.Net.Analysis.Compound
 {
-
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
@@ -24,6 +23,7 @@ namespace Lucene.Net.Analysis.Compound
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */
+
     /// <summary>
     /// Base class for decomposition token filters.
     /// <para>
@@ -64,7 +64,7 @@ namespace Lucene.Net.Analysis.Compound
         protected internal readonly int maxSubwordSize;
         protected internal readonly bool onlyLongestMatch;
 
-        protected internal readonly CharTermAttribute termAtt;
+        protected internal readonly ICharTermAttribute termAtt;
         protected internal readonly IOffsetAttribute offsetAtt;
         private readonly IPositionIncrementAttribute posIncAtt;
 
@@ -83,7 +83,7 @@ namespace Lucene.Net.Analysis.Compound
         protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
             : base(input)
         {
-            termAtt = AddAttribute<ICharTermAttribute>() as CharTermAttribute;
+            termAtt = AddAttribute<ICharTermAttribute>();
             offsetAtt = AddAttribute<IOffsetAttribute>();
             posIncAtt = AddAttribute<IPositionIncrementAttribute>();
 
@@ -108,7 +108,7 @@ namespace Lucene.Net.Analysis.Compound
             this.dictionary = dictionary;
         }
 
-        public override bool IncrementToken()
+        public override sealed bool IncrementToken()
         {
             if (tokens.Count > 0)
             {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
index 09c67fd..849b5b3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
@@ -3,7 +3,6 @@ using Lucene.Net.Util;
 
 namespace Lucene.Net.Analysis.Compound
 {
-
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
@@ -20,6 +19,7 @@ namespace Lucene.Net.Analysis.Compound
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */
+
     /// <summary>
     /// A <seealso cref="TokenFilter"/> that decomposes compound words found in many Germanic languages.
     /// <para>
@@ -90,7 +90,7 @@ namespace Lucene.Net.Analysis.Compound
             }
         }
 
-        protected internal override void Decompose()
+        protected override void Decompose()
         {
             int len = termAtt.Length;
             for (int i = 0; i <= len - this.minSubwordSize; ++i)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
index ef8f1dc..f3c116a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
@@ -1,9 +1,8 @@
-\ufeffusing System.Collections.Generic;
-using Lucene.Net.Analysis.Util;
+\ufeffusing Lucene.Net.Analysis.Util;
+using System.Collections.Generic;
 
 namespace Lucene.Net.Analysis.Compound
 {
-
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
@@ -20,6 +19,7 @@ namespace Lucene.Net.Analysis.Compound
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */
+
     /// <summary>
     /// Factory for <seealso cref="DictionaryCompoundWordTokenFilter"/>. 
     /// <pre class="prettyprint">
@@ -31,7 +31,7 @@ namespace Lucene.Net.Analysis.Compound
     ///   &lt;/analyzer&gt;
     /// &lt;/fieldType&gt;</pre>
     /// </summary>
-    public class DictionaryCompoundWordTokenFilterFactory : TokenFilterFactory, ResourceLoaderAware
+    public class DictionaryCompoundWordTokenFilterFactory : TokenFilterFactory, IResourceLoaderAware
     {
         private CharArraySet dictionary;
         private readonly string dictFile;
@@ -45,19 +45,19 @@ namespace Lucene.Net.Analysis.Compound
         public DictionaryCompoundWordTokenFilterFactory(IDictionary<string, string> args)
             : base(args)
         {
-            assureMatchVersion();
-            dictFile = require(args, "dictionary");
-            minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
-            minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
-            maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
-            onlyLongestMatch = getBoolean(args, "onlyLongestMatch", true);
+            AssureMatchVersion();
+            dictFile = Require(args, "dictionary");
+            minWordSize = GetInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
+            minSubwordSize = GetInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
+            maxSubwordSize = GetInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
+            onlyLongestMatch = GetBoolean(args, "onlyLongestMatch", true);
             if (args.Count > 0)
             {
                 throw new System.ArgumentException("Unknown parameters: " + args);
             }
         }
 
-        public virtual void Inform(ResourceLoader loader)
+        public virtual void Inform(IResourceLoader loader)
         {
             dictionary = base.GetWordSet(loader, dictFile, false);
         }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
index 38518ed..edeee5e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
@@ -1,12 +1,11 @@
-\ufeffusing System.IO;
-using Lucene.Net.Analysis.Compound.Hyphenation;
+\ufeffusing Lucene.Net.Analysis.Compound.Hyphenation;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
-using org.apache.lucene.analysis.compound;
+using System.IO;
+using System.Text;
 
 namespace Lucene.Net.Analysis.Compound
 {
-
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
@@ -23,6 +22,7 @@ namespace Lucene.Net.Analysis.Compound
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */
+
     /// <summary>
     /// A <seealso cref="TokenFilter"/> that decomposes compound words found in many Germanic languages.
     /// <para>
@@ -58,8 +58,10 @@ namespace Lucene.Net.Analysis.Compound
         ///          the hyphenation pattern tree to use for hyphenation </param>
         /// <param name="dictionary">
         ///          the word dictionary to match against. </param>
-        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, HyphenationTree hyphenator, CharArraySet dictionary)
-            : this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false)
+        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, 
+            HyphenationTree hyphenator, CharArraySet dictionary)
+            : this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE, 
+                  DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false)
         {
         }
 
@@ -85,8 +87,11 @@ namespace Lucene.Net.Analysis.Compound
         ///          only subwords shorter than this get to the output stream </param>
         /// <param name="onlyLongestMatch">
         ///          Add only the longest matching subword to the stream </param>
-        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
-            : base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch)
+        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, 
+            HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize, int minSubwordSize, 
+            int maxSubwordSize, bool onlyLongestMatch)
+            : base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, 
+                  onlyLongestMatch)
         {
 
             this.hyphenator = hyphenator;
@@ -100,8 +105,11 @@ namespace Lucene.Net.Analysis.Compound
         /// null, minWordSize, minSubwordSize, maxSubwordSize }
         /// </para>
         /// </summary>
-        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, HyphenationTree hyphenator, int minWordSize, int minSubwordSize, int maxSubwordSize)
-            : this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize, maxSubwordSize, false)
+        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, 
+            HyphenationTree hyphenator, int minWordSize, int minSubwordSize, 
+            int maxSubwordSize)
+            : this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize, 
+                  maxSubwordSize, false)
         {
         }
 
@@ -113,8 +121,10 @@ namespace Lucene.Net.Analysis.Compound
         /// DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE }
         /// </para>
         /// </summary>
-        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, HyphenationTree hyphenator)
-            : this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE)
+        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, 
+            HyphenationTree hyphenator)
+            : this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, 
+                  DEFAULT_MAX_SUBWORD_SIZE)
         {
         }
 
@@ -126,7 +136,18 @@ namespace Lucene.Net.Analysis.Compound
         /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
         public static HyphenationTree GetHyphenationTree(string hyphenationFilename)
         {
-            return getHyphenationTree(new InputSource(hyphenationFilename));
+            return GetHyphenationTree(hyphenationFilename, Encoding.UTF8);
+        }
+
+        /// <summary>
+        /// Create a hyphenator tree
+        /// </summary>
+        /// <param name="hyphenationFilename"> the filename of the XML grammar to load </param>
+        /// <returns> An object representing the hyphenation patterns </returns>
+        /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
+        public static HyphenationTree GetHyphenationTree(string hyphenationFilename, Encoding encoding)
+        {
+            return GetHyphenationTree(new FileStream(hyphenationFilename, FileMode.Open, FileAccess.Read), encoding);
         }
 
         /// <summary>
@@ -135,9 +156,31 @@ namespace Lucene.Net.Analysis.Compound
         /// <param name="hyphenationFile"> the file of the XML grammar to load </param>
         /// <returns> An object representing the hyphenation patterns </returns>
         /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
-        public static HyphenationTree GetHyphenationTree(File hyphenationFile)
+        public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile)
+        {
+            return GetHyphenationTree(hyphenationFile, Encoding.UTF8);
+        }
+
+        /// <summary>
+        /// Create a hyphenator tree
+        /// </summary>
+        /// <param name="hyphenationFile"> the file of the XML grammar to load </param>
+        /// <returns> An object representing the hyphenation patterns </returns>
+        /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
+        public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile, Encoding encoding)
+        {
+            return GetHyphenationTree(new FileStream(hyphenationFile.FullName, FileMode.Open, FileAccess.Read), encoding);
+        }
+
+        /// <summary>
+        /// Create a hyphenator tree
+        /// </summary>
+        /// <param name="hyphenationSource"> the InputSource pointing to the XML grammar </param>
+        /// <returns> An object representing the hyphenation patterns </returns>
+        /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
+        public static HyphenationTree GetHyphenationTree(Stream hyphenationSource)
         {
-            return getHyphenationTree(new InputSource(hyphenationFile.ToURI().toASCIIString()));
+            return GetHyphenationTree(hyphenationSource, Encoding.UTF8);
         }
 
         /// <summary>
@@ -146,17 +189,17 @@ namespace Lucene.Net.Analysis.Compound
         /// <param name="hyphenationSource"> the InputSource pointing to the XML grammar </param>
         /// <returns> An object representing the hyphenation patterns </returns>
         /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
-        public static HyphenationTree getHyphenationTree(InputSource hyphenationSource)
+        public static HyphenationTree GetHyphenationTree(Stream hyphenationSource, Encoding encoding)
         {
             var tree = new HyphenationTree();
-            tree.loadPatterns(hyphenationSource);
+            tree.LoadPatterns(hyphenationSource, encoding);
             return tree;
         }
 
-        protected internal override void decompose()
+        protected override void Decompose()
         {
             // get the hyphenation points
-            Hyphenation hyphens = hyphenator.hyphenate(termAtt.Buffer(), 0, termAtt.Length(), 1, 1);
+            Hyphenation.Hyphenation hyphens = hyphenator.Hyphenate(termAtt.Buffer(), 0, termAtt.Length, 1, 1);
             // No hyphen points found -> exit
             if (hyphens == null)
             {
@@ -197,7 +240,7 @@ namespace Lucene.Net.Analysis.Compound
                         {
                             if (longestMatchToken != null)
                             {
-                                if (longestMatchToken.txt.Length() < partLength)
+                                if (longestMatchToken.txt.Length < partLength)
                                 {
                                     longestMatchToken = new CompoundToken(this, start, partLength);
                                 }
@@ -212,7 +255,7 @@ namespace Lucene.Net.Analysis.Compound
                             tokens.AddLast(new CompoundToken(this, start, partLength));
                         }
                     }
-                    else if (dictionary.contains(termAtt.buffer(), start, partLength - 1))
+                    else if (dictionary.Contains(termAtt.Buffer(), start, partLength - 1))
                     {
                         // check the dictionary again with a word that is one character
                         // shorter
@@ -222,7 +265,7 @@ namespace Lucene.Net.Analysis.Compound
                         {
                             if (longestMatchToken != null)
                             {
-                                if (longestMatchToken.txt.Length() < partLength - 1)
+                                if (longestMatchToken.txt.Length < partLength - 1)
                                 {
                                     longestMatchToken = new CompoundToken(this, start, partLength - 1);
                                 }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
index d1cdeee..4de8724 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
@@ -1,12 +1,13 @@
-\ufeffusing System.Collections.Generic;
-using Lucene.Net.Analysis.Compound.Hyphenation;
+\ufeffusing Lucene.Net.Analysis.Compound.Hyphenation;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
 
 namespace Lucene.Net.Analysis.Compound
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -22,92 +23,96 @@ namespace Lucene.Net.Analysis.Compound
 	 * See the License for the specific language governing permissions and
 	 * limitations under the License.
 	 */
+
     /// <summary>
-	/// Factory for <seealso cref="HyphenationCompoundWordTokenFilter"/>.
-	/// <para>
-	/// This factory accepts the following parameters:
-	/// <ul>
-	///  <li><code>hyphenator</code> (mandatory): path to the FOP xml hyphenation pattern. 
-	///  See <a href="http://offo.sourceforge.net/hyphenation/">http://offo.sourceforge.net/hyphenation/</a>.
-	///  <li><code>encoding</code> (optional): encoding of the xml hyphenation file. defaults to UTF-8.
-	///  <li><code>dictionary</code> (optional): dictionary of words. defaults to no dictionary.
-	///  <li><code>minWordSize</code> (optional): minimal word length that gets decomposed. defaults to 5.
-	///  <li><code>minSubwordSize</code> (optional): minimum length of subwords. defaults to 2.
-	///  <li><code>maxSubwordSize</code> (optional): maximum length of subwords. defaults to 15.
-	///  <li><code>onlyLongestMatch</code> (optional): if true, adds only the longest matching subword 
-	///    to the stream. defaults to false.
-	/// </ul>
-	/// </para>
-	/// <para>
-	/// <pre class="prettyprint">
-	/// &lt;fieldType name="text_hyphncomp" class="solr.TextField" positionIncrementGap="100"&gt;
-	///   &lt;analyzer&gt;
-	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
-	///     &lt;filter class="solr.HyphenationCompoundWordTokenFilterFactory" hyphenator="hyphenator.xml" encoding="UTF-8"
-	///         dictionary="dictionary.txt" minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="false"/&gt;
-	///   &lt;/analyzer&gt;
-	/// &lt;/fieldType&gt;</pre>
-	/// 
-	/// </para>
-	/// </summary>
-	/// <seealso cref= HyphenationCompoundWordTokenFilter </seealso>
-	public class HyphenationCompoundWordTokenFilterFactory : TokenFilterFactory, ResourceLoaderAware
-	{
-	  private CharArraySet dictionary;
-	  private HyphenationTree hyphenator;
-	  private readonly string dictFile;
-	  private readonly string hypFile;
-	  private readonly string encoding;
-	  private readonly int minWordSize;
-	  private readonly int minSubwordSize;
-	  private readonly int maxSubwordSize;
-	  private readonly bool onlyLongestMatch;
+    /// Factory for <seealso cref="HyphenationCompoundWordTokenFilter"/>.
+    /// <para>
+    /// This factory accepts the following parameters:
+    /// <ul>
+    ///  <li><code>hyphenator</code> (mandatory): path to the FOP xml hyphenation pattern. 
+    ///  See <a href="http://offo.sourceforge.net/hyphenation/">http://offo.sourceforge.net/hyphenation/</a>.
+    ///  <li><code>encoding</code> (optional): encoding of the xml hyphenation file. defaults to UTF-8.
+    ///  <li><code>dictionary</code> (optional): dictionary of words. defaults to no dictionary.
+    ///  <li><code>minWordSize</code> (optional): minimal word length that gets decomposed. defaults to 5.
+    ///  <li><code>minSubwordSize</code> (optional): minimum length of subwords. defaults to 2.
+    ///  <li><code>maxSubwordSize</code> (optional): maximum length of subwords. defaults to 15.
+    ///  <li><code>onlyLongestMatch</code> (optional): if true, adds only the longest matching subword 
+    ///    to the stream. defaults to false.
+    /// </ul>
+    /// </para>
+    /// <para>
+    /// <pre class="prettyprint">
+    /// &lt;fieldType name="text_hyphncomp" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.HyphenationCompoundWordTokenFilterFactory" hyphenator="hyphenator.xml" encoding="UTF-8"
+    ///         dictionary="dictionary.txt" minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="false"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;</pre>
+    /// 
+    /// </para>
+    /// </summary>
+    /// <seealso cref= HyphenationCompoundWordTokenFilter </seealso>
+    public class HyphenationCompoundWordTokenFilterFactory : TokenFilterFactory, IResourceLoaderAware
+    {
+        private CharArraySet dictionary;
+        private HyphenationTree hyphenator;
+        private readonly string dictFile;
+        private readonly string hypFile;
+        private readonly string encoding;
+        private readonly int minWordSize;
+        private readonly int minSubwordSize;
+        private readonly int maxSubwordSize;
+        private readonly bool onlyLongestMatch;
+
+        /// <summary>
+        /// Creates a new HyphenationCompoundWordTokenFilterFactory </summary>
+        public HyphenationCompoundWordTokenFilterFactory(IDictionary<string, string> args) : base(args)
+        {
+            AssureMatchVersion();
+            dictFile = Get(args, "dictionary");
+            encoding = Get(args, "encoding");
+            hypFile = Require(args, "hyphenator"); // LUCENENET TODO: Not sure what to do with this
+            minWordSize = GetInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
+            minSubwordSize = GetInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
+            maxSubwordSize = GetInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
+            onlyLongestMatch = GetBoolean(args, "onlyLongestMatch", false);
+            if (args.Count > 0)
+            {
+                throw new System.ArgumentException("Unknown parameters: " + args);
+            }
+        }
+
+        public virtual void Inform(IResourceLoader loader)
+        {
+            Stream stream = null;
+            try
+            {
+                if (dictFile != null) // the dictionary can be empty.
+                {
+                    dictionary = GetWordSet(loader, dictFile, false);
+                }
+                // TODO: Broken, because we cannot resolve real system id
+                // ResourceLoader should also supply method like ClassLoader to get resource URL
+                stream = loader.OpenResource(hypFile);
+                //InputSource @is = new InputSource(stream);
+                //@is.Encoding = encoding; // if it's null let xml parser decide
+                //@is.SystemId = hypFile;
 
-	  /// <summary>
-	  /// Creates a new HyphenationCompoundWordTokenFilterFactory </summary>
-	  public HyphenationCompoundWordTokenFilterFactory(IDictionary<string, string> args) : base(args)
-	  {
-		assureMatchVersion();
-		dictFile = get(args, "dictionary");
-		encoding = get(args, "encoding");
-		hypFile = require(args, "hyphenator");
-		minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
-		minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
-		maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
-		onlyLongestMatch = getBoolean(args, "onlyLongestMatch", false);
-		if (args.Count > 0)
-		{
-		  throw new System.ArgumentException("Unknown parameters: " + args);
-		}
-	  }
+                var xmlEncoding = string.IsNullOrEmpty(encoding) ? Encoding.UTF8 : Encoding.GetEncoding(encoding);
 
-	  public virtual void Inform(ResourceLoader loader)
-	  {
-		InputStream stream = null;
-		try
-		{
-		  if (dictFile != null) // the dictionary can be empty.
-		  {
-			dictionary = getWordSet(loader, dictFile, false);
-		  }
-		  // TODO: Broken, because we cannot resolve real system id
-		  // ResourceLoader should also supply method like ClassLoader to get resource URL
-		  stream = loader.openResource(hypFile);
-		  InputSource @is = new InputSource(stream);
-		  @is.Encoding = encoding; // if it's null let xml parser decide
-		  @is.SystemId = hypFile;
-		  hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
-		}
-		finally
-		{
-		  IOUtils.CloseWhileHandlingException(stream);
-		}
-	  }
+                hyphenator = HyphenationCompoundWordTokenFilter.GetHyphenationTree(stream, xmlEncoding);
 
-	  public override TokenStream Create(TokenStream input)
-	  {
-		return new HyphenationCompoundWordTokenFilter(luceneMatchVersion, input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
-	  }
-	}
+            }
+            finally
+            {
+                IOUtils.CloseWhileHandlingException(stream);
+            }
+        }
 
+        public override TokenStream Create(TokenStream input)
+        {
+            return new HyphenationCompoundWordTokenFilter(luceneMatchVersion, input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
index c59a69d..6442d11 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
@@ -1,149 +1,156 @@
-\ufeff/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
+\ufeffnamespace Lucene.Net.Analysis.Compound.Hyphenation
 {
-
-	/// <summary>
-	/// This class implements a simple byte vector with access to the underlying
-	/// array.
-	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
-	/// </summary>
-	public class ByteVector
-	{
-
-	  /// <summary>
-	  /// Capacity increment size
-	  /// </summary>
-	  private const int DEFAULT_BLOCK_SIZE = 2048;
-
-	  private int blockSize;
-
-	  /// <summary>
-	  /// The encapsulated array
-	  /// </summary>
-	  private sbyte[] array;
-
-	  /// <summary>
-	  /// Points to next free item
-	  /// </summary>
-	  private int n;
-
-	  public ByteVector() : this(DEFAULT_BLOCK_SIZE)
-	  {
-	  }
-
-	  public ByteVector(int capacity)
-	  {
-		if (capacity_Renamed > 0)
-		{
-		  blockSize = capacity_Renamed;
-		}
-		else
-		{
-		  blockSize = DEFAULT_BLOCK_SIZE;
-		}
-		array = new sbyte[blockSize];
-		n = 0;
-	  }
-
-	  public ByteVector(sbyte[] a)
-	  {
-		blockSize = DEFAULT_BLOCK_SIZE;
-		array = a;
-		n = 0;
-	  }
-
-	  public ByteVector(sbyte[] a, int capacity)
-	  {
-		if (capacity_Renamed > 0)
-		{
-		  blockSize = capacity_Renamed;
-		}
-		else
-		{
-		  blockSize = DEFAULT_BLOCK_SIZE;
-		}
-		array = a;
-		n = 0;
-	  }
-
-	  public virtual sbyte[] Array
-	  {
-		  get
-		  {
-			return array;
-		  }
-	  }
-
-	  /// <summary>
-	  /// return number of items in array
-	  /// </summary>
-	  public virtual int length()
-	  {
-		return n;
-	  }
-
-	  /// <summary>
-	  /// returns current capacity of array
-	  /// </summary>
-	  public virtual int capacity()
-	  {
-		return array.Length;
-	  }
-
-	  public virtual void put(int index, sbyte val)
-	  {
-		array[index] = val;
-	  }
-
-	  public virtual sbyte get(int index)
-	  {
-		return array[index];
-	  }
-
-	  /// <summary>
-	  /// This is to implement memory allocation in the array. Like malloc().
-	  /// </summary>
-	  public virtual int alloc(int size)
-	  {
-		int index = n;
-		int len = array.Length;
-		if (n + size >= len)
-		{
-		  sbyte[] aux = new sbyte[len + blockSize];
-		  Array.Copy(array, 0, aux, 0, len);
-		  array = aux;
-		}
-		n += size;
-		return index;
-	  }
-
-	  public virtual void trimToSize()
-	  {
-		if (n < array.Length)
-		{
-		  sbyte[] aux = new sbyte[n];
-		  Array.Copy(array, 0, aux, 0, n);
-		  array = aux;
-		}
-	  }
-
-	}
-
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// This class implements a simple byte vector with access to the underlying
+    /// array.
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
+    /// </summary>
+    public class ByteVector
+    {
+
+        /// <summary>
+        /// Capacity increment size
+        /// </summary>
+        private const int DEFAULT_BLOCK_SIZE = 2048;
+
+        private int blockSize;
+
+        /// <summary>
+        /// The encapsulated array
+        /// </summary>
+        private sbyte[] array;
+
+        /// <summary>
+        /// Points to next free item
+        /// </summary>
+        private int n;
+
+        public ByteVector() : this(DEFAULT_BLOCK_SIZE)
+        {
+        }
+
+        public ByteVector(int capacity)
+        {
+            if (capacity > 0)
+            {
+                blockSize = capacity;
+            }
+            else
+            {
+                blockSize = DEFAULT_BLOCK_SIZE;
+            }
+            array = new sbyte[blockSize];
+            n = 0;
+        }
+
+        public ByteVector(sbyte[] a)
+        {
+            blockSize = DEFAULT_BLOCK_SIZE;
+            array = a;
+            n = 0;
+        }
+
+        public ByteVector(sbyte[] a, int capacity)
+        {
+            if (capacity > 0)
+            {
+                blockSize = capacity;
+            }
+            else
+            {
+                blockSize = DEFAULT_BLOCK_SIZE;
+            }
+            array = a;
+            n = 0;
+        }
+
+        public virtual sbyte[] Array
+        {
+            get
+            {
+                return array;
+            }
+        }
+
+        /// <summary>
+        /// LUCENENET indexer for .NET
+        /// </summary>
+        /// <param name="index"></param>
+        /// <returns></returns>
+        public virtual sbyte this[int index]
+        {
+            get { return array[index]; }
+            set { array[index] = value; }
+        }
+
+        /// <summary>
+        /// return number of items in array
+        /// </summary>
+        public virtual int Length
+        {
+            get { return n; }
+        }
+
+        /// <summary>
+        /// returns current capacity of array
+        /// </summary>
+        public virtual int Capacity
+        {
+            get { return array.Length; }
+        }
+
+        //public virtual void Put(int index, sbyte val)
+        //{
+        //    array[index] = val;
+        //}
+
+        //public virtual sbyte Get(int index)
+        //{
+        //    return array[index];
+        //}
+
+        /// <summary>
+        /// This is to implement memory allocation in the array. Like malloc().
+        /// </summary>
+        public virtual int Alloc(int size)
+        {
+            int index = n;
+            int len = array.Length;
+            if (n + size >= len)
+            {
+                sbyte[] aux = new sbyte[len + blockSize];
+                System.Array.Copy(array, 0, aux, 0, len);
+                array = aux;
+            }
+            n += size;
+            return index;
+        }
+
+        public virtual void TrimToSize()
+        {
+            if (n < array.Length)
+            {
+                sbyte[] aux = new sbyte[n];
+                System.Array.Copy(array, 0, aux, 0, n);
+                array = aux;
+            }
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
index 568b50b..26fcea5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
@@ -1,163 +1,171 @@
 \ufeffusing System;
 
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
 namespace Lucene.Net.Analysis.Compound.Hyphenation
 {
-
-	/// <summary>
-	/// This class implements a simple char vector with access to the underlying
-	/// array.
-	/// 
-	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
-	/// </summary>
-	public class CharVector : ICloneable
-	{
-
-	  /// <summary>
-	  /// Capacity increment size
-	  /// </summary>
-	  private const int DEFAULT_BLOCK_SIZE = 2048;
-
-	  private int blockSize;
-
-	  /// <summary>
-	  /// The encapsulated array
-	  /// </summary>
-	  private char[] array;
-
-	  /// <summary>
-	  /// Points to next free item
-	  /// </summary>
-	  private int n;
-
-	  public CharVector() : this(DEFAULT_BLOCK_SIZE)
-	  {
-	  }
-
-	  public CharVector(int capacity)
-	  {
-		if (capacity_Renamed > 0)
-		{
-		  blockSize = capacity_Renamed;
-		}
-		else
-		{
-		  blockSize = DEFAULT_BLOCK_SIZE;
-		}
-		array = new char[blockSize];
-		n = 0;
-	  }
-
-	  public CharVector(char[] a)
-	  {
-		blockSize = DEFAULT_BLOCK_SIZE;
-		array = a;
-		n = a.Length;
-	  }
-
-	  public CharVector(char[] a, int capacity)
-	  {
-		if (capacity_Renamed > 0)
-		{
-		  blockSize = capacity_Renamed;
-		}
-		else
-		{
-		  blockSize = DEFAULT_BLOCK_SIZE;
-		}
-		array = a;
-		n = a.Length;
-	  }
-
-	  /// <summary>
-	  /// Reset Vector but don't resize or clear elements
-	  /// </summary>
-	  public virtual void clear()
-	  {
-		n = 0;
-	  }
-
-	  public override CharVector clone()
-	  {
-		CharVector cv = new CharVector(array.Clone(), blockSize);
-		cv.n = this.n;
-		return cv;
-	  }
-
-	  public virtual char[] Array
-	  {
-		  get
-		  {
-			return array;
-		  }
-	  }
-
-	  /// <summary>
-	  /// return number of items in array
-	  /// </summary>
-	  public virtual int length()
-	  {
-		return n;
-	  }
-
-	  /// <summary>
-	  /// returns current capacity of array
-	  /// </summary>
-	  public virtual int capacity()
-	  {
-		return array.Length;
-	  }
-
-	  public virtual void put(int index, char val)
-	  {
-		array[index] = val;
-	  }
-
-	  public virtual char get(int index)
-	  {
-		return array[index];
-	  }
-
-	  public virtual int alloc(int size)
-	  {
-		int index = n;
-		int len = array.Length;
-		if (n + size >= len)
-		{
-		  char[] aux = new char[len + blockSize];
-		  Array.Copy(array, 0, aux, 0, len);
-		  array = aux;
-		}
-		n += size;
-		return index;
-	  }
-
-	  public virtual void trimToSize()
-	  {
-		if (n < array.Length)
-		{
-		  char[] aux = new char[n];
-		  Array.Copy(array, 0, aux, 0, n);
-		  array = aux;
-		}
-	  }
-
-	}
-
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// This class implements a simple char vector with access to the underlying
+    /// array.
+    /// 
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
+    /// </summary>
+    public class CharVector : ICloneable
+    {
+
+        /// <summary>
+        /// Capacity increment size
+        /// </summary>
+        private const int DEFAULT_BLOCK_SIZE = 2048;
+
+        private int blockSize;
+
+        /// <summary>
+        /// The encapsulated array
+        /// </summary>
+        private char[] array;
+
+        /// <summary>
+        /// Points to next free item
+        /// </summary>
+        private int n;
+
+        public CharVector() : this(DEFAULT_BLOCK_SIZE)
+        {
+        }
+
+        public CharVector(int capacity)
+        {
+            if (capacity > 0)
+            {
+                blockSize = capacity;
+            }
+            else
+            {
+                blockSize = DEFAULT_BLOCK_SIZE;
+            }
+            array = new char[blockSize];
+            n = 0;
+        }
+
+        public CharVector(char[] a)
+        {
+            blockSize = DEFAULT_BLOCK_SIZE;
+            array = a;
+            n = a.Length;
+        }
+
+        public CharVector(char[] a, int capacity)
+        {
+            if (capacity > 0)
+            {
+                blockSize = capacity;
+            }
+            else
+            {
+                blockSize = DEFAULT_BLOCK_SIZE;
+            }
+            array = a;
+            n = a.Length;
+        }
+
+        /// <summary>
+        /// Reset Vector but don't resize or clear elements
+        /// </summary>
+        public virtual void Clear()
+        {
+            n = 0;
+        }
+
+        public virtual object Clone()
+        {
+            CharVector cv = new CharVector(array, blockSize);
+            cv.n = this.n;
+            return cv;
+        }
+
+        public virtual char[] Array
+        {
+            get
+            {
+                return array;
+            }
+        }
+
+        /// <summary>
+        /// LUCENENET indexer for .NET
+        /// </summary>
+        /// <param name="index"></param>
+        /// <returns></returns>
+        public virtual char this[int index]
+        {
+            get { return array[index]; }
+            set { array[index] = value; }
+        }
+
+        /// <summary>
+        /// return number of items in array
+        /// </summary>
+        public virtual int Length()
+        {
+            return n;
+        }
+
+        /// <summary>
+        /// returns current capacity of array
+        /// </summary>
+        public virtual int Capacity
+        {
+            get { return array.Length; }
+        }
+
+        //public virtual void Put(int index, char val)
+        //{
+        //    array[index] = val;
+        //}
+
+        //public virtual char get(int index)
+        //{
+        //    return array[index];
+        //}
+
+        public virtual int Alloc(int size)
+        {
+            int index = n;
+            int len = array.Length;
+            if (n + size >= len)
+            {
+                char[] aux = new char[len + blockSize];
+                System.Array.Copy(array, 0, aux, 0, len);
+                array = aux;
+            }
+            n += size;
+            return index;
+        }
+
+        public virtual void TrimToSize()
+        {
+            if (n < array.Length)
+            {
+                char[] aux = new char[n];
+                System.Array.Copy(array, 0, aux, 0, n);
+                array = aux;
+            }
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
index 8d73bd8..91009b1 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
@@ -1,76 +1,72 @@
 \ufeffusing System.Text;
 
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
 namespace Lucene.Net.Analysis.Compound.Hyphenation
 {
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
 
-	/// <summary>
-	/// This class represents a hyphen. A 'full' hyphen is made of 3 parts: the
-	/// pre-break text, post-break text and no-break. If no line-break is generated
-	/// at this position, the no-break text is used, otherwise, pre-break and
-	/// post-break are used. Typically, pre-break is equal to the hyphen character
-	/// and the others are empty. However, this general scheme allows support for
-	/// cases in some languages where words change spelling if they're split across
-	/// lines, like german's 'backen' which hyphenates 'bak-ken'. BTW, this comes
-	/// from TeX.
-	/// 
-	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
-	/// </summary>
-
-	public class Hyphen
-	{
-	  public string preBreak;
-
-	  public string noBreak;
-
-	  public string postBreak;
+    /// <summary>
+    /// This class represents a hyphen. A 'full' hyphen is made of 3 parts: the
+    /// pre-break text, post-break text and no-break. If no line-break is generated
+    /// at this position, the no-break text is used, otherwise, pre-break and
+    /// post-break are used. Typically, pre-break is equal to the hyphen character
+    /// and the others are empty. However, this general scheme allows support for
+    /// cases in some languages where words change spelling if they're split across
+    /// lines, like german's 'backen' which hyphenates 'bak-ken'. BTW, this comes
+    /// from TeX.
+    /// 
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
+    /// </summary>
+    public class Hyphen
+    {
+        public string preBreak;
 
-	  internal Hyphen(string pre, string no, string post)
-	  {
-		preBreak = pre;
-		noBreak = no;
-		postBreak = post;
-	  }
+        public string noBreak;
 
-	  internal Hyphen(string pre)
-	  {
-		preBreak = pre;
-		noBreak = null;
-		postBreak = null;
-	  }
+        public string postBreak;
 
-	  public override string ToString()
-	  {
-		if (noBreak == null && postBreak == null && preBreak != null && preBreak.Equals("-"))
-		{
-		  return "-";
-		}
-		StringBuilder res = new StringBuilder("{");
-		res.Append(preBreak);
-		res.Append("}{");
-		res.Append(postBreak);
-		res.Append("}{");
-		res.Append(noBreak);
-		res.Append('}');
-		return res.ToString();
-	  }
+        internal Hyphen(string pre, string no, string post)
+        {
+            preBreak = pre;
+            noBreak = no;
+            postBreak = post;
+        }
 
-	}
+        internal Hyphen(string pre)
+        {
+            preBreak = pre;
+            noBreak = null;
+            postBreak = null;
+        }
 
+        public override string ToString()
+        {
+            if (noBreak == null && postBreak == null && preBreak != null && preBreak.Equals("-"))
+            {
+                return "-";
+            }
+            StringBuilder res = new StringBuilder("{");
+            res.Append(preBreak);
+            res.Append("}{");
+            res.Append(postBreak);
+            res.Append("}{");
+            res.Append(noBreak);
+            res.Append('}');
+            return res.ToString();
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
index bf2a170..fdbac29 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
@@ -1,55 +1,53 @@
-\ufeff/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
+\ufeffnamespace Lucene.Net.Analysis.Compound.Hyphenation
 {
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
 
-	/// <summary>
-	/// This class represents a hyphenated word.
-	/// 
-	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
-	/// </summary>
-	public class Hyphenation
-	{
-
-	  private readonly int[] hyphenPoints;
+    /// <summary>
+    /// This class represents a hyphenated word.
+    /// 
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
+    /// </summary>
+    public class Hyphenation
+    {
 
-	  /// <summary>
-	  /// rawWord as made of alternating strings and <seealso cref="Hyphen"/> instances
-	  /// </summary>
-	  internal Hyphenation(int[] points)
-	  {
-		hyphenPoints = points;
-	  }
+        private readonly int[] hyphenPoints;
 
-	  /// <returns> the number of hyphenation points in the word </returns>
-	  public virtual int length()
-	  {
-		return hyphenPoints.Length;
-	  }
+        /// <summary>
+        /// rawWord as made of alternating strings and <seealso cref="Hyphen"/> instances
+        /// </summary>
+        internal Hyphenation(int[] points)
+        {
+            hyphenPoints = points;
+        }
 
-	  /// <returns> the hyphenation points </returns>
-	  public virtual int[] HyphenationPoints
-	  {
-		  get
-		  {
-			return hyphenPoints;
-		  }
-	  }
-	}
+        /// <returns> the number of hyphenation points in the word </returns>
+        public virtual int Length
+        {
+            get { return hyphenPoints.Length; }
+        }
 
+        /// <returns> the hyphenation points </returns>
+        public virtual int[] HyphenationPoints
+        {
+            get
+            {
+                return hyphenPoints;
+            }
+        }
+    }
 }
\ No newline at end of file


[41/50] [abbrv] lucenenet git commit: Ported TestAllDictionaries and TestAllDictionaries2 for Analysis.Hunspell

Posted by sy...@apache.org.
Ported TestAllDictionaries and TestAllDictionaries2 for Analysis.Hunspell


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/efa13ffd
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/efa13ffd
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/efa13ffd

Branch: refs/heads/analysis-work
Commit: efa13ffd94c6b7a3cd715e3b02219d43216b437b
Parents: e4d9f44
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sat Aug 20 14:41:57 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Tue Aug 23 02:58:53 2016 +0700

----------------------------------------------------------------------
 .gitignore                                      |   1 +
 .../Analysis/Hunspell/TestAllDictionaries.cs    | 214 +++++++++++---
 .../Analysis/Hunspell/TestAllDictionaries2.cs   | 278 ++++++++++++++-----
 .../Lucene.Net.Tests.Analysis.Common.csproj     |   2 +
 4 files changed, 385 insertions(+), 110 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/efa13ffd/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 1179779..1968c82 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,3 +34,4 @@ doc/
 src/demo/
 packages/
 TestResults/
+test-files/analysis/data/

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/efa13ffd/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs
index c3bc291..687a39c 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries.cs
@@ -25,77 +25,201 @@ namespace Lucene.Net.Analysis.Hunspell
 	 * limitations under the License.
 	 */
 
-
-    //using IOUtils = org.apache.lucene.util.IOUtils;
-    //using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
-    //using RamUsageEstimator = org.apache.lucene.util.RamUsageEstimator;
-    //using Ignore = org.junit.Ignore;
-
     /// <summary>
     /// Can be retrieved via:
     /// wget --mirror -np http://archive.services.openoffice.org/pub/mirror/OpenOffice.org/contrib/dictionaries/
     /// Note some of the files differ only in case. This may be a problem on your operating system!
     /// </summary>
 
-    //[Ignore("Enable manually")]
+    [Ignore("Enable manually")]
     public class TestAllDictionaries : LuceneTestCase
     {
 
         // set this to the location of where you downloaded all the files
-        internal static readonly FileInfo DICTIONARY_HOME = new FileInfo("/data/archive.services.openoffice.org/pub/mirror/OpenOffice.org/contrib/dictionaries");
+        internal static readonly DirectoryInfo DICTIONARY_HOME = new DirectoryInfo(@"..\..\..\..\test-files\analysis\data\dictionaries");
 
-        internal readonly string[] tests = new string[] { "af_ZA.zip", "af_ZA.dic", "af_ZA.aff", "ak_GH.zip", "ak_GH.dic", "ak_GH.aff", "bg_BG.zip", "bg_BG.dic", "bg_BG.aff", "ca_ANY.zip", "catalan.dic", "catalan.aff", "ca_ES.zip", "ca_ES.dic", "ca_ES.aff", "cs_CZ.zip", "cs_CZ.dic", "cs_CZ.aff", "cy_GB.zip", "cy_GB.dic", "cy_GB.aff", "da_DK.zip", "da_DK.dic", "da_DK.aff", "de_AT.zip", "de_AT.dic", "de_AT.aff", "de_CH.zip", "de_CH.dic", "de_CH.aff", "de_DE.zip", "de_DE.dic", "de_DE.aff", "de_DE_comb.zip", "de_DE_comb.dic", "de_DE_comb.aff", "de_DE_frami.zip", "de_DE_frami.dic", "de_DE_frami.aff", "de_DE_neu.zip", "de_DE_neu.dic", "de_DE_neu.aff", "el_GR.zip", "el_GR.dic", "el_GR.aff", "en_AU.zip", "en_AU.dic", "en_AU.aff", "en_CA.zip", "en_CA.dic", "en_CA.aff", "en_GB-oed.zip", "en_GB-oed.dic", "en_GB-oed.aff", "en_GB.zip", "en_GB.dic", "en_GB.aff", "en_NZ.zip", "en_NZ.dic", "en_NZ.aff", "eo.zip", "eo_l3.dic", "eo_l3.aff", "eo_EO.zip", "eo_EO.dic", "eo_EO.aff", "es_AR.zip", "es_AR.di
 c", "es_AR.aff", "es_BO.zip", "es_BO.dic", "es_BO.aff", "es_CL.zip", "es_CL.dic", "es_CL.aff", "es_CO.zip", "es_CO.dic", "es_CO.aff", "es_CR.zip", "es_CR.dic", "es_CR.aff", "es_CU.zip", "es_CU.dic", "es_CU.aff", "es_DO.zip", "es_DO.dic", "es_DO.aff", "es_EC.zip", "es_EC.dic", "es_EC.aff", "es_ES.zip", "es_ES.dic", "es_ES.aff", "es_GT.zip", "es_GT.dic", "es_GT.aff", "es_HN.zip", "es_HN.dic", "es_HN.aff", "es_MX.zip", "es_MX.dic", "es_MX.aff", "es_NEW.zip", "es_NEW.dic", "es_NEW.aff", "es_NI.zip", "es_NI.dic", "es_NI.aff", "es_PA.zip", "es_PA.dic", "es_PA.aff", "es_PE.zip", "es_PE.dic", "es_PE.aff", "es_PR.zip", "es_PR.dic", "es_PR.aff", "es_PY.zip", "es_PY.dic", "es_PY.aff", "es_SV.zip", "es_SV.dic", "es_SV.aff", "es_UY.zip", "es_UY.dic", "es_UY.aff", "es_VE.zip", "es_VE.dic", "es_VE.aff", "et_EE.zip", "et_EE.dic", "et_EE.aff", "fo_FO.zip", "fo_FO.dic", "fo_FO.aff", "fr_FR-1990_1-3-2.zip", "fr_FR-1990.dic", "fr_FR-1990.aff", "fr_FR-classique_1-3-2.zip", "fr_FR-classique.dic", "fr_FR-
 classique.aff", "fr_FR_1-3-2.zip", "fr_FR.dic", "fr_FR.aff", "fy_NL.zip", "fy_NL.dic", "fy_NL.aff", "ga_IE.zip", "ga_IE.dic", "ga_IE.aff", "gd_GB.zip", "gd_GB.dic", "gd_GB.aff", "gl_ES.zip", "gl_ES.dic", "gl_ES.aff", "gsc_FR.zip", "gsc_FR.dic", "gsc_FR.aff", "gu_IN.zip", "gu_IN.dic", "gu_IN.aff", "he_IL.zip", "he_IL.dic", "he_IL.aff", "hi_IN.zip", "hi_IN.dic", "hi_IN.aff", "hil_PH.zip", "hil_PH.dic", "hil_PH.aff", "hr_HR.zip", "hr_HR.dic", "hr_HR.aff", "hu_HU.zip", "hu_HU.dic", "hu_HU.aff", "hu_HU_comb.zip", "hu_HU.dic", "hu_HU.aff", "ia.zip", "ia.dic", "ia.aff", "id_ID.zip", "id_ID.dic", "id_ID.aff", "it_IT.zip", "it_IT.dic", "it_IT.aff", "ku_TR.zip", "ku_TR.dic", "ku_TR.aff", "la.zip", "la.dic", "la.aff", "lt_LT.zip", "lt_LT.dic", "lt_LT.aff", "lv_LV.zip", "lv_LV.dic", "lv_LV.aff", "mg_MG.zip", "mg_MG.dic", "mg_MG.aff", "mi_NZ.zip", "mi_NZ.dic", "mi_NZ.aff", "mk_MK.zip", "mk_MK.dic", "mk_MK.aff", "mos_BF.zip", "mos_BF.dic", "mos_BF.aff", "mr_IN.zip", "mr_IN.dic", "mr_IN.aff", "ms_
 MY.zip", "ms_MY.dic", "ms_MY.aff", "nb_NO.zip", "nb_NO.dic", "nb_NO.aff", "ne_NP.zip", "ne_NP.dic", "ne_NP.aff", "nl_NL.zip", "nl_NL.dic", "nl_NL.aff", "nl_med.zip", "nl_med.dic", "nl_med.aff", "nn_NO.zip", "nn_NO.dic", "nn_NO.aff", "nr_ZA.zip", "nr_ZA.dic", "nr_ZA.aff", "ns_ZA.zip", "ns_ZA.dic", "ns_ZA.aff", "ny_MW.zip", "ny_MW.dic", "ny_MW.aff", "oc_FR.zip", "oc_FR.dic", "oc_FR.aff", "pl_PL.zip", "pl_PL.dic", "pl_PL.aff", "pt_BR.zip", "pt_BR.dic", "pt_BR.aff", "pt_PT.zip", "pt_PT.dic", "pt_PT.aff", "ro_RO.zip", "ro_RO.dic", "ro_RO.aff", "ru_RU.zip", "ru_RU.dic", "ru_RU.aff", "ru_RU_ye.zip", "ru_RU_ie.dic", "ru_RU_ie.aff", "ru_RU_yo.zip", "ru_RU_yo.dic", "ru_RU_yo.aff", "rw_RW.zip", "rw_RW.dic", "rw_RW.aff", "sk_SK.zip", "sk_SK.dic", "sk_SK.aff", "sl_SI.zip", "sl_SI.dic", "sl_SI.aff", "sq_AL.zip", "sq_AL.dic", "sq_AL.aff", "ss_ZA.zip", "ss_ZA.dic", "ss_ZA.aff", "st_ZA.zip", "st_ZA.dic", "st_ZA.aff", "sv_SE.zip", "sv_SE.dic", "sv_SE.aff", "sw_KE.zip", "sw_KE.dic", "sw_KE.aff", "tet_
 ID.zip", "tet_ID.dic", "tet_ID.aff", "th_TH.zip", "th_TH.dic", "th_TH.aff", "tl_PH.zip", "tl_PH.dic", "tl_PH.aff", "tn_ZA.zip", "tn_ZA.dic", "tn_ZA.aff", "ts_ZA.zip", "ts_ZA.dic", "ts_ZA.aff", "uk_UA.zip", "uk_UA.dic", "uk_UA.aff", "ve_ZA.zip", "ve_ZA.dic", "ve_ZA.aff", "vi_VN.zip", "vi_VN.dic", "vi_VN.aff", "xh_ZA.zip", "xh_ZA.dic", "xh_ZA.aff", "zu_ZA.zip", "zu_ZA.dic", "zu_ZA.aff" };
+        internal readonly string[] tests = new string[] 
+        {
+            /* zip file */               /* dictionary */       /* affix */
+            "af_ZA.zip",                 "af_ZA.dic",           "af_ZA.aff",
+            "ak_GH.zip",                 "ak_GH.dic",           "ak_GH.aff",
+            "bg_BG.zip",                 "bg_BG.dic",           "bg_BG.aff",
+            "ca_ANY.zip",                "catalan.dic",         "catalan.aff",
+            "ca_ES.zip",                 "ca_ES.dic",           "ca_ES.aff",
+// BUG: broken flag "cop_EG.zip",                "cop_EG.dic",          "cop_EG.aff",
+            "cs_CZ.zip",                 "cs_CZ.dic",           "cs_CZ.aff",
+            "cy_GB.zip",                 "cy_GB.dic",           "cy_GB.aff",
+            "da_DK.zip",                 "da_DK.dic",           "da_DK.aff",
+            "de_AT.zip",                 "de_AT.dic",           "de_AT.aff",
+            "de_CH.zip",                 "de_CH.dic",           "de_CH.aff",
+            "de_DE.zip",                 "de_DE.dic",           "de_DE.aff",
+            "de_DE_comb.zip",            "de_DE_comb.dic",      "de_DE_comb.aff",
+            "de_DE_frami.zip",           "de_DE_frami.dic",     "de_DE_frami.aff",
+            "de_DE_neu.zip",             "de_DE_neu.dic",       "de_DE_neu.aff",
+            "el_GR.zip",                 "el_GR.dic",           "el_GR.aff",
+            "en_AU.zip",                 "en_AU.dic",           "en_AU.aff",
+            "en_CA.zip",                 "en_CA.dic",           "en_CA.aff",
+            "en_GB-oed.zip",             "en_GB-oed.dic",       "en_GB-oed.aff",
+            "en_GB.zip",                 "en_GB.dic",           "en_GB.aff",
+            "en_NZ.zip",                 "en_NZ.dic",           "en_NZ.aff",
+            "eo.zip",                    "eo_l3.dic",           "eo_l3.aff",
+            "eo_EO.zip",                 "eo_EO.dic",           "eo_EO.aff",
+            "es_AR.zip",                 "es_AR.dic",           "es_AR.aff",
+            "es_BO.zip",                 "es_BO.dic",           "es_BO.aff",
+            "es_CL.zip",                 "es_CL.dic",           "es_CL.aff",
+            "es_CO.zip",                 "es_CO.dic",           "es_CO.aff",
+            "es_CR.zip",                 "es_CR.dic",           "es_CR.aff",
+            "es_CU.zip",                 "es_CU.dic",           "es_CU.aff",
+            "es_DO.zip",                 "es_DO.dic",           "es_DO.aff",
+            "es_EC.zip",                 "es_EC.dic",           "es_EC.aff",
+            "es_ES.zip",                 "es_ES.dic",           "es_ES.aff",
+            "es_GT.zip",                 "es_GT.dic",           "es_GT.aff",
+            "es_HN.zip",                 "es_HN.dic",           "es_HN.aff",
+            "es_MX.zip",                 "es_MX.dic",           "es_MX.aff",
+            "es_NEW.zip",                "es_NEW.dic",          "es_NEW.aff",
+            "es_NI.zip",                 "es_NI.dic",           "es_NI.aff",
+            "es_PA.zip",                 "es_PA.dic",           "es_PA.aff",
+            "es_PE.zip",                 "es_PE.dic",           "es_PE.aff",
+            "es_PR.zip",                 "es_PR.dic",           "es_PR.aff",
+            "es_PY.zip",                 "es_PY.dic",           "es_PY.aff",
+            "es_SV.zip",                 "es_SV.dic",           "es_SV.aff",
+            "es_UY.zip",                 "es_UY.dic",           "es_UY.aff",
+            "es_VE.zip",                 "es_VE.dic",           "es_VE.aff",
+            "et_EE.zip",                 "et_EE.dic",           "et_EE.aff",
+            "fo_FO.zip",                 "fo_FO.dic",           "fo_FO.aff",
+            "fr_FR-1990_1-3-2.zip",      "fr_FR-1990.dic",      "fr_FR-1990.aff",
+            "fr_FR-classique_1-3-2.zip", "fr_FR-classique.dic", "fr_FR-classique.aff",
+            "fr_FR_1-3-2.zip",           "fr_FR.dic",           "fr_FR.aff",
+            "fy_NL.zip",                 "fy_NL.dic",           "fy_NL.aff",
+            "ga_IE.zip",                 "ga_IE.dic",           "ga_IE.aff",
+            "gd_GB.zip",                 "gd_GB.dic",           "gd_GB.aff",
+            "gl_ES.zip",                 "gl_ES.dic",           "gl_ES.aff",
+            "gsc_FR.zip",                "gsc_FR.dic",          "gsc_FR.aff",
+            "gu_IN.zip",                 "gu_IN.dic",           "gu_IN.aff",
+            "he_IL.zip",                 "he_IL.dic",           "he_IL.aff",
+            "hi_IN.zip",                 "hi_IN.dic",           "hi_IN.aff",
+            "hil_PH.zip",                "hil_PH.dic",          "hil_PH.aff",
+            "hr_HR.zip",                 "hr_HR.dic",           "hr_HR.aff",
+            "hu_HU.zip",                 "hu_HU.dic",           "hu_HU.aff",
+            "hu_HU_comb.zip",            "hu_HU.dic",           "hu_HU.aff",
+            "ia.zip",                    "ia.dic",              "ia.aff",
+            "id_ID.zip",                 "id_ID.dic",           "id_ID.aff",
+            "it_IT.zip",                 "it_IT.dic",           "it_IT.aff",
+            "ku_TR.zip",                 "ku_TR.dic",           "ku_TR.aff",
+            "la.zip",                    "la.dic",              "la.aff",
+            "lt_LT.zip",                 "lt_LT.dic",           "lt_LT.aff",
+            "lv_LV.zip",                 "lv_LV.dic",           "lv_LV.aff",
+            "mg_MG.zip",                 "mg_MG.dic",           "mg_MG.aff",
+            "mi_NZ.zip",                 "mi_NZ.dic",           "mi_NZ.aff",
+            "mk_MK.zip",                 "mk_MK.dic",           "mk_MK.aff",
+            "mos_BF.zip",                "mos_BF.dic",          "mos_BF.aff",
+            "mr_IN.zip",                 "mr_IN.dic",           "mr_IN.aff",
+            "ms_MY.zip",                 "ms_MY.dic",           "ms_MY.aff",
+            "nb_NO.zip",                 "nb_NO.dic",           "nb_NO.aff",
+            "ne_NP.zip",                 "ne_NP.dic",           "ne_NP.aff",
+            "nl_NL.zip",                 "nl_NL.dic",           "nl_NL.aff",
+            "nl_med.zip",                "nl_med.dic",          "nl_med.aff",
+            "nn_NO.zip",                 "nn_NO.dic",           "nn_NO.aff",
+            "nr_ZA.zip",                 "nr_ZA.dic",           "nr_ZA.aff",
+            "ns_ZA.zip",                 "ns_ZA.dic",           "ns_ZA.aff",
+            "ny_MW.zip",                 "ny_MW.dic",           "ny_MW.aff",
+            "oc_FR.zip",                 "oc_FR.dic",           "oc_FR.aff",
+            "pl_PL.zip",                 "pl_PL.dic",           "pl_PL.aff",
+            "pt_BR.zip",                 "pt_BR.dic",           "pt_BR.aff",
+            "pt_PT.zip",                 "pt_PT.dic",           "pt_PT.aff",
+            "ro_RO.zip",                 "ro_RO.dic",           "ro_RO.aff",
+            "ru_RU.zip",                 "ru_RU.dic",           "ru_RU.aff",
+            "ru_RU_ye.zip",              "ru_RU_ie.dic",        "ru_RU_ie.aff",
+            "ru_RU_yo.zip",              "ru_RU_yo.dic",        "ru_RU_yo.aff",
+            "rw_RW.zip",                 "rw_RW.dic",           "rw_RW.aff",
+            "sk_SK.zip",                 "sk_SK.dic",           "sk_SK.aff",
+            "sl_SI.zip",                 "sl_SI.dic",           "sl_SI.aff",
+            "sq_AL.zip",                 "sq_AL.dic",           "sq_AL.aff",
+            "ss_ZA.zip",                 "ss_ZA.dic",           "ss_ZA.aff",
+            "st_ZA.zip",                 "st_ZA.dic",           "st_ZA.aff",
+            "sv_SE.zip",                 "sv_SE.dic",           "sv_SE.aff",
+            "sw_KE.zip",                 "sw_KE.dic",           "sw_KE.aff",
+            "tet_ID.zip",                "tet_ID.dic",          "tet_ID.aff",
+            "th_TH.zip",                 "th_TH.dic",           "th_TH.aff",
+            "tl_PH.zip",                 "tl_PH.dic",           "tl_PH.aff",
+            "tn_ZA.zip",                 "tn_ZA.dic",           "tn_ZA.aff",
+            "ts_ZA.zip",                 "ts_ZA.dic",           "ts_ZA.aff",
+            "uk_UA.zip",                 "uk_UA.dic",           "uk_UA.aff",
+            "ve_ZA.zip",                 "ve_ZA.dic",           "ve_ZA.aff",
+            "vi_VN.zip",                 "vi_VN.dic",           "vi_VN.aff",
+            "xh_ZA.zip",                 "xh_ZA.dic",           "xh_ZA.aff",
+            "zu_ZA.zip",                 "zu_ZA.dic",           "zu_ZA.aff",
+        };
 
         [Test]
-        public virtual void test()
+        public virtual void Test()
         {
             for (int i = 0; i < tests.Length; i += 3)
             {
                 FileInfo f = new FileInfo(System.IO.Path.Combine(DICTIONARY_HOME.FullName, tests[i]));
                 Debug.Assert(f.Exists);
 
-                using (ZipFile zip = new ZipFile(f, Encoding.UTF8))
+                using (Stream fileStream = f.OpenRead())
                 {
-                    ZipEntry dicEntry = zip.getEntry(tests[i + 1]);
-                    Debug.Assert(dicEntry != null);
-                    ZipEntry affEntry = zip.getEntry(tests[i + 2]);
-                    Debug.Assert(affEntry != null);
-
-                    using (System.IO.Stream dictionary = zip.getInputStream(dicEntry), System.IO.Stream affix = zip.getInputStream(affEntry))
-			        {
-                Dictionary dic = new Dictionary(affix, dictionary);
-                Console.WriteLine(tests[i] + "\t" + RamUsageEstimator.humanSizeOf(dic) + "\t(" + "words=" + RamUsageEstimator.humanSizeOf(dic.words) + ", " + "flags=" + RamUsageEstimator.humanSizeOf(dic.flagLookup) + ", " + "strips=" + RamUsageEstimator.humanSizeOf(dic.stripData) + ", " + "conditions=" + RamUsageEstimator.humanSizeOf(dic.patterns) + ", " + "affixData=" + RamUsageEstimator.humanSizeOf(dic.affixData) + ", " + "prefixes=" + RamUsageEstimator.humanSizeOf(dic.prefixes) + ", " + "suffixes=" + RamUsageEstimator.humanSizeOf(dic.suffixes) + ")");
+                    using (ZipArchive zip = new ZipArchive(fileStream, ZipArchiveMode.Read, false, Encoding.UTF8))
+                    {
+                        ZipArchiveEntry dicEntry = zip.GetEntry(tests[i + 1]);
+                        Debug.Assert(dicEntry != null);
+                        ZipArchiveEntry affEntry = zip.GetEntry(tests[i + 2]);
+                        Debug.Assert(affEntry != null);
+
+                        using (Stream dictionary = dicEntry.Open())
+                        {
+                            using (Stream affix = affEntry.Open())
+                            {
+                                Dictionary dic = new Dictionary(affix, dictionary);
+                                Console.WriteLine(tests[i] + "\t" + RamUsageEstimator.HumanSizeOf(dic) + "\t(" +
+                                    "words=" + RamUsageEstimator.HumanSizeOf(dic.words) + ", " +
+                                    "flags=" + RamUsageEstimator.HumanSizeOf(dic.flagLookup) + ", " +
+                                    "strips=" + RamUsageEstimator.HumanSizeOf(dic.stripData) + ", " +
+                                    "conditions=" + RamUsageEstimator.HumanSizeOf(dic.patterns) + ", " +
+                                    "affixData=" + RamUsageEstimator.HumanSizeOf(dic.affixData) + ", " +
+                                    "prefixes=" + RamUsageEstimator.HumanSizeOf(dic.prefixes) + ", " +
+                                    "suffixes=" + RamUsageEstimator.HumanSizeOf(dic.suffixes) + ")");
+                            }
+                        }
+                    }
+                }
             }
         }
-    }
-
 
-    [Test]
-    public virtual void testOneDictionary()
-    {
-        string toTest = "hu_HU.zip";
-        for (int i = 0; i < tests.Length; i++)
+        [Test]
+        public virtual void testOneDictionary()
         {
-            if (tests[i].Equals(toTest))
+            string toTest = "hu_HU.zip";
+            for (int i = 0; i < tests.Length; i++)
             {
-                File f = new File(DICTIONARY_HOME, tests[i]);
-                Debug.Assert(f.exists());
-
-                using (ZipFile zip = new ZipFile(f, StandardCharsets.UTF_8))
+                if (tests[i].Equals(toTest))
                 {
-                    ZipEntry dicEntry = zip.getEntry(tests[i + 1]);
-                    Debug.Assert(dicEntry != null);
-                    ZipEntry affEntry = zip.getEntry(tests[i + 2]);
-                    Debug.Assert(affEntry != null);
-
-                    using (System.IO.Stream dictionary = zip.getInputStream(dicEntry), System.IO.Stream affix = zip.getInputStream(affEntry))
-			  {
-            new Dictionary(affix, dictionary);
+                    FileInfo f = new FileInfo(System.IO.Path.Combine(DICTIONARY_HOME.FullName, tests[i]));
+                    Debug.Assert(f.Exists);
+
+                    using (Stream fileStream = f.OpenRead())
+                    {
+                        using (ZipArchive zip = new ZipArchive(fileStream, ZipArchiveMode.Read, false, Encoding.UTF8))
+                        {
+                            ZipArchiveEntry dicEntry = zip.GetEntry(tests[i + 1]);
+                            Debug.Assert(dicEntry != null);
+                            ZipArchiveEntry affEntry = zip.GetEntry(tests[i + 2]);
+                            Debug.Assert(affEntry != null);
+
+                            using (Stream dictionary = dicEntry.Open())
+                            {
+                                using (Stream affix = affEntry.Open())
+                                {
+                                    new Dictionary(affix, dictionary);
+                                }
+                            }
+
+                        }
+                    }
+                }
+            }
         }
     }
-
-}
-	  }
-	}
-
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/efa13ffd/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs
index eaae4f5..1914825 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestAllDictionaries2.cs
@@ -1,11 +1,15 @@
 \ufeffusing System;
 using System.Diagnostics;
 using NUnit.Framework;
+using Lucene.Net.Util;
+using System.IO;
+using System.IO.Compression;
+using System.Text;
 
 namespace Lucene.Net.Analysis.Hunspell
 {
 
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -23,78 +27,222 @@ namespace Lucene.Net.Analysis.Hunspell
 	 */
 
 
-	using IOUtils = org.apache.lucene.util.IOUtils;
-	using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
-	using RamUsageEstimator = org.apache.lucene.util.RamUsageEstimator;
-	using Ignore = org.junit.Ignore;
+    //using IOUtils = org.apache.lucene.util.IOUtils;
+    //using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
+    //using RamUsageEstimator = org.apache.lucene.util.RamUsageEstimator;
+    //using Ignore = org.junit.Ignore;
 
-	/// <summary>
-	/// These thunderbird dictionaries can be retrieved via:
-	/// https://addons.mozilla.org/en-US/thunderbird/language-tools/
-	/// You must click and download every file: sorry!
-	/// </summary>
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @Ignore("enable manually") public class TestAllDictionaries2 extends org.apache.lucene.util.LuceneTestCase
-	public class TestAllDictionaries2 : LuceneTestCase
-	{
+    /// <summary>
+    /// These thunderbird dictionaries can be retrieved via:
+    /// https://addons.mozilla.org/en-US/thunderbird/language-tools/
+    /// You must click and download every file: sorry!
+    /// </summary>
 
-	  // set this to the location of where you downloaded all the files
-	  internal static readonly File DICTIONARY_HOME = new File("/data/thunderbirdDicts");
+    [Ignore("enable manually")]
+    public class TestAllDictionaries2 : LuceneTestCase
+    {
 
-	  internal readonly string[] tests = new string[] {"addon-0.4.5-an+fx+tb+fn+sm.xpi", "dictionaries/ru.dic", "dictionaries/ru.aff", "addon-0.5.5-fx+tb.xpi", "dictionaries/ko-KR.dic", "dictionaries/ko-KR.aff", "afrikaans_spell_checker-20110323-fx+tb+fn+sm.xpi", "dictionaries/af-ZA.dic", "dictionaries/af-ZA.aff", "albanisches_worterbuch-1.6.9-fx+tb+sm+fn.xpi", "dictionaries/sq.dic", "dictionaries/sq.aff", "amharic_spell_checker-0.4-fx+fn+tb+sm.xpi", "dictionaries/am_ET.dic", "dictionaries/am_ET.aff", "arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi", "dictionaries/ar.dic", "dictionaries/ar.aff", "armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi", "dictionaries/hy_AM.dic", "dictionaries/hy_AM.aff", "azerbaijani_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/az-Latn-AZ.dic", "dictionaries/az-Latn-AZ.aff", "belarusian_classic_dictionary-0.1.2-tb+fx+sm.xpi", "dictionaries/be-classic.dic", "dictionaries/be-classic.aff", "belarusian_dictionary-0.1.2-fx+sm+tb.xpi", "dictiona
 ries/be.dic", "dictionaries/be.aff", "bengali_bangladesh_dictionary-0.08-sm+tb+fx.xpi", "dictionaries/bn-BD.dic", "dictionaries/bn-BD.aff", "brazilian_portuguese_dictionary_former_spelling-28.20140203-tb+sm+fx.xpi", "dictionaries/pt-BR-antigo.dic", "dictionaries/pt-BR-antigo.aff", "brazilian_portuguese_dictionary_new_spelling-28.20140203-fx+sm+tb.xpi", "dictionaries/pt-BR.dic", "dictionaries/pt-BR.aff", "british_english_dictionary_updated-1.19.5-sm+fx+tb.xpi", "dictionaries/en-GB.dic", "dictionaries/en-GB.aff", "bulgarian_dictionary-4.3-fx+tb+sm.xpi", "dictionaries/bg.dic", "dictionaries/bg.aff", "canadian_english_dictionary-2.0.8-fx+sm+tb.xpi", "dictionaries/en-CA.dic", "dictionaries/en-CA.aff", "ceske_slovniky_pro_kontrolu_pravopisu-1.0.4-tb+sm+fx.xpi", "dictionaries/cs.dic", "dictionaries/cs.aff", "chichewa_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/ny_MW.dic", "dictionaries/ny_MW.aff", "corrector_de_galego-13.10.0-fn+sm+tb+fx.xpi", "dictionaries/gl_ES.dic", "dictionari
 es/gl_ES.aff", "corrector_ortografico_aragones-0.2-fx+tb+sm.xpi", "dictionaries/an_ES.dic", "dictionaries/an_ES.aff", "croatian_dictionary_-_hrvatski_rjecnik-1.0.1-firefox+thunderbird+seamonkey.xpi", "dictionaries/hr.dic", "dictionaries/hr.aff", "croatian_dictionary_hrvatski_rjecnik-1.0.9-an+fx+fn+tb+sm.xpi", "dictionaries/hr-HR.dic", "dictionaries/hr-HR.aff", "dansk_ordbog_til_stavekontrollen-2.2.1-sm+tb+fx.xpi", "dictionaries/da.dic", "dictionaries/da.aff", "deutsches_worterbuch_de_de_alte_rechtschreibung-2.1.8-sm.xpi", "dictionaries/de-DE-1901.dic", "dictionaries/de-DE-1901.aff", "diccionario_de_espanolespana-1.7-sm+tb+fn+fx.xpi", "dictionaries/es-ES.dic", "dictionaries/es-ES.aff", "diccionario_en_espanol_para_venezuela-1.1.17-sm+an+tb+fn+fx.xpi", "dictionaries/es_VE.dic", "dictionaries/es_VE.aff", "diccionario_espanol_argentina-2.5.1-tb+fx+sm.xpi", "dictionaries/es_AR.dic", "dictionaries/es_AR.aff", "diccionario_espanol_mexico-1.1.3-fn+tb+fx+sm.xpi", "dictionaries/es_MX.dic", "d
 ictionaries/es_MX.aff", "diccionario_ortografico_valenciano-2.2.0-fx+tb+fn+sm.xpi", "dictionaries/roa-ES-val.dic", "dictionaries/roa-ES-val.aff", "diccionario_papiamentoaruba-0.2-fn+sm+tb+fx.xpi", "dictionaries/Papiamento.dic", "dictionaries/Papiamento.aff", "dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic-reform.dic", "dictionaries/fr-classic-reform.aff", "dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic.dic", "dictionaries/fr-classic.aff", "dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-modern.dic", "dictionaries/fr-modern.aff", "dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-reform.dic", "dictionaries/fr-reform.aff", "difazier_an_drouizig-0.12-tb+sm+fx.xpi", "dictionaries/br.dic", "dictionaries/br.aff", "dikshonario_papiamentuantia_hulandes-0.5-fx+tb+fn+sb+sm.xpi", "dictionaries/Papiamentu.dic", "dictionaries/Papiamentu.aff", "dizionari_furlan-3.1-tb+fx+sm.xpi", "dictionaries/fur-IT.dic", "dictionaries/fur-IT.af
 f", "dizionario_italiano-3.3.2-fx+sm+tb.xpi", "dictionaries/it_IT.dic", "dictionaries/it_IT.aff", "eesti_keele_speller-3.2-fx+tb+sm.xpi", "dictionaries/et-EE.dic", "dictionaries/et-EE.aff", "english_australian_dictionary-2.1.2-tb+fx+sm.xpi", "dictionaries/en-AU.dic", "dictionaries/en-AU.aff", "esperanta_vortaro-1.0.2-fx+tb+sm.xpi", "dictionaries/eo-EO.dic", "dictionaries/eo-EO.aff", "european_portuguese_spellchecker-14.1.1.1-tb+fx.xpi", "dictionaries/pt-PT.dic", "dictionaries/pt-PT.aff", "faroese_spell_checker_faroe_islands-2.0-tb+sm+fx+fn.xpi", "dictionaries/fo_FO.dic", "dictionaries/fo_FO.aff", "frysk_wurdboek-2.1.1-fn+sm+fx+an+tb.xpi", "dictionaries/fy.dic", "dictionaries/fy.aff", "geiriadur_cymraeg-1.08-tb+sm+fx.xpi", "dictionaries/cy_GB.dic", "dictionaries/cy_GB.aff", "general_catalan_dictionary-2.5.0-tb+sm+fn+fx.xpi", "dictionaries/ca.dic", "dictionaries/ca.aff", "german_dictionary-2.0.3-fn+fx+sm+tb.xpi", "dictionaries/de-DE.dic", "dictionaries/de-DE.aff", "german_dictionary_d
 e_at_new_orthography-20130905-tb+fn+an+fx+sm.xpi", "dictionaries/de-AT.dic", "dictionaries/de-AT.aff", "german_dictionary_de_ch_new_orthography-20130905-fx+tb+fn+sm+an.xpi", "dictionaries/de-CH.dic", "dictionaries/de-CH.aff", "german_dictionary_de_de_new_orthography-20130905-tb+sm+an+fn+fx.xpi", "dictionaries/de-DE.dic", "dictionaries/de-DE.aff", "german_dictionary_extended_for_austria-2.0.3-fx+fn+sm+tb.xpi", "dictionaries/de-AT.dic", "dictionaries/de-AT.aff", "german_dictionary_switzerland-2.0.3-sm+fx+tb+fn.xpi", "dictionaries/de-CH.dic", "dictionaries/de-CH.aff", "greek_spelling_dictionary-0.8.5-fx+tb+sm.xpi", "dictionaries/el-GR.dic", "dictionaries/el-GR.aff", "gujarati_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/gu_IN.dic", "dictionaries/gu_IN.aff", "haitian_creole_spell_checker-0.08-tb+sm+fx.xpi", "dictionaries/ht-HT.dic", "dictionaries/ht-HT.aff", "hausa_spelling_dictionary-0.2-tb+fx.xpi", "dictionaries/ha-GH.dic", "dictionaries/ha-GH.aff", "hebrew_spell_checking_dict
 ionary_from_hspell-1.2.0.1-fx+sm+tb.xpi", "dictionaries/he.dic", "dictionaries/he.aff", "hindi_spell_checker-0.4-fx+tb+sm+sb+fn.xpi", "dictionaries/hi_IN.dic", "dictionaries/hi_IN.aff", "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi", "dictionaries/hu.dic", "dictionaries/hu.aff", "kamus_pengecek_ejaan_bahasa_indonesia-1.1-fx+tb.xpi", "dictionaries/id.dic", "dictionaries/id.aff", "kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi", "dictionaries/kn.dic", "dictionaries/kn.aff", "kashubian_spell_checker_poland-0.9-sm+tb+fx.xpi", "dictionaries/Kaszebsczi.dic", "dictionaries/Kaszebsczi.aff", "kiswahili_spell_checker-0.3-sb+tb+fn+fx+sm.xpi", "dictionaries/sw_TZ.dic", "dictionaries/sw_TZ.aff", "kurdish_spell_checker-0.96-fx+tb+sm.xpi", "dictionaries/ku-TR.dic", "dictionaries/ku-TR.aff", "lao_spellchecking_dictionary-0-fx+tb+sm+fn+an.xpi", "dictionaries/lo_LA.dic", "dictionaries/lo_LA.aff", "latviesu_valodas_pareizrakstibas_parbaudes_vardnica-1.0.0-fn+fx+tb+sm.xpi", "dictionaries/lv_LV.dic", "d
 ictionaries/lv_LV.aff", "lithuanian_spelling_check_dictionary-1.3-fx+tb+sm+fn.xpi", "dictionaries/lt.dic", "dictionaries/lt.aff", "litreoir_gaelspell_do_mhozilla-4.7-tb+fx+sm+fn.xpi", "dictionaries/ga.dic", "dictionaries/ga.aff", "litreoir_na_liongailise-0.03-fx+sm+tb.xpi", "dictionaries/ln-CD.dic", "dictionaries/ln-CD.aff", "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Cyrl.dic", "dictionaries/mk-MK-Cyrl.aff", "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Latn.dic", "dictionaries/mk-MK-Latn.aff", "malagasy_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/mg_MG.dic", "dictionaries/mg_MG.aff", "marathi_dictionary-9.3-sm+tb+sb+fx.xpi", "dictionaries/mr-IN.dic", "dictionaries/mr-IN.aff", "ndebele_south_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/nr-ZA.dic", "dictionaries/nr-ZA.aff", "nepali_dictionary-1.2-fx+tb.xpi", "dictionaries/ne_NP.dic", "dictionaries/ne_NP.aff", "norsk_bokmal_ordliste-2.0.10.2-fx+tb+sm.xpi
 ", "dictionaries/nb.dic", "dictionaries/nb.aff", "norsk_nynorsk_ordliste-2.1.0-sm+fx+tb.xpi", "dictionaries/nn.dic", "dictionaries/nn.aff", "northern_sotho_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/nso-ZA.dic", "dictionaries/nso-ZA.aff", "oriya_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/or-IN.dic", "dictionaries/or-IN.aff", "polski_slownik_poprawnej_pisowni-1.0.20110621-fx+tb+sm.xpi", "dictionaries/pl.dic", "dictionaries/pl.aff", "punjabi_spell_checker-0.3-fx+tb+sm+sb+fn.xpi", "dictionaries/pa-IN.dic", "dictionaries/pa-IN.aff", "romanian_spellchecking_dictionary-1.14-sm+tb+fx.xpi", "dictionaries/ro_RO-ante1993.dic", "dictionaries/ro_RO-ante1993.aff", "russian_hunspell_dictionary-1.0.20131101-tb+sm+fn+fx.xpi", "dictionaries/ru_RU.dic", "dictionaries/ru_RU.aff", "sanskrit_spell_checker-1.1-fx+tb+sm+sb+fn.xpi", "dictionaries/sa_IN.dic", "dictionaries/sa_IN.aff", "scottish_gaelic_spell_checker-2.7-tb+fx+sm.xpi", "dictionaries/gd-GB.dic", "dictionaries/gd-GB.aff", 
 "serbian_dictionary-0.18-fx+tb+sm.xpi", "dictionaries/sr-RS-Cyrl.dic", "dictionaries/sr-RS-Cyrl.aff", "serbian_dictionary-0.18-fx+tb+sm.xpi", "dictionaries/sr-RS-Latn.dic", "dictionaries/sr-RS-Latn.aff", "slovak_spell_checking_dictionary-2.04.0-tb+fx+sm.xpi", "dictionaries/sk-SK.dic", "dictionaries/sk-SK.aff", "slovak_spell_checking_dictionary-2.04.0-tb+fx+sm.xpi", "dictionaries/sk-SK-ascii.dic", "dictionaries/sk-SK-ascii.aff", "slovar_za_slovenski_jezik-0.1.1.1-fx+tb+sm.xpi", "dictionaries/sl.dic", "dictionaries/sl.aff", "songhay_spell_checker-0.03-fx+tb+sm.xpi", "dictionaries/Songhay - Mali.dic", "dictionaries/Songhay - Mali.aff", "southern_sotho_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/st-ZA.dic", "dictionaries/st-ZA.aff", "sownik_acinski-0.41.20110603-tb+fx+sm.xpi", "dictionaries/la.dic", "dictionaries/la.aff", "sownik_jezyka_dolnouzyckiego-1.4.8-an+fx+tb+fn+sm.xpi", "dictionaries/dsb.dic", "dictionaries/dsb.aff", "srpska_latinica-0.1-fx+tb+sm.xpi", "dictionaries/S
 rpski_latinica.dic", "dictionaries/Srpski_latinica.aff", "svenska_fria_ordlistan-1.1-tb+sm+fx.xpi", "dictionaries/sv.dic", "dictionaries/sv.aff", "svenska_fria_ordlistan-1.1-tb+sm+fx.xpi", "dictionaries/sv_FI.dic", "dictionaries/sv_FI.aff", "swati_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ss-ZA.dic", "dictionaries/ss-ZA.aff", "tamil_spell_checker_for_firefox-0.4-tb+fx.xpi", "dictionaries/ta-TA.dic", "dictionaries/ta-TA.aff", "telugu_spell_checker-0.3-tb+fx+sm.xpi", "dictionaries/te_IN.dic", "dictionaries/te_IN.aff", "te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi-x-Tai Tokerau.dic", "dictionaries/mi-x-Tai Tokerau.aff", "te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi.dic", "dictionaries/mi.aff", "tsonga_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ts-ZA.dic", "dictionaries/ts-ZA.aff", "tswana_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/tn-ZA.dic", "dictionaries/tn-ZA.aff", "turkmen_spell_checker_dictionary-0.1.6-tb+fx+
 sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff", "ukrainian_dictionary-1.7.0-sm+an+fx+fn+tb.xpi", "dictionaries/uk-UA.dic", "dictionaries/uk-UA.aff", "united_states_english_spellchecker-7.0.1-sm+tb+fx+an.xpi", "dictionaries/en-US.dic", "dictionaries/en-US.aff", "upper_sorbian_spelling_dictionary-0.0.20060327.3-tb+fx+sm.xpi", "dictionaries/hsb.dic", "dictionaries/hsb.aff", "urdu_dictionary-0.64-fx+tb+sm+sb.xpi", "dictionaries/ur.dic", "dictionaries/ur.aff", "uzbek_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/uz.dic", "dictionaries/uz.aff", "valencian_catalan_dictionary-2.5.0-tb+fn+sm+fx.xpi", "dictionaries/ca-ES-valencia.dic", "dictionaries/ca-ES-valencia.aff", "venda_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/ve-ZA.dic", "dictionaries/ve-ZA.aff", "verificador_ortografico_para_portugues_do_brasil-2.3-3.2b1-tb+sm+fn+fx.xpi", "dictionaries/pt_BR.dic", "dictionaries/pt_BR.aff", "vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauCu.d
 ic", "dictionaries/vi-DauCu.aff", "vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauMoi.dic", "dictionaries/vi-DauMoi.aff", "woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi", "dictionaries/nl.dic", "dictionaries/nl.aff", "xhosa_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/xh-ZA.dic", "dictionaries/xh-ZA.aff", "xuxen-4.0.1-fx+tb+sm.xpi", "dictionaries/eu.dic", "dictionaries/eu.aff", "yiddish_spell_checker_yivo-0.0.3-sm+fn+fx+tb.xpi", "dictionaries/yi.dic", "dictionaries/yi.aff", "zulu_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/zu-ZA.dic", "dictionaries/zu-ZA.aff"};
+        // set this to the location of where you downloaded all the files
+        internal static readonly DirectoryInfo DICTIONARY_HOME = new DirectoryInfo(@"..\..\..\..\test-files\analysis\data\thunderbirdDicts");
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void test() throws Exception
-	  public virtual void test()
-	  {
-		for (int i = 0; i < tests.Length; i += 3)
-		{
-		  File f = new File(DICTIONARY_HOME, tests[i]);
-		  Debug.Assert(f.exists());
+        internal readonly string[] tests = new string[]
+        {
+            /* zip file */                                                                    /* dictionary */                      /* affix */
+            "addon-0.4.5-an+fx+tb+fn+sm.xpi",                                                 "dictionaries/ru.dic",                "dictionaries/ru.aff",
+            "addon-0.5.5-fx+tb.xpi",                                                          "dictionaries/ko-KR.dic",             "dictionaries/ko-KR.aff",
+            "afrikaans_spell_checker-20110323-fx+tb+fn+sm.xpi",                               "dictionaries/af-ZA.dic",             "dictionaries/af-ZA.aff",
+            "albanisches_worterbuch-1.6.9-fx+tb+sm+fn.xpi",                                   "dictionaries/sq.dic",                "dictionaries/sq.aff",
+            "amharic_spell_checker-0.4-fx+fn+tb+sm.xpi",                                      "dictionaries/am_ET.dic",             "dictionaries/am_ET.aff",
+            "arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi",                        "dictionaries/ar.dic",                "dictionaries/ar.aff",
+            "armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi",                            "dictionaries/hy_AM.dic",             "dictionaries/hy_AM.aff",
+            "azerbaijani_spell_checker-0.3-fx+tb+fn+sm+sb.xpi",                               "dictionaries/az-Latn-AZ.dic",        "dictionaries/az-Latn-AZ.aff",
+            "belarusian_classic_dictionary-0.1.2-tb+fx+sm.xpi",                               "dictionaries/be-classic.dic",        "dictionaries/be-classic.aff",
+            "belarusian_dictionary-0.1.2-fx+sm+tb.xpi",                                       "dictionaries/be.dic",                "dictionaries/be.aff",
+            "bengali_bangladesh_dictionary-0.08-sm+tb+fx.xpi",                                "dictionaries/bn-BD.dic",             "dictionaries/bn-BD.aff",
+            "brazilian_portuguese_dictionary_former_spelling-28.20140203-tb+sm+fx.xpi",       "dictionaries/pt-BR-antigo.dic",      "dictionaries/pt-BR-antigo.aff",
+            "brazilian_portuguese_dictionary_new_spelling-28.20140203-fx+sm+tb.xpi",          "dictionaries/pt-BR.dic",             "dictionaries/pt-BR.aff",
+            "british_english_dictionary_updated-1.19.5-sm+fx+tb.xpi",                         "dictionaries/en-GB.dic",             "dictionaries/en-GB.aff",
+            "bulgarian_dictionary-4.3-fx+tb+sm.xpi",                                          "dictionaries/bg.dic",                "dictionaries/bg.aff",
+            "canadian_english_dictionary-2.0.8-fx+sm+tb.xpi",                                 "dictionaries/en-CA.dic",             "dictionaries/en-CA.aff",
+            "ceske_slovniky_pro_kontrolu_pravopisu-1.0.4-tb+sm+fx.xpi",                       "dictionaries/cs.dic",                "dictionaries/cs.aff",
+            "chichewa_spell_checker-0.3-fx+tb+fn+sm+sb.xpi",                                  "dictionaries/ny_MW.dic",             "dictionaries/ny_MW.aff",
+            "corrector_de_galego-13.10.0-fn+sm+tb+fx.xpi",                                    "dictionaries/gl_ES.dic",             "dictionaries/gl_ES.aff",
+//BUG: broken flags "corrector_orthographic_de_interlingua-6.0-fn+sm+tb+fx.xpi",                      "dictionaries/ia-ia.dic",             "dictionaries/ia-ia.aff",
+            "corrector_ortografico_aragones-0.2-fx+tb+sm.xpi",                                "dictionaries/an_ES.dic",             "dictionaries/an_ES.aff",
+            "croatian_dictionary_-_hrvatski_rjecnik-1.0.1-firefox+thunderbird+seamonkey.xpi", "dictionaries/hr.dic",                "dictionaries/hr.aff",
+            "croatian_dictionary_hrvatski_rjecnik-1.0.9-an+fx+fn+tb+sm.xpi",                  "dictionaries/hr-HR.dic",             "dictionaries/hr-HR.aff",
+            "dansk_ordbog_til_stavekontrollen-2.2.1-sm+tb+fx.xpi",                            "dictionaries/da.dic",                "dictionaries/da.aff",
+            "deutsches_worterbuch_de_de_alte_rechtschreibung-2.1.8-sm.xpi",                   "dictionaries/de-DE-1901.dic",        "dictionaries/de-DE-1901.aff",
+            "diccionario_de_espanolespana-1.7-sm+tb+fn+fx.xpi",                               "dictionaries/es-ES.dic",             "dictionaries/es-ES.aff",
+            "diccionario_en_espanol_para_venezuela-1.1.17-sm+an+tb+fn+fx.xpi",                "dictionaries/es_VE.dic",             "dictionaries/es_VE.aff",
+            "diccionario_espanol_argentina-2.5.1-tb+fx+sm.xpi",                               "dictionaries/es_AR.dic",             "dictionaries/es_AR.aff",
+            "diccionario_espanol_mexico-1.1.3-fn+tb+fx+sm.xpi",                               "dictionaries/es_MX.dic",             "dictionaries/es_MX.aff",
+            "diccionario_ortografico_valenciano-2.2.0-fx+tb+fn+sm.xpi",                       "dictionaries/roa-ES-val.dic",        "dictionaries/roa-ES-val.aff",
+            "diccionario_papiamentoaruba-0.2-fn+sm+tb+fx.xpi",                                "dictionaries/Papiamento.dic",        "dictionaries/Papiamento.aff",
+            "dictionnaires_francais-5.0.2-fx+tb+sm.xpi",                                      "dictionaries/fr-classic-reform.dic", "dictionaries/fr-classic-reform.aff",
+            "dictionnaires_francais-5.0.2-fx+tb+sm.xpi",                                      "dictionaries/fr-classic.dic",        "dictionaries/fr-classic.aff",
+            "dictionnaires_francais-5.0.2-fx+tb+sm.xpi",                                      "dictionaries/fr-modern.dic",         "dictionaries/fr-modern.aff",
+            "dictionnaires_francais-5.0.2-fx+tb+sm.xpi",                                      "dictionaries/fr-reform.dic",         "dictionaries/fr-reform.aff",
+            "difazier_an_drouizig-0.12-tb+sm+fx.xpi",                                         "dictionaries/br.dic",                "dictionaries/br.aff",
+            "dikshonario_papiamentuantia_hulandes-0.5-fx+tb+fn+sb+sm.xpi",                    "dictionaries/Papiamentu.dic",        "dictionaries/Papiamentu.aff",
+            "dizionari_furlan-3.1-tb+fx+sm.xpi",                                              "dictionaries/fur-IT.dic",            "dictionaries/fur-IT.aff",
+            "dizionario_italiano-3.3.2-fx+sm+tb.xpi",                                         "dictionaries/it_IT.dic",             "dictionaries/it_IT.aff",
+            "eesti_keele_speller-3.2-fx+tb+sm.xpi",                                           "dictionaries/et-EE.dic",             "dictionaries/et-EE.aff",
+            "english_australian_dictionary-2.1.2-tb+fx+sm.xpi",                               "dictionaries/en-AU.dic",             "dictionaries/en-AU.aff",
+            "esperanta_vortaro-1.0.2-fx+tb+sm.xpi",                                           "dictionaries/eo-EO.dic",             "dictionaries/eo-EO.aff",
+            "european_portuguese_spellchecker-14.1.1.1-tb+fx.xpi",                            "dictionaries/pt-PT.dic",             "dictionaries/pt-PT.aff",
+            "faroese_spell_checker_faroe_islands-2.0-tb+sm+fx+fn.xpi",                        "dictionaries/fo_FO.dic",             "dictionaries/fo_FO.aff",
+            "frysk_wurdboek-2.1.1-fn+sm+fx+an+tb.xpi",                                        "dictionaries/fy.dic",                "dictionaries/fy.aff",
+            "geiriadur_cymraeg-1.08-tb+sm+fx.xpi",                                            "dictionaries/cy_GB.dic",             "dictionaries/cy_GB.aff",
+            "general_catalan_dictionary-2.5.0-tb+sm+fn+fx.xpi",                               "dictionaries/ca.dic",                "dictionaries/ca.aff",
+            "german_dictionary-2.0.3-fn+fx+sm+tb.xpi",                                        "dictionaries/de-DE.dic",             "dictionaries/de-DE.aff",
+            "german_dictionary_de_at_new_orthography-20130905-tb+fn+an+fx+sm.xpi",            "dictionaries/de-AT.dic",             "dictionaries/de-AT.aff",
+            "german_dictionary_de_ch_new_orthography-20130905-fx+tb+fn+sm+an.xpi",            "dictionaries/de-CH.dic",             "dictionaries/de-CH.aff",
+            "german_dictionary_de_de_new_orthography-20130905-tb+sm+an+fn+fx.xpi",            "dictionaries/de-DE.dic",             "dictionaries/de-DE.aff",
+            "german_dictionary_extended_for_austria-2.0.3-fx+fn+sm+tb.xpi",                   "dictionaries/de-AT.dic",             "dictionaries/de-AT.aff",
+            "german_dictionary_switzerland-2.0.3-sm+fx+tb+fn.xpi",                            "dictionaries/de-CH.dic",             "dictionaries/de-CH.aff",
+            "greek_spelling_dictionary-0.8.5-fx+tb+sm.xpi",                                   "dictionaries/el-GR.dic",             "dictionaries/el-GR.aff",
+            "gujarati_spell_checker-0.3-fx+tb+fn+sm+sb.xpi",                                  "dictionaries/gu_IN.dic",             "dictionaries/gu_IN.aff",
+            "haitian_creole_spell_checker-0.08-tb+sm+fx.xpi",                                 "dictionaries/ht-HT.dic",             "dictionaries/ht-HT.aff",
+            "hausa_spelling_dictionary-0.2-tb+fx.xpi",                                        "dictionaries/ha-GH.dic",             "dictionaries/ha-GH.aff",
+            "hebrew_spell_checking_dictionary_from_hspell-1.2.0.1-fx+sm+tb.xpi",              "dictionaries/he.dic",                "dictionaries/he.aff",
+            "hindi_spell_checker-0.4-fx+tb+sm+sb+fn.xpi",                                     "dictionaries/hi_IN.dic",             "dictionaries/hi_IN.aff",
+            "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi",                                   "dictionaries/hu.dic",                "dictionaries/hu.aff",
+//BUG: has no encoding declaration "icelandic_dictionary-1.3-fx+tb+sm.xpi",                                          "dictionaries/is.dic",                "dictionaries/is.aff",
+            "kamus_pengecek_ejaan_bahasa_indonesia-1.1-fx+tb.xpi",                            "dictionaries/id.dic",                "dictionaries/id.aff",
+            "kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi",                                 "dictionaries/kn.dic",                "dictionaries/kn.aff",
+            "kashubian_spell_checker_poland-0.9-sm+tb+fx.xpi",                                "dictionaries/Kaszebsczi.dic",        "dictionaries/Kaszebsczi.aff",
+            "kiswahili_spell_checker-0.3-sb+tb+fn+fx+sm.xpi",                                 "dictionaries/sw_TZ.dic",             "dictionaries/sw_TZ.aff",
+            "kurdish_spell_checker-0.96-fx+tb+sm.xpi",                                        "dictionaries/ku-TR.dic",             "dictionaries/ku-TR.aff",
+            "lao_spellchecking_dictionary-0-fx+tb+sm+fn+an.xpi",                              "dictionaries/lo_LA.dic",             "dictionaries/lo_LA.aff",
+            "latviesu_valodas_pareizrakstibas_parbaudes_vardnica-1.0.0-fn+fx+tb+sm.xpi",      "dictionaries/lv_LV.dic",             "dictionaries/lv_LV.aff",
+            "lithuanian_spelling_check_dictionary-1.3-fx+tb+sm+fn.xpi",                       "dictionaries/lt.dic",                "dictionaries/lt.aff",
+            "litreoir_gaelspell_do_mhozilla-4.7-tb+fx+sm+fn.xpi",                             "dictionaries/ga.dic",                "dictionaries/ga.aff",
+            "litreoir_na_liongailise-0.03-fx+sm+tb.xpi",                                      "dictionaries/ln-CD.dic",             "dictionaries/ln-CD.aff",
+            "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi",                           "dictionaries/mk-MK-Cyrl.dic",        "dictionaries/mk-MK-Cyrl.aff",
+            "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi",                           "dictionaries/mk-MK-Latn.dic",        "dictionaries/mk-MK-Latn.aff",
+            "malagasy_spell_checker-0.3-fn+tb+fx+sm+sb.xpi",                                  "dictionaries/mg_MG.dic",             "dictionaries/mg_MG.aff",
+            "marathi_dictionary-9.3-sm+tb+sb+fx.xpi",                                         "dictionaries/mr-IN.dic",             "dictionaries/mr-IN.aff",
+            "ndebele_south_spell_checker-20110323-tb+fn+fx+sm.xpi",                           "dictionaries/nr-ZA.dic",             "dictionaries/nr-ZA.aff",
+            "nepali_dictionary-1.2-fx+tb.xpi",                                                "dictionaries/ne_NP.dic",             "dictionaries/ne_NP.aff",
+            "norsk_bokmal_ordliste-2.0.10.2-fx+tb+sm.xpi",                                    "dictionaries/nb.dic",                "dictionaries/nb.aff",
+            "norsk_nynorsk_ordliste-2.1.0-sm+fx+tb.xpi",                                      "dictionaries/nn.dic",                "dictionaries/nn.aff",
+            "northern_sotho_spell_checker-20110323-tb+fn+fx+sm.xpi",                          "dictionaries/nso-ZA.dic",            "dictionaries/nso-ZA.aff",
+            "oriya_spell_checker-0.3-fn+tb+fx+sm+sb.xpi",                                     "dictionaries/or-IN.dic",             "dictionaries/or-IN.aff",
+            "polski_slownik_poprawnej_pisowni-1.0.20110621-fx+tb+sm.xpi",                     "dictionaries/pl.dic",                "dictionaries/pl.aff",
+            "punjabi_spell_checker-0.3-fx+tb+sm+sb+fn.xpi",                                   "dictionaries/pa-IN.dic",             "dictionaries/pa-IN.aff",
+            "romanian_spellchecking_dictionary-1.14-sm+tb+fx.xpi",                            "dictionaries/ro_RO-ante1993.dic",    "dictionaries/ro_RO-ante1993.aff",
+            "russian_hunspell_dictionary-1.0.20131101-tb+sm+fn+fx.xpi",                       "dictionaries/ru_RU.dic",             "dictionaries/ru_RU.aff",
+            "sanskrit_spell_checker-1.1-fx+tb+sm+sb+fn.xpi",                                  "dictionaries/sa_IN.dic",             "dictionaries/sa_IN.aff",
+            "scottish_gaelic_spell_checker-2.7-tb+fx+sm.xpi",                                 "dictionaries/gd-GB.dic",             "dictionaries/gd-GB.aff",
+            "serbian_dictionary-0.18-fx+tb+sm.xpi",                                           "dictionaries/sr-RS-Cyrl.dic",        "dictionaries/sr-RS-Cyrl.aff",
+            "serbian_dictionary-0.18-fx+tb+sm.xpi",                                           "dictionaries/sr-RS-Latn.dic",        "dictionaries/sr-RS-Latn.aff",
+            "slovak_spell_checking_dictionary-2.04.0-tb+fx+sm.xpi",                           "dictionaries/sk-SK.dic",             "dictionaries/sk-SK.aff",
+            "slovak_spell_checking_dictionary-2.04.0-tb+fx+sm.xpi",                           "dictionaries/sk-SK-ascii.dic",       "dictionaries/sk-SK-ascii.aff",
+            "slovar_za_slovenski_jezik-0.1.1.1-fx+tb+sm.xpi",                                 "dictionaries/sl.dic",                "dictionaries/sl.aff",
+            "songhay_spell_checker-0.03-fx+tb+sm.xpi",                                        "dictionaries/Songhay - Mali.dic",    "dictionaries/Songhay - Mali.aff",
+            "southern_sotho_spell_checker-20110323-tb+fn+fx+sm.xpi",                          "dictionaries/st-ZA.dic",             "dictionaries/st-ZA.aff",
+            "sownik_acinski-0.41.20110603-tb+fx+sm.xpi",                                      "dictionaries/la.dic",                "dictionaries/la.aff",
+            "sownik_jezyka_dolnouzyckiego-1.4.8-an+fx+tb+fn+sm.xpi",                          "dictionaries/dsb.dic",               "dictionaries/dsb.aff",
+            "srpska_latinica-0.1-fx+tb+sm.xpi",                                               "dictionaries/Srpski_latinica.dic",   "dictionaries/Srpski_latinica.aff",
+            "svenska_fria_ordlistan-1.1-tb+sm+fx.xpi",                                        "dictionaries/sv.dic",                "dictionaries/sv.aff",
+            "svenska_fria_ordlistan-1.1-tb+sm+fx.xpi",                                        "dictionaries/sv_FI.dic",             "dictionaries/sv_FI.aff",
+            "swati_spell_checker-20110323-tb+sm+fx+fn.xpi",                                   "dictionaries/ss-ZA.dic",             "dictionaries/ss-ZA.aff",
+            "tamil_spell_checker_for_firefox-0.4-tb+fx.xpi",                                  "dictionaries/ta-TA.dic",             "dictionaries/ta-TA.aff",
+            "telugu_spell_checker-0.3-tb+fx+sm.xpi",                                          "dictionaries/te_IN.dic",             "dictionaries/te_IN.aff",
+            "te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi",                                    "dictionaries/mi-x-Tai Tokerau.dic",  "dictionaries/mi-x-Tai Tokerau.aff",
+            "te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi",                                    "dictionaries/mi.dic",                "dictionaries/mi.aff",
+//BUG: broken file (hunspell refuses to load, too)    "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi",                           "dictionaries/ta_IN.dic",             "dictionaries/ta_IN.aff",
+            "tsonga_spell_checker-20110323-tb+sm+fx+fn.xpi",                                  "dictionaries/ts-ZA.dic",             "dictionaries/ts-ZA.aff",
+            "tswana_spell_checker-20110323-tb+sm+fx+fn.xpi",                                  "dictionaries/tn-ZA.dic",             "dictionaries/tn-ZA.aff",
+//BUG: missing FLAG declaration "turkce_yazm_denetimi-3.5-sm+tb+fx.xpi",                                          "dictionaries/tr.dic",                "dictionaries/tr.aff",
+            "turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi",                            "dictionaries/tk_TM.dic",             "dictionaries/tk_TM.aff",
+            "ukrainian_dictionary-1.7.0-sm+an+fx+fn+tb.xpi",                                  "dictionaries/uk-UA.dic",             "dictionaries/uk-UA.aff",
+            "united_states_english_spellchecker-7.0.1-sm+tb+fx+an.xpi",                       "dictionaries/en-US.dic",             "dictionaries/en-US.aff",
+            "upper_sorbian_spelling_dictionary-0.0.20060327.3-tb+fx+sm.xpi",                  "dictionaries/hsb.dic",               "dictionaries/hsb.aff",
+            "urdu_dictionary-0.64-fx+tb+sm+sb.xpi",                                           "dictionaries/ur.dic",                "dictionaries/ur.aff",
+            "uzbek_spell_checker-0.3-fn+tb+fx+sm+sb.xpi",                                     "dictionaries/uz.dic",                "dictionaries/uz.aff",
+            "valencian_catalan_dictionary-2.5.0-tb+fn+sm+fx.xpi",                             "dictionaries/ca-ES-valencia.dic",    "dictionaries/ca-ES-valencia.aff",
+            "venda_spell_checker-20110323-tb+fn+fx+sm.xpi",                                   "dictionaries/ve-ZA.dic",             "dictionaries/ve-ZA.aff",
+            "verificador_ortografico_para_portugues_do_brasil-2.3-3.2b1-tb+sm+fn+fx.xpi",     "dictionaries/pt_BR.dic",             "dictionaries/pt_BR.aff",
+            "vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi",                             "dictionaries/vi-DauCu.dic",          "dictionaries/vi-DauCu.aff",
+            "vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi",                             "dictionaries/vi-DauMoi.dic",         "dictionaries/vi-DauMoi.aff",
+            "woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi",                                   "dictionaries/nl.dic",                "dictionaries/nl.aff",
+            "xhosa_spell_checker-20110323-tb+fn+fx+sm.xpi",                                   "dictionaries/xh-ZA.dic",             "dictionaries/xh-ZA.aff",
+            "xuxen-4.0.1-fx+tb+sm.xpi",                                                       "dictionaries/eu.dic",                "dictionaries/eu.aff",
+            "yiddish_spell_checker_yivo-0.0.3-sm+fn+fx+tb.xpi",                               "dictionaries/yi.dic",                "dictionaries/yi.aff",
+            "zulu_spell_checker-20110323-tb+fn+fx+sm.xpi",                                    "dictionaries/zu-ZA.dic",             "dictionaries/zu-ZA.aff"
+        };
 
-		  using (ZipFile zip = new ZipFile(f, StandardCharsets.UTF_8))
-		  {
-			ZipEntry dicEntry = zip.getEntry(tests[i + 1]);
-			Debug.Assert(dicEntry != null);
-			ZipEntry affEntry = zip.getEntry(tests[i + 2]);
-			Debug.Assert(affEntry != null);
+        [Test]
+        public virtual void Test()
+        {
+            for (int i = 0; i < tests.Length; i += 3)
+            {
+                FileInfo f = new FileInfo(System.IO.Path.Combine(DICTIONARY_HOME.FullName, tests[i]));
+                Debug.Assert(f.Exists);
 
-			using (System.IO.Stream dictionary = zip.getInputStream(dicEntry), System.IO.Stream affix = zip.getInputStream(affEntry))
-			{
-			  Dictionary dic = new Dictionary(affix, dictionary);
-			  Console.WriteLine(tests[i] + "\t" + RamUsageEstimator.humanSizeOf(dic) + "\t(" + "words=" + RamUsageEstimator.humanSizeOf(dic.words) + ", " + "flags=" + RamUsageEstimator.humanSizeOf(dic.flagLookup) + ", " + "strips=" + RamUsageEstimator.humanSizeOf(dic.stripData) + ", " + "conditions=" + RamUsageEstimator.humanSizeOf(dic.patterns) + ", " + "affixData=" + RamUsageEstimator.humanSizeOf(dic.affixData) + ", " + "prefixes=" + RamUsageEstimator.humanSizeOf(dic.prefixes) + ", " + "suffixes=" + RamUsageEstimator.humanSizeOf(dic.suffixes) + ")");
-			}
-		  }
-		}
-	  }
+                using (Stream fileStream = f.OpenRead())
+                {
+                    using (ZipArchive zip = new ZipArchive(fileStream, ZipArchiveMode.Read, false, Encoding.UTF8))
+                    {
+                        ZipArchiveEntry dicEntry = zip.GetEntry(tests[i + 1]);
+                        Debug.Assert(dicEntry != null);
+                        ZipArchiveEntry affEntry = zip.GetEntry(tests[i + 2]);
+                        Debug.Assert(affEntry != null);
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void testOneDictionary() throws Exception
-	  public virtual void testOneDictionary()
-	  {
-		string toTest = "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi";
-		for (int i = 0; i < tests.Length; i++)
-		{
-		  if (tests[i].Equals(toTest))
-		  {
-			File f = new File(DICTIONARY_HOME, tests[i]);
-			Debug.Assert(f.exists());
+                        using (Stream dictionary = dicEntry.Open())
+                        {
+                            using (Stream affix = affEntry.Open())
+                            {
+                                Dictionary dic = new Dictionary(affix, dictionary);
+                                Console.WriteLine(tests[i] + "\t" + RamUsageEstimator.HumanSizeOf(dic) + "\t(" +
+                                    "words=" + RamUsageEstimator.HumanSizeOf(dic.words) + ", " +
+                                    "flags=" + RamUsageEstimator.HumanSizeOf(dic.flagLookup) + ", " +
+                                    "strips=" + RamUsageEstimator.HumanSizeOf(dic.stripData) + ", " +
+                                    "conditions=" + RamUsageEstimator.HumanSizeOf(dic.patterns) + ", " +
+                                    "affixData=" + RamUsageEstimator.HumanSizeOf(dic.affixData) + ", " +
+                                    "prefixes=" + RamUsageEstimator.HumanSizeOf(dic.prefixes) + ", " +
+                                    "suffixes=" + RamUsageEstimator.HumanSizeOf(dic.suffixes) + ")");
+                            }
+                        }
+                    }
+                }
+            }
+        }
 
-			using (ZipFile zip = new ZipFile(f, StandardCharsets.UTF_8))
-			{
-			  ZipEntry dicEntry = zip.getEntry(tests[i + 1]);
-			  Debug.Assert(dicEntry != null);
-			  ZipEntry affEntry = zip.getEntry(tests[i + 2]);
-			  Debug.Assert(affEntry != null);
+        [Test]
+        public virtual void TestOneDictionary()
+        {
+            string toTest = "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi";
+            for (int i = 0; i < tests.Length; i++)
+            {
+                if (tests[i].Equals(toTest))
+                {
+                    FileInfo f = new FileInfo(System.IO.Path.Combine(DICTIONARY_HOME.FullName, tests[i]));
+                    Debug.Assert(f.Exists);
 
-			  using (System.IO.Stream dictionary = zip.getInputStream(dicEntry), System.IO.Stream affix = zip.getInputStream(affEntry))
-			  {
-				new Dictionary(affix, dictionary);
-			  }
-			}
-		  }
-		}
-	  }
-	}
+                    using (Stream fileStream = f.OpenRead())
+                    {
+                        using (ZipArchive zip = new ZipArchive(fileStream, ZipArchiveMode.Read, false, Encoding.UTF8))
+                        {
+                            ZipArchiveEntry dicEntry = zip.GetEntry(tests[i + 1]);
+                            Debug.Assert(dicEntry != null);
+                            ZipArchiveEntry affEntry = zip.GetEntry(tests[i + 2]);
+                            Debug.Assert(affEntry != null);
 
+                            using (Stream dictionary = dicEntry.Open())
+                            {
+                                using (Stream affix = affEntry.Open())
+                                {
+                                    new Dictionary(affix, dictionary);
+                                }
+                            }
+
+                        }
+                    }
+                }
+            }
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/efa13ffd/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
index 1b641b7..e44c6ad 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
+++ b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
@@ -154,6 +154,8 @@
     <Compile Include="Analysis\Hi\TestHindiNormalizer.cs" />
     <Compile Include="Analysis\Hi\TestHindiStemmer.cs" />
     <Compile Include="Analysis\Hunspell\StemmerTestBase.cs" />
+    <Compile Include="Analysis\Hunspell\TestAllDictionaries.cs" />
+    <Compile Include="Analysis\Hunspell\TestAllDictionaries2.cs" />
     <Compile Include="Analysis\Hunspell\TestCaseInsensitive.cs" />
     <Compile Include="Analysis\Hunspell\TestCircumfix.cs" />
     <Compile Include="Analysis\Hunspell\TestComplexPrefix.cs" />


[03/50] [abbrv] lucenenet git commit: Ported Analysis.Hunspell + tests

Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestOptionalCondition.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestOptionalCondition.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestOptionalCondition.cs
index cb7bb6f..7e4cd4c 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestOptionalCondition.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestOptionalCondition.cs
@@ -1,7 +1,8 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
-{
+\ufeffusing NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,33 +19,28 @@
 	 * limitations under the License.
 	 */
 
-	using BeforeClass = org.junit.BeforeClass;
-
-	public class TestOptionalCondition : StemmerTestBase
-	{
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-	  public static void beforeClass()
-	  {
-		init("optional-condition.aff", "condition.dic");
-	  }
-
-	  public virtual void testStemming()
-	  {
-		assertStemsTo("hello", "hello");
-		assertStemsTo("try", "try");
-		assertStemsTo("tried", "try");
-		assertStemsTo("work", "work");
-		assertStemsTo("worked", "work");
-		assertStemsTo("rework", "work");
-		assertStemsTo("reworked", "work");
-		assertStemsTo("retried");
-		assertStemsTo("workied");
-		assertStemsTo("tryed");
-		assertStemsTo("tryied");
-		assertStemsTo("helloed");
-	  }
-	}
-
+    public class TestOptionalCondition : StemmerTestBase
+    {
+        [TestFixtureSetUp]
+        public static void BeforeClass()
+        {
+            Init("optional-condition.aff", "condition.dic");
+        }
+        [Test]
+        public virtual void TestStemming()
+        {
+            AssertStemsTo("hello", "hello");
+            AssertStemsTo("try", "try");
+            AssertStemsTo("tried", "try");
+            AssertStemsTo("work", "work");
+            AssertStemsTo("worked", "work");
+            AssertStemsTo("rework", "work");
+            AssertStemsTo("reworked", "work");
+            AssertStemsTo("retried");
+            AssertStemsTo("workied");
+            AssertStemsTo("tryed");
+            AssertStemsTo("tryied");
+            AssertStemsTo("helloed");
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestStemmer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestStemmer.cs
index 93df12e..358ff99 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestStemmer.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestStemmer.cs
@@ -1,7 +1,8 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
-{
+\ufeffusing NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,71 +19,68 @@
 	 * limitations under the License.
 	 */
 
-	using BeforeClass = org.junit.BeforeClass;
-
-	public class TestStemmer : StemmerTestBase
-	{
-
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-	  public static void beforeClass()
-	  {
-		init("simple.aff", "simple.dic");
-	  }
-
-	  public virtual void testSimpleSuffix()
-	  {
-		assertStemsTo("lucene", "lucene", "lucen");
-		assertStemsTo("mahoute", "mahout");
-	  }
-
-	  public virtual void testSimplePrefix()
-	  {
-		assertStemsTo("solr", "olr");
-	  }
-
-	  public virtual void testRecursiveSuffix()
-	  {
-		// we should not recurse here, as the suffix has no continuation!
-		assertStemsTo("abcd");
-	  }
+    public class TestStemmer : StemmerTestBase
+    {
 
-	  // all forms unmunched from dictionary
-	  public virtual void testAllStems()
-	  {
-		assertStemsTo("ab", "ab");
-		assertStemsTo("abc", "ab");
-		assertStemsTo("apach", "apach");
-		assertStemsTo("apache", "apach");
-		assertStemsTo("foo", "foo", "foo");
-		assertStemsTo("food", "foo");
-		assertStemsTo("foos", "foo");
-		assertStemsTo("lucen", "lucen");
-		assertStemsTo("lucene", "lucen", "lucene");
-		assertStemsTo("mahout", "mahout");
-		assertStemsTo("mahoute", "mahout");
-		assertStemsTo("moo", "moo");
-		assertStemsTo("mood", "moo");
-		assertStemsTo("olr", "olr");
-		assertStemsTo("solr", "olr");
-	  }
+        [TestFixtureSetUp]
+        public static void BeforeClass()
+        {
+            Init("simple.aff", "simple.dic");
+        }
+        [Test]
+        public virtual void TestSimpleSuffix()
+        {
+            AssertStemsTo("lucene", "lucene", "lucen");
+            AssertStemsTo("mahoute", "mahout");
+        }
+        [Test]
+        public virtual void TestSimplePrefix()
+        {
+            AssertStemsTo("solr", "olr");
+        }
+        [Test]
+        public virtual void TestRecursiveSuffix()
+        {
+            // we should not recurse here, as the suffix has no continuation!
+            AssertStemsTo("abcd");
+        }
 
-	  // some bogus stuff that should not stem (empty lists)!
-	  public virtual void testBogusStems()
-	  {
-		assertStemsTo("abs");
-		assertStemsTo("abe");
-		assertStemsTo("sab");
-		assertStemsTo("sapach");
-		assertStemsTo("sapache");
-		assertStemsTo("apachee");
-		assertStemsTo("sfoo");
-		assertStemsTo("sfoos");
-		assertStemsTo("fooss");
-		assertStemsTo("lucenee");
-		assertStemsTo("solre");
-	  }
-	}
+        // all forms unmunched from dictionary
+        [Test]
+        public virtual void TestAllStems()
+        {
+            AssertStemsTo("ab", "ab");
+            AssertStemsTo("abc", "ab");
+            AssertStemsTo("apach", "apach");
+            AssertStemsTo("apache", "apach");
+            AssertStemsTo("foo", "foo", "foo");
+            AssertStemsTo("food", "foo");
+            AssertStemsTo("foos", "foo");
+            AssertStemsTo("lucen", "lucen");
+            AssertStemsTo("lucene", "lucen", "lucene");
+            AssertStemsTo("mahout", "mahout");
+            AssertStemsTo("mahoute", "mahout");
+            AssertStemsTo("moo", "moo");
+            AssertStemsTo("mood", "moo");
+            AssertStemsTo("olr", "olr");
+            AssertStemsTo("solr", "olr");
+        }
 
+        // some bogus stuff that should not stem (empty lists)!
+        [Test]
+        public virtual void TestBogusStems()
+        {
+            AssertStemsTo("abs");
+            AssertStemsTo("abe");
+            AssertStemsTo("sab");
+            AssertStemsTo("sapach");
+            AssertStemsTo("sapache");
+            AssertStemsTo("apachee");
+            AssertStemsTo("sfoo");
+            AssertStemsTo("sfoos");
+            AssertStemsTo("fooss");
+            AssertStemsTo("lucenee");
+            AssertStemsTo("solre");
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoFold.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoFold.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoFold.cs
index 3cecced..12ee1c0 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoFold.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoFold.cs
@@ -1,7 +1,8 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
-{
+\ufeffusing NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,28 +19,23 @@
 	 * limitations under the License.
 	 */
 
-	using BeforeClass = org.junit.BeforeClass;
-
-	public class TestTwoFold : StemmerTestBase
-	{
-
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-	  public static void beforeClass()
-	  {
-		init("twofold.aff", "morph.dic");
-	  }
-
-	  public virtual void testExamples()
-	  {
-		assertStemsTo("drink", "drink");
-		assertStemsTo("drinkable", "drink");
-		assertStemsTo("drinkables", "drink");
-		assertStemsTo("drinksable");
-		assertStemsTo("drinkableable");
-		assertStemsTo("drinks");
-	  }
-	}
+    public class TestTwoFold : StemmerTestBase
+    {
 
+        [TestFixtureSetUp]
+        public static void BeforeClass()
+        {
+            Init("twofold.aff", "morph.dic");
+        }
+        [Test]
+        public virtual void TestExamples()
+        {
+            AssertStemsTo("drink", "drink");
+            AssertStemsTo("drinkable", "drink");
+            AssertStemsTo("drinkables", "drink");
+            AssertStemsTo("drinksable");
+            AssertStemsTo("drinkableable");
+            AssertStemsTo("drinks");
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs
index 23141fc..2799054 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/TestTwoSuffixes.cs
@@ -1,7 +1,8 @@
-\ufeffnamespace org.apache.lucene.analysis.hunspell
-{
+\ufeffusing NUnit.Framework;
 
-	/*
+namespace Lucene.Net.Analysis.Hunspell
+{
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -18,27 +19,22 @@
 	 * limitations under the License.
 	 */
 
-	using BeforeClass = org.junit.BeforeClass;
-
-	public class TestTwoSuffixes : StemmerTestBase
-	{
-
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @BeforeClass public static void beforeClass() throws Exception
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-	  public static void beforeClass()
-	  {
-		init("twosuffixes.aff", "twosuffixes.dic");
-	  }
-
-	  public virtual void testExamples()
-	  {
-		assertStemsTo("drink", "drink");
-		assertStemsTo("drinkable", "drink");
-		assertStemsTo("drinks", "drink");
-		assertStemsTo("drinkableable");
-		assertStemsTo("drinkss");
-	  }
-	}
+    public class TestTwoSuffixes : StemmerTestBase
+    {
 
+        [TestFixtureSetUp]
+        public static void BeforeClass()
+        {
+            Init("twosuffixes.aff", "twosuffixes.dic");
+        }
+        [Test]
+        public virtual void TestExamples()
+        {
+            AssertStemsTo("drink", "drink");
+            AssertStemsTo("drinkable", "drink");
+            AssertStemsTo("drinks", "drink");
+            AssertStemsTo("drinkableable");
+            AssertStemsTo("drinkss");
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/broken-flags.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/broken-flags.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/broken-flags.aff
new file mode 100644
index 0000000..0c189c8
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/broken-flags.aff
@@ -0,0 +1,21 @@
+SET UTF-8
+TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
+
+SFX A Y 3
+SFX A   0     e         n
+SFX A   0     e         t
+SFX A   0     e         h
+
+SFX C Y 2
+SFX C   0     d/C       c
+SFX C   0     c         b
+
+SFX D Y 1
+SFX D   0     s         o
+
+SFX E Y 1
+SFX E   0     d         o
+
+# broken, the flag has too much in it
+PFX B0 Y 1
+PFX B0   0     s         o

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/broken.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/broken.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/broken.aff
new file mode 100644
index 0000000..8174179
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/broken.aff
@@ -0,0 +1,24 @@
+SET UTF-8
+TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
+
+SFX A Y 3
+SFX A   0     e         n
+SFX A   0     e         t
+SFX A   0     e         h
+
+SFX C Y 2
+SFX C   0     d/C       c
+SFX C   0     c         b
+
+SFX D Y 1
+SFX D   0     s         o
+
+SFX E Y 1
+SFX E   0     d         o
+
+PFX B Y 1
+PFX B   0     s         o
+
+#wrong rule (only 3 elements)
+PFX A0 Y 1
+PFX A0 0
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/circumfix.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/circumfix.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/circumfix.aff
new file mode 100644
index 0000000..fccad0d
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/circumfix.aff
@@ -0,0 +1,14 @@
+SET UTF-8
+
+CIRCUMFIX X
+
+PFX A Y 1
+PFX A 0 leg/X .
+
+PFX B Y 1
+PFX B 0 legesleg/X .
+
+SFX C Y 3
+SFX C 0 obb . +COMPARATIVE
+SFX C 0 obb/AX . +SUPERLATIVE
+SFX C 0 obb/BX . +SUPERSUPERLATIVE

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/circumfix.dic
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/circumfix.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/circumfix.dic
new file mode 100644
index 0000000..571e2e2
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/circumfix.dic
@@ -0,0 +1,2 @@
+1
+nagy/C    [MN]

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/complexprefix.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/complexprefix.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/complexprefix.aff
new file mode 100644
index 0000000..3bdfac2
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/complexprefix.aff
@@ -0,0 +1,12 @@
+SET UTF-8
+
+COMPLEXPREFIXES
+
+PFX A Y 1
+PFX A   0 pone .
+
+PFX B Y 1
+PFX B   0 ptwo/A .
+
+SFX C Y 1
+SFX C   0 suf .
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/complexprefix.dic
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/complexprefix.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/complexprefix.dic
new file mode 100644
index 0000000..aff57b5
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/complexprefix.dic
@@ -0,0 +1,2 @@
+1
+foo/BC

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/compressed-before-set.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/compressed-before-set.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/compressed-before-set.aff
new file mode 100644
index 0000000..e4a1b37
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/compressed-before-set.aff
@@ -0,0 +1,29 @@
+SET UTF-8
+TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
+
+FLAG long
+
+AF 5
+AF AA
+AF BB
+AF CC
+AF DD
+AF EE
+
+SFX AA Y 3
+SFX AA   0     e         n
+SFX AA   0     e         t
+SFX AA   0     e         h
+
+SFX CC Y 2
+SFX CC   0     d/3       c
+SFX CC   0     c         b
+
+SFX DD Y 1
+SFX DD   0     s         o
+
+SFX EE Y 1
+SFX EE   0     d         o
+
+PFX BB Y 1
+PFX BB   0     s         o

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/compressed-empty-alias.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/compressed-empty-alias.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/compressed-empty-alias.aff
new file mode 100644
index 0000000..a27273f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/compressed-empty-alias.aff
@@ -0,0 +1,30 @@
+SET UTF-8
+TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
+
+FLAG long
+
+AF 6
+AF AA
+AF BB
+AF CC
+AF DD
+AF EE
+AF  
+
+SFX AA Y 3
+SFX AA   0     e         n
+SFX AA   0     e         t
+SFX AA   0     e         h
+
+SFX CC Y 2
+SFX CC   0     d/3       c
+SFX CC   0     c         b
+
+SFX DD Y 1
+SFX DD   0     s         o
+
+SFX EE Y 1
+SFX EE   0     d         o
+
+PFX BB Y 1
+PFX BB   0     s         o

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/compressed.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/compressed.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/compressed.aff
new file mode 100644
index 0000000..c747c27
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/compressed.aff
@@ -0,0 +1,29 @@
+AF 5
+AF AA
+AF BB
+AF CC
+AF DD
+AF EE
+
+SET UTF-8
+TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
+
+FLAG long
+
+SFX AA Y 3
+SFX AA   0     e         n
+SFX AA   0     e         t
+SFX AA   0     e         h
+
+SFX CC Y 2
+SFX CC   0     d/3       c
+SFX CC   0     c         b
+
+SFX DD Y 1
+SFX DD   0     s         o
+
+SFX EE Y 1
+SFX EE   0     d         o
+
+PFX BB Y 1
+PFX BB   0     s         o

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/compressed.dic
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/compressed.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/compressed.dic
new file mode 100644
index 0000000..dd3890f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/compressed.dic
@@ -0,0 +1,9 @@
+6
+ab/3
+apach/1
+foo/4
+foo/5
+lucen/1
+lucene
+mahout/1
+olr/2

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/condition.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/condition.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/condition.aff
new file mode 100644
index 0000000..4a836a7
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/condition.aff
@@ -0,0 +1,13 @@
+SET UTF-8
+TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ\u2019
+
+REP 2
+REP f ph
+REP ph f
+
+PFX A Y 1
+PFX A 0 re .
+
+SFX B Y 2
+SFX B 0 ed [^y]
+SFX B y ied y

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/condition.dic
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/condition.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/condition.dic
new file mode 100644
index 0000000..e228043
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/condition.dic
@@ -0,0 +1,4 @@
+3
+hello
+try/B
+work/AB

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/conv.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/conv.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/conv.aff
new file mode 100644
index 0000000..e860a87
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/conv.aff
@@ -0,0 +1,16 @@
+SET UTF-8
+
+ICONV 4
+ICONV A a
+ICONV B b
+ICONV C c
+ICONV I i
+
+OCONV 4
+OCONV a A
+OCONV b B
+OCONV c C
+OCONV i I
+
+SFX X Y 1
+SFX X 0 able . +ABLE
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/conv.dic
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/conv.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/conv.dic
new file mode 100644
index 0000000..6b68dc8
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/conv.dic
@@ -0,0 +1,2 @@
+1
+drink/X   [VERB]

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/dependencies.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/dependencies.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/dependencies.aff
new file mode 100644
index 0000000..5589f3f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/dependencies.aff
@@ -0,0 +1,13 @@
+SET UTF-8
+
+PFX P Y 1
+PFX P   0 un . [prefix_un]+
+
+SFX S Y 1
+SFX S   0 s . +PL
+
+SFX Q Y 1
+SFX Q   0 s . +3SGV
+
+SFX R Y 1
+SFX R   0 able/PS . +DER_V_ADJ_ABLE
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/dependencies.dic
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/dependencies.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/dependencies.dic
new file mode 100644
index 0000000..bdba45e
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/dependencies.dic
@@ -0,0 +1,3 @@
+2
+drink/RQ  [verb]
+drink/S   [noun]

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/escaped.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/escaped.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/escaped.aff
new file mode 100644
index 0000000..b428451
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/escaped.aff
@@ -0,0 +1,4 @@
+SET UTF-8
+
+SFX A Y 1
+SFX A 0 s . +PLUR

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/escaped.dic
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/escaped.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/escaped.dic
new file mode 100644
index 0000000..9360294
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/escaped.dic
@@ -0,0 +1,4 @@
+3
+work/A
+R2\/D2/A
+N\/A

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flaglong.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flaglong.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flaglong.aff
new file mode 100644
index 0000000..d05a5da
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flaglong.aff
@@ -0,0 +1,4 @@
+SET UTF-8
+FLAG long
+SFX Y1 Y 1
+SFX Y1 0 s .

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flaglong.dic
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flaglong.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flaglong.dic
new file mode 100644
index 0000000..8525878
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flaglong.dic
@@ -0,0 +1,2 @@
+1
+foo/Y1Z3F?

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flagnum.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flagnum.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flagnum.aff
new file mode 100644
index 0000000..f586426
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flagnum.aff
@@ -0,0 +1,4 @@
+SET UTF-8
+FLAG num
+SFX 65000 Y 1
+SFX 65000 0 s .

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flagnum.dic
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flagnum.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flagnum.dic
new file mode 100644
index 0000000..cf28654
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/flagnum.dic
@@ -0,0 +1,3 @@
+1
+foo/65000,12,2756
+

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/homonyms.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/homonyms.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/homonyms.aff
new file mode 100644
index 0000000..43b4bb1
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/homonyms.aff
@@ -0,0 +1,7 @@
+SET UTF-8
+
+SFX A Y 1
+SFX A 0 s . +SG3
+
+SFX B Y 1
+SFX B 0 s . +PLUR

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/homonyms.dic
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/homonyms.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/homonyms.dic
new file mode 100644
index 0000000..96d51f1
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/homonyms.dic
@@ -0,0 +1,3 @@
+2
+work/A    [VERB]
+work/B    [NOUN]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/ignore.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/ignore.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/ignore.aff
new file mode 100644
index 0000000..65c4683
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/ignore.aff
@@ -0,0 +1,6 @@
+SET UTF-8
+
+IGNORE '-
+
+SFX X Y 1
+SFX X 0 able . +ABLE
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/ignore.dic
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/ignore.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/ignore.dic
new file mode 100644
index 0000000..9ae9205
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/ignore.dic
@@ -0,0 +1,3 @@
+1
+drink/X   [VERB]
+dr-ank/X  [VERB]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/mixedcase.dic
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/mixedcase.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/mixedcase.dic
new file mode 100644
index 0000000..9fae253
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/mixedcase.dic
@@ -0,0 +1,10 @@
+9
+Ab/C
+apach/A
+Foo/D
+foo/E
+Lucen/A
+Lucene
+mahout/A
+Moo/E
+olr/B

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morph.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morph.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morph.aff
new file mode 100644
index 0000000..78cae52
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morph.aff
@@ -0,0 +1,4 @@
+SET UTF-8
+
+SFX X Y 1
+SFX X 0 able . +ABLE

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morph.dic
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morph.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morph.dic
new file mode 100644
index 0000000..6b68dc8
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/morph.dic
@@ -0,0 +1,2 @@
+1
+drink/X   [VERB]

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/optional-condition.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/optional-condition.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/optional-condition.aff
new file mode 100644
index 0000000..f2e3b9c
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/optional-condition.aff
@@ -0,0 +1,14 @@
+SET UTF-8
+TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ\u2019
+
+REP 2
+REP f ph
+REP ph f
+
+# has no condition
+PFX A Y 1
+PFX A 0 re
+
+SFX B Y 2
+SFX B 0 ed [^y]
+SFX B y ied y
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/simple.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/simple.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/simple.aff
new file mode 100644
index 0000000..db9423d
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/simple.aff
@@ -0,0 +1,20 @@
+SET UTF-8
+TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
+
+SFX A Y 3
+SFX A   0     e         n
+SFX A   0     e         t
+SFX A   0     e         h
+
+SFX C Y 2
+SFX C   0     d/C       c
+SFX C   0     c         b
+
+SFX D Y 1
+SFX D   0     s         o
+
+SFX E Y 1
+SFX E   0     d         o
+
+PFX B Y 1
+PFX B   0     s         o
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/simple.dic
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/simple.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/simple.dic
new file mode 100644
index 0000000..f7bbab3
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/simple.dic
@@ -0,0 +1,10 @@
+9
+ab/C
+apach/A
+foo/D
+foo/E
+lucen/A
+lucene
+mahout/A
+moo/E
+olr/B

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/twofold.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/twofold.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/twofold.aff
new file mode 100644
index 0000000..caea726
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/twofold.aff
@@ -0,0 +1,7 @@
+SET UTF-8
+
+SFX Y Y 1
+SFX Y 0 s . +PLUR
+
+SFX X Y 1
+SFX X 0 able/Y . +ABLE

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/twosuffixes.aff
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/twosuffixes.aff b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/twosuffixes.aff
new file mode 100644
index 0000000..5d911ac
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/twosuffixes.aff
@@ -0,0 +1,7 @@
+SET UTF-8
+
+SFX X Y 1
+SFX X 0 able . +ABLE
+
+SFX X Y 1
+SFX X 0 s . +s

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/twosuffixes.dic
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/twosuffixes.dic b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/twosuffixes.dic
new file mode 100644
index 0000000..6b68dc8
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/twosuffixes.dic
@@ -0,0 +1,2 @@
+1
+drink/X   [VERB]

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/BaseTokenStreamFactoryTestCase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/BaseTokenStreamFactoryTestCase.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/BaseTokenStreamFactoryTestCase.cs
index b516ff1..4518f9d 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/BaseTokenStreamFactoryTestCase.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/BaseTokenStreamFactoryTestCase.cs
@@ -36,7 +36,7 @@ namespace Lucene.Net.Analysis.Util
     ///   TokenStream stream = tokenizerFactory("standard").create(reader);
     ///   stream = tokenFilterFactory("lowercase").create(stream);
     ///   stream = tokenFilterFactory("asciifolding").create(stream);
-    ///   assertTokenStreamContents(stream, new String[] { "some", "text", "to", "analyze" });
+    ///   AssertTokenStreamContents(stream, new String[] { "some", "text", "to", "analyze" });
     /// </pre></code>
     /// </para>
     /// </summary>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e4d9f44c/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
index 26e5e63..1b641b7 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
+++ b/src/Lucene.Net.Tests.Analysis.Common/Lucene.Net.Tests.Analysis.Common.csproj
@@ -47,6 +47,7 @@
     <Reference Include="System" />
     <Reference Include="System.Core" />
     <Reference Include="System.Data" />
+    <Reference Include="System.IO.Compression" />
     <Reference Include="System.Xml" />
   </ItemGroup>
   <ItemGroup>
@@ -152,6 +153,26 @@
     <Compile Include="Analysis\Hi\TestHindiFilters.cs" />
     <Compile Include="Analysis\Hi\TestHindiNormalizer.cs" />
     <Compile Include="Analysis\Hi\TestHindiStemmer.cs" />
+    <Compile Include="Analysis\Hunspell\StemmerTestBase.cs" />
+    <Compile Include="Analysis\Hunspell\TestCaseInsensitive.cs" />
+    <Compile Include="Analysis\Hunspell\TestCircumfix.cs" />
+    <Compile Include="Analysis\Hunspell\TestComplexPrefix.cs" />
+    <Compile Include="Analysis\Hunspell\TestCondition.cs" />
+    <Compile Include="Analysis\Hunspell\TestConv.cs" />
+    <Compile Include="Analysis\Hunspell\TestDependencies.cs" />
+    <Compile Include="Analysis\Hunspell\TestDictionary.cs" />
+    <Compile Include="Analysis\Hunspell\TestEscaped.cs" />
+    <Compile Include="Analysis\Hunspell\TestFlagLong.cs" />
+    <Compile Include="Analysis\Hunspell\TestFlagNum.cs" />
+    <Compile Include="Analysis\Hunspell\TestHomonyms.cs" />
+    <Compile Include="Analysis\Hunspell\TestHunspellStemFilter.cs" />
+    <Compile Include="Analysis\Hunspell\TestHunspellStemFilterFactory.cs" />
+    <Compile Include="Analysis\Hunspell\TestIgnore.cs" />
+    <Compile Include="Analysis\Hunspell\TestMorph.cs" />
+    <Compile Include="Analysis\Hunspell\TestOptionalCondition.cs" />
+    <Compile Include="Analysis\Hunspell\TestStemmer.cs" />
+    <Compile Include="Analysis\Hunspell\TestTwoFold.cs" />
+    <Compile Include="Analysis\Hunspell\TestTwoSuffixes.cs" />
     <Compile Include="Analysis\Hu\TestHungarianAnalyzer.cs" />
     <Compile Include="Analysis\Hu\TestHungarianLightStemFilter.cs" />
     <Compile Include="Analysis\Hu\TestHungarianLightStemFilterFactory.cs" />
@@ -325,6 +346,41 @@
     <EmbeddedResource Include="Analysis\Hu\hulighttestdata.zip" />
     <EmbeddedResource Include="Analysis\It\itlighttestdata.zip" />
     <EmbeddedResource Include="Analysis\Sv\svlighttestdata.zip" />
+    <EmbeddedResource Include="Analysis\Hunspell\broken-flags.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\broken.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\circumfix.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\circumfix.dic" />
+    <EmbeddedResource Include="Analysis\Hunspell\complexprefix.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\complexprefix.dic" />
+    <EmbeddedResource Include="Analysis\Hunspell\compressed-before-set.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\compressed-empty-alias.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\compressed.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\compressed.dic" />
+    <EmbeddedResource Include="Analysis\Hunspell\condition.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\condition.dic" />
+    <EmbeddedResource Include="Analysis\Hunspell\conv.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\conv.dic" />
+    <EmbeddedResource Include="Analysis\Hunspell\dependencies.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\dependencies.dic" />
+    <EmbeddedResource Include="Analysis\Hunspell\escaped.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\escaped.dic" />
+    <EmbeddedResource Include="Analysis\Hunspell\flaglong.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\flaglong.dic" />
+    <EmbeddedResource Include="Analysis\Hunspell\flagnum.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\flagnum.dic" />
+    <EmbeddedResource Include="Analysis\Hunspell\homonyms.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\homonyms.dic" />
+    <EmbeddedResource Include="Analysis\Hunspell\ignore.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\ignore.dic" />
+    <EmbeddedResource Include="Analysis\Hunspell\mixedcase.dic" />
+    <EmbeddedResource Include="Analysis\Hunspell\morph.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\morph.dic" />
+    <EmbeddedResource Include="Analysis\Hunspell\optional-condition.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\simple.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\simple.dic" />
+    <EmbeddedResource Include="Analysis\Hunspell\twofold.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\twosuffixes.aff" />
+    <EmbeddedResource Include="Analysis\Hunspell\twosuffixes.dic" />
     <None Include="packages.config" />
   </ItemGroup>
   <ItemGroup>
@@ -420,6 +476,7 @@
     <EmbeddedResource Include="Analysis\No\nn_light.txt" />
     <EmbeddedResource Include="Analysis\No\nn_minimal.txt" />
   </ItemGroup>
+  <ItemGroup />
   <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
   <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
        Other similar extension points exist, see Microsoft.Common.targets.


[22/50] [abbrv] lucenenet git commit: Added missing call to Queries.Function.ValueSources.TermFreqValueSource.Reset(), which was causing the Queries.Function.TestValueSources.TestTermFreq() test to fail.

Posted by sy...@apache.org.
Added missing call to Queries.Function.ValueSources.TermFreqValueSource.Reset(), which was causing the Queries.Function.TestValueSources.TestTermFreq() test to fail.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/1fa4ed9b
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/1fa4ed9b
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/1fa4ed9b

Branch: refs/heads/analysis-work
Commit: 1fa4ed9bda6ba35d4497345da18346ca86aea412
Parents: 22663d7
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Sun Aug 21 07:54:49 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sun Aug 21 07:54:49 2016 +0700

----------------------------------------------------------------------
 .../Function/ValueSources/TermFreqValueSource.cs               | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1fa4ed9b/src/Lucene.Net.Queries/Function/ValueSources/TermFreqValueSource.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/Function/ValueSources/TermFreqValueSource.cs b/src/Lucene.Net.Queries/Function/ValueSources/TermFreqValueSource.cs
index 69888e0..8f846d1 100644
--- a/src/Lucene.Net.Queries/Function/ValueSources/TermFreqValueSource.cs
+++ b/src/Lucene.Net.Queries/Function/ValueSources/TermFreqValueSource.cs
@@ -64,17 +64,13 @@ namespace Lucene.Net.Queries.Function.ValueSources
                 this.outerInstance = outerInstance;
                 this.terms = terms;
                 lastDocRequested = -1;
+                Reset();
             }
 
             private DocsEnum docs;
             private int atDoc;
             private int lastDocRequested;
 
-            //JAVA TO C# CONVERTER TODO TASK: Initialization blocks declared within anonymous inner classes are not converted:
-            //	  {
-            //		  reset();
-            //	  }
-
             public virtual void Reset()
             {
                 // no one should call us for deleted docs?