You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2014/09/16 00:24:55 UTC
[8/8] git commit: Porting Lucene.Net.Suggest (still not compiling)
Porting Lucene.Net.Suggest (still not compiling)
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/0ebac726
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/0ebac726
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/0ebac726
Branch: refs/heads/master
Commit: 0ebac7269dbc076fbd9c33855d59f75716444fab
Parents: 6e90056
Author: Itamar Syn-Hershko <it...@code972.com>
Authored: Tue Sep 16 01:22:45 2014 +0300
Committer: Itamar Syn-Hershko <it...@code972.com>
Committed: Tue Sep 16 01:23:19 2014 +0300
----------------------------------------------------------------------
src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs | 4 +-
src/Lucene.Net.Core/Index/IndexableField.cs | 2 +
src/Lucene.Net.Core/Search/SearcherManager.cs | 1 +
src/Lucene.Net.Core/Util/OfflineSorter.cs | 1358 +++++++++---------
.../Lucene.Net.Suggest.csproj | 108 ++
.../Properties/AssemblyInfo.cs | 36 +
src/Lucene.Net.Suggest/RectangularArrays.cs | 29 +
.../Spell/CombineSuggestion.cs | 47 +
src/Lucene.Net.Suggest/Spell/Dictionary.cs | 35 +
.../Spell/DirectSpellChecker.cs | 575 ++++++++
.../Spell/HighFrequencyDictionary.cs | 151 ++
.../Spell/JaroWinklerDistance.cs | 173 +++
.../Spell/LevensteinDistance.cs | 144 ++
.../Spell/LuceneDictionary.cs | 58 +
.../Spell/LuceneLevenshteinDistance.cs | 136 ++
src/Lucene.Net.Suggest/Spell/NGramDistance.cs | 195 +++
.../Spell/PlainTextDictionary.cs | 134 ++
src/Lucene.Net.Suggest/Spell/SpellChecker.cs | 748 ++++++++++
src/Lucene.Net.Suggest/Spell/StringDistance.cs | 36 +
src/Lucene.Net.Suggest/Spell/SuggestMode.cs | 46 +
src/Lucene.Net.Suggest/Spell/SuggestWord.cs | 53 +
.../Spell/SuggestWordFrequencyComparator.cs | 64 +
.../Spell/SuggestWordQueue.cs | 65 +
.../Spell/SuggestWordScoreComparator.cs | 64 +
.../Spell/TermFreqIterator.cs | 68 +
.../Spell/WordBreakSpellChecker.cs | 542 +++++++
src/Lucene.Net.Suggest/StringHelperClass.cs | 90 ++
.../Analyzing/AnalyzingInfixSuggester.cs | 792 ++++++++++
.../Suggest/Analyzing/AnalyzingSuggester.cs | 1093 ++++++++++++++
.../Suggest/Analyzing/BlendedInfixSuggester.cs | 316 ++++
.../Suggest/Analyzing/FSTUtil.cs | 146 ++
.../Suggest/Analyzing/FreeTextSuggester.cs | 929 ++++++++++++
.../Suggest/Analyzing/FuzzySuggester.cs | 271 ++++
.../Suggest/Analyzing/SuggestStopFilter.cs | 138 ++
.../Suggest/BufferedInputIterator.cs | 139 ++
.../Suggest/BufferingTermFreqIteratorWrapper.cs | 89 ++
.../Suggest/DocumentDictionary.cs | 278 ++++
.../Suggest/DocumentValueSourceDictionary.cs | 169 +++
.../Suggest/FileDictionary.cs | 284 ++++
.../Suggest/Fst/BytesRefSorter.cs | 34 +
.../Suggest/Fst/ExternalRefSorter.cs | 150 ++
.../Suggest/Fst/FSTCompletion.cs | 467 ++++++
.../Suggest/Fst/FSTCompletionBuilder.cs | 274 ++++
.../Suggest/Fst/FSTCompletionLookup.cs | 337 +++++
.../Suggest/Fst/WFSTCompletionLookup.cs | 348 +++++
.../Suggest/InMemorySorter.cs | 70 +
src/Lucene.Net.Suggest/Suggest/InputIterator.cs | 124 ++
.../Suggest/Jaspell/JaspellLookup.cs | 258 ++++
.../Suggest/Jaspell/JaspellTernarySearchTrie.cs | 986 +++++++++++++
src/Lucene.Net.Suggest/Suggest/Lookup.cs | 299 ++++
.../Suggest/SortedInputIterator.cs | 353 +++++
.../Suggest/SortedTermFreqIteratorWrapper.cs | 230 +++
.../Suggest/Tst/TSTAutocomplete.cs | 207 +++
src/Lucene.Net.Suggest/Suggest/Tst/TSTLookup.cs | 295 ++++
.../Suggest/Tst/TernaryTreeNode.cs | 78 +
.../Suggest/UnsortedInputIterator.cs | 108 ++
56 files changed, 13543 insertions(+), 681 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs b/src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs
index 2a7b543..b18e17a 100644
--- a/src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs
+++ b/src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs
@@ -62,7 +62,7 @@ namespace Lucene.Net.Analysis
/// </summary>
/// <param name="fieldName"> Name of the field which is to be analyzed </param>
/// <returns> Analyzer for the field with the given name. Assumed to be non-null </returns>
- protected internal abstract Analyzer GetWrappedAnalyzer(string fieldName);
+ protected abstract Analyzer GetWrappedAnalyzer(string fieldName);
/// <summary>
/// Wraps / alters the given TokenStreamComponents, taken from the wrapped
@@ -75,7 +75,7 @@ namespace Lucene.Net.Analysis
/// <param name="components">
/// TokenStreamComponents taken from the wrapped Analyzer </param>
/// <returns> Wrapped / altered TokenStreamComponents. </returns>
- protected internal virtual TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
+ protected virtual TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
{
return components;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Core/Index/IndexableField.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Index/IndexableField.cs b/src/Lucene.Net.Core/Index/IndexableField.cs
index 25846bc..d26c639 100644
--- a/src/Lucene.Net.Core/Index/IndexableField.cs
+++ b/src/Lucene.Net.Core/Index/IndexableField.cs
@@ -1,3 +1,5 @@
+using Lucene.Net.Search.Similarities;
+
namespace Lucene.Net.Index
{
using System.IO;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Core/Search/SearcherManager.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Search/SearcherManager.cs b/src/Lucene.Net.Core/Search/SearcherManager.cs
index 1c2dee5..8c80a43 100644
--- a/src/Lucene.Net.Core/Search/SearcherManager.cs
+++ b/src/Lucene.Net.Core/Search/SearcherManager.cs
@@ -1,4 +1,5 @@
using System.Diagnostics;
+using System.IO;
namespace Lucene.Net.Search
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Core/Util/OfflineSorter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/OfflineSorter.cs b/src/Lucene.Net.Core/Util/OfflineSorter.cs
index aefb07c..f966e77 100644
--- a/src/Lucene.Net.Core/Util/OfflineSorter.cs
+++ b/src/Lucene.Net.Core/Util/OfflineSorter.cs
@@ -1,679 +1,679 @@
-//using System;
-//using System.Collections.Generic;
-//using System.Diagnostics;
-//using System.IO;
-//
-//namespace Lucene.Net.Util
-//{
-// /*
-// * Licensed to the Apache Software Foundation (ASF) under one or more
-// * contributor license agreements. See the NOTICE file distributed with
-// * this work for additional information regarding copyright ownership.
-// * The ASF licenses this file to You under the Apache License, Version 2.0
-// * (the "License"); you may not use this file except in compliance with
-// * the License. You may obtain a copy of the License at
-// *
-// * http://www.apache.org/licenses/LICENSE-2.0
-// *
-// * Unless required by applicable law or agreed to in writing, software
-// * distributed under the License is distributed on an "AS IS" BASIS,
-// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// * See the License for the specific language governing permissions and
-// * limitations under the License.
-// */
-//
-// /// <summary>
-// /// On-disk sorting of byte arrays. Each byte array (entry) is a composed of the following
-// /// fields:
-// /// <ul>
-// /// <li>(two bytes) length of the following byte array,
-// /// <li>exactly the above count of bytes for the sequence to be sorted.
-// /// </ul>
-// /// </summary>
-// public sealed class OfflineSorter
-// {
-// private bool InstanceFieldsInitialized = false;
-//
-// private void InitializeInstanceFields()
-// {
-// Buffer = new BytesRefArray(BufferBytesUsed);
-// }
-//
-// /// <summary>
-// /// Convenience constant for megabytes </summary>
-// public const long MB = 1024 * 1024;
-// /// <summary>
-// /// Convenience constant for gigabytes </summary>
-// public static readonly long GB = MB * 1024;
-//
-// /// <summary>
-// /// Minimum recommended buffer size for sorting.
-// /// </summary>
-// public const long MIN_BUFFER_SIZE_MB = 32;
-//
-// /// <summary>
-// /// Absolute minimum required buffer size for sorting.
-// /// </summary>
-// public static readonly long ABSOLUTE_MIN_SORT_BUFFER_SIZE = MB / 2;
-// private const string MIN_BUFFER_SIZE_MSG = "At least 0.5MB RAM buffer is needed";
-//
-// /// <summary>
-// /// Maximum number of temporary files before doing an intermediate merge.
-// /// </summary>
-// public const int MAX_TEMPFILES = 128;
-//
-// /// <summary>
-// /// A bit more descriptive unit for constructors.
-// /// </summary>
-// /// <seealso cref= #automatic() </seealso>
-// /// <seealso cref= #megabytes(long) </seealso>
-// public sealed class BufferSize
-// {
-// internal readonly int Bytes;
-//
-// internal BufferSize(long bytes)
-// {
-// if (bytes > int.MaxValue)
-// {
-// throw new System.ArgumentException("Buffer too large for Java (" + (int.MaxValue / MB) + "mb max): " + bytes);
-// }
-//
-// if (bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE)
-// {
-// throw new System.ArgumentException(MIN_BUFFER_SIZE_MSG + ": " + bytes);
-// }
-//
-// this.Bytes = (int)bytes;
-// }
-//
-// /// <summary>
-// /// Creates a <seealso cref="BufferSize"/> in MB. The given
-// /// values must be > 0 and < 2048.
-// /// </summary>
-// public static BufferSize Megabytes(long mb)
-// {
-// return new BufferSize(mb * MB);
-// }
-//
-// /// <summary>
-// /// Approximately half of the currently available free heap, but no less
-// /// than <seealso cref="#ABSOLUTE_MIN_SORT_BUFFER_SIZE"/>. However if current heap allocation
-// /// is insufficient or if there is a large portion of unallocated heap-space available
-// /// for sorting consult with max allowed heap size.
-// /// </summary>
-// public static BufferSize Automatic()
-// {
-// var proc = Process.GetCurrentProcess();
-//
-// // take sizes in "conservative" order
-// long max = proc.PeakVirtualMemorySize64; // max allocated; java has it as Runtime.maxMemory();
-// long total = proc.VirtualMemorySize64; // currently allocated; java has it as Runtime.totalMemory();
-// long free = rt.freeMemory(); // unused portion of currently allocated
-// long totalAvailableBytes = max - total + free;
-//
-// // by free mem (attempting to not grow the heap for this)
-// long sortBufferByteSize = free / 2;
-// const long minBufferSizeBytes = MIN_BUFFER_SIZE_MB * MB;
-// if (sortBufferByteSize < minBufferSizeBytes || totalAvailableBytes > 10 * minBufferSizeBytes) // lets see if we need/should to grow the heap
-// {
-// if (totalAvailableBytes / 2 > minBufferSizeBytes) // there is enough mem for a reasonable buffer
-// {
-// sortBufferByteSize = totalAvailableBytes / 2; // grow the heap
-// }
-// else
-// {
-// //heap seems smallish lets be conservative fall back to the free/2
-// sortBufferByteSize = Math.Max(ABSOLUTE_MIN_SORT_BUFFER_SIZE, sortBufferByteSize);
-// }
-// }
-// return new BufferSize(Math.Min((long)int.MaxValue, sortBufferByteSize));
-// }
-// }
-//
-// /// <summary>
-// /// Sort info (debugging mostly).
-// /// </summary>
-// public class SortInfo
-// {
-// internal bool InstanceFieldsInitialized = false;
-//
-// internal virtual void InitializeInstanceFields()
-// {
-// BufferSize = OuterInstance.RamBufferSize.Bytes;
-// }
-//
-// private readonly OfflineSorter OuterInstance;
-//
-// /// <summary>
-// /// number of temporary files created when merging partitions </summary>
-// public int TempMergeFiles;
-// /// <summary>
-// /// number of partition merges </summary>
-// public int MergeRounds;
-// /// <summary>
-// /// number of lines of data read </summary>
-// public int Lines;
-// /// <summary>
-// /// time spent merging sorted partitions (in milliseconds) </summary>
-// public long MergeTime;
-// /// <summary>
-// /// time spent sorting data (in milliseconds) </summary>
-// public long SortTime;
-// /// <summary>
-// /// total time spent (in milliseconds) </summary>
-// public long TotalTime;
-// /// <summary>
-// /// time spent in i/o read (in milliseconds) </summary>
-// public long ReadTime;
-// /// <summary>
-// /// read buffer size (in bytes) </summary>
-// public long BufferSize;
-//
-// /// <summary>
-// /// create a new SortInfo (with empty statistics) for debugging </summary>
-// public SortInfo(OfflineSorter outerInstance)
-// {
-// this.OuterInstance = outerInstance;
-//
-// if (!InstanceFieldsInitialized)
-// {
-// InitializeInstanceFields();
-// InstanceFieldsInitialized = true;
-// }
-// }
-//
-// public override string ToString()
-// {
-// return string.Format("time=%.2f sec. total (%.2f reading, %.2f sorting, %.2f merging), lines=%d, temp files=%d, merges=%d, soft ram limit=%.2f MB", TotalTime / 1000.0d, ReadTime / 1000.0d, SortTime / 1000.0d, MergeTime / 1000.0d, Lines, TempMergeFiles, MergeRounds, (double)BufferSize / MB);
-// }
-// }
-//
-// private readonly BufferSize RamBufferSize;
-//
-// private readonly Counter BufferBytesUsed = Counter.NewCounter();
-// private BytesRefArray Buffer;
-// private SortInfo sortInfo;
-// private readonly int MaxTempFiles;
-// private readonly IComparer<BytesRef> comparator;
-//
-// /// <summary>
-// /// Default comparator: sorts in binary (codepoint) order </summary>
-// public static readonly IComparer<BytesRef> DEFAULT_COMPARATOR = BytesRef.UTF8SortedAsUnicodeComparator.Instance;
-//
-// /// <summary>
-// /// Defaults constructor.
-// /// </summary>
-// /// <seealso cref= #defaultTempDir() </seealso>
-// /// <seealso cref= BufferSize#automatic() </seealso>
-// public OfflineSorter()
-// : this(DEFAULT_COMPARATOR, BufferSize.Automatic(), DefaultTempDir(), MAX_TEMPFILES)
-// {
-// if (!InstanceFieldsInitialized)
-// {
-// InitializeInstanceFields();
-// InstanceFieldsInitialized = true;
-// }
-// }
-//
-// /// <summary>
-// /// Defaults constructor with a custom comparator.
-// /// </summary>
-// /// <seealso cref= #defaultTempDir() </seealso>
-// /// <seealso cref= BufferSize#automatic() </seealso>
-// public OfflineSorter(IComparer<BytesRef> comparator)
-// : this(comparator, BufferSize.Automatic(), DefaultTempDir(), MAX_TEMPFILES)
-// {
-// if (!InstanceFieldsInitialized)
-// {
-// InitializeInstanceFields();
-// InstanceFieldsInitialized = true;
-// }
-// }
-//
-// /// <summary>
-// /// All-details constructor.
-// /// </summary>
-// public OfflineSorter(IComparer<BytesRef> comparator, BufferSize ramBufferSize, /*DirectoryInfo tempDirectory,*/ int maxTempfiles)
-// {
-// if (!InstanceFieldsInitialized)
-// {
-// InitializeInstanceFields();
-// InstanceFieldsInitialized = true;
-// }
-// if (ramBufferSize.Bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE)
-// {
-// throw new System.ArgumentException(MIN_BUFFER_SIZE_MSG + ": " + ramBufferSize.Bytes);
-// }
-//
-// if (maxTempfiles < 2)
-// {
-// throw new System.ArgumentException("maxTempFiles must be >= 2");
-// }
-//
-// this.RamBufferSize = ramBufferSize;
-// this.MaxTempFiles = maxTempfiles;
-// this.comparator = comparator;
-// }
-//
-// /// <summary>
-// /// Sort input to output, explicit hint for the buffer size. The amount of allocated
-// /// memory may deviate from the hint (may be smaller or larger).
-// /// </summary>
-// public SortInfo Sort(FileInfo input, FileInfo output)
-// {
-// sortInfo = new SortInfo(this) {TotalTime = DateTime.Now.Millisecond};
-//
-// output.Delete();
-//
-// var merges = new List<FileInfo>();
-// bool success2 = false;
-// try
-// {
-// var inputStream = new ByteSequencesReader(input);
-// bool success = false;
-// try
-// {
-// int lines = 0;
-// while ((lines = ReadPartition(inputStream)) > 0)
-// {
-// merges.Add(SortPartition(lines));
-// sortInfo.TempMergeFiles++;
-// sortInfo.Lines += lines;
-//
-// // Handle intermediate merges.
-// if (merges.Count == MaxTempFiles)
-// {
-// var intermediate = new FileInfo(Path.GetTempFileName());
-// try
-// {
-// MergePartitions(merges, intermediate);
-// }
-// finally
-// {
-// foreach (var file in merges)
-// {
-// file.Delete();
-// }
-// merges.Clear();
-// merges.Add(intermediate);
-// }
-// sortInfo.TempMergeFiles++;
-// }
-// }
-// success = true;
-// }
-// finally
-// {
-// if (success)
-// {
-// IOUtils.Close(inputStream);
-// }
-// else
-// {
-// IOUtils.CloseWhileHandlingException(inputStream);
-// }
-// }
-//
-// // One partition, try to rename or copy if unsuccessful.
-// if (merges.Count == 1)
-// {
-// FileInfo single = merges[0];
-// Copy(single, output);
-// try
-// {
-// File.Delete(single.FullName);
-// }
-// catch (Exception)
-// {
-// // ignored
-// }
-// }
-// else
-// {
-// // otherwise merge the partitions with a priority queue.
-// MergePartitions(merges, output);
-// }
-// success2 = true;
-// }
-// finally
-// {
-// foreach (FileInfo file in merges)
-// {
-// file.Delete();
-// }
-// if (!success2)
-// {
-// output.Delete();
-// }
-// }
-//
-// sortInfo.TotalTime = (DateTime.Now.Millisecond - sortInfo.TotalTime);
-// return sortInfo;
-// }
-//
-// /// <summary>
-// /// Returns the default temporary directory. By default, the System's temp folder. If not accessible
-// /// or not available, an IOException is thrown
-// /// </summary>
-// public static DirectoryInfo DefaultTempDir()
-// {
-// return new DirectoryInfo(Path.GetTempPath());
-// }
-//
-// /// <summary>
-// /// Copies one file to another.
-// /// </summary>
-// private static void Copy(FileInfo file, FileInfo output)
-// {
-// File.Copy(file.FullName, output.FullName);
-// }
-//
-// /// <summary>
-// /// Sort a single partition in-memory. </summary>
-// internal FileInfo SortPartition(int len)
-// {
-// var data = this.Buffer;
-// var tempFile = new FileInfo(Path.GetTempFileName());
-// //var tempFile1 = File.Create(new ());
-// //FileInfo tempFile = FileInfo.createTempFile("sort", "partition", TempDirectory);
-//
-// long start = DateTime.Now.Millisecond;
-// sortInfo.SortTime += (DateTime.Now.Millisecond - start);
-//
-// var @out = new ByteSequencesWriter(tempFile);
-// BytesRef spare;
-// try
-// {
-// BytesRefIterator iter = Buffer.Iterator(comparator);
-// while ((spare = iter.Next()) != null)
-// {
-// Debug.Assert(spare.Length <= short.MaxValue);
-// @out.Write(spare);
-// }
-//
-// @out.Dispose();
-//
-// // Clean up the buffer for the next partition.
-// data.Clear();
-// return tempFile;
-// }
-// finally
-// {
-// IOUtils.Close(@out);
-// }
-// }
-//
-// /// <summary>
-// /// Merge a list of sorted temporary files (partitions) into an output file </summary>
-// internal void MergePartitions(IList<FileInfo> merges, FileInfo outputFile)
-// {
-// long start = DateTime.Now.Millisecond;
-//
-// var @out = new ByteSequencesWriter(outputFile);
-//
-// PriorityQueue<FileAndTop> queue = new PriorityQueueAnonymousInnerClassHelper(this, merges.Count);
-//
-// var streams = new ByteSequencesReader[merges.Count];
-// try
-// {
-// // Open streams and read the top for each file
-// for (int i = 0; i < merges.Count; i++)
-// {
-// streams[i] = new ByteSequencesReader(merges[i]);
-// sbyte[] line = streams[i].Read();
-// if (line != null)
-// {
-// queue.InsertWithOverflow(new FileAndTop(i, line));
-// }
-// }
-//
-// // Unix utility sort() uses ordered array of files to pick the next line from, updating
-// // it as it reads new lines. The PQ used here is a more elegant solution and has
-// // a nicer theoretical complexity bound :) The entire sorting process is I/O bound anyway
-// // so it shouldn't make much of a difference (didn't check).
-// FileAndTop top;
-// while ((top = queue.Top()) != null)
-// {
-// @out.Write(top.Current);
-// if (!streams[top.Fd].Read(top.Current))
-// {
-// queue.Pop();
-// }
-// else
-// {
-// queue.UpdateTop();
-// }
-// }
-//
-// SortInfo.MergeTime += DateTime.UtcNow.Ticks - start;
-// SortInfo.MergeRounds++;
-// }
-// finally
-// {
-// // The logic below is: if an exception occurs in closing out, it has a priority over exceptions
-// // happening in closing streams.
-// try
-// {
-// IOUtils.Close(streams);
-// }
-// finally
-// {
-// IOUtils.Close(@out);
-// }
-// }
-// }
-//
-// private class PriorityQueueAnonymousInnerClassHelper : PriorityQueue<FileAndTop>
-// {
-// private readonly OfflineSorter OuterInstance;
-//
-// public PriorityQueueAnonymousInnerClassHelper(OfflineSorter outerInstance, int size)
-// : base(size)
-// {
-// this.OuterInstance = outerInstance;
-// }
-//
-// public override bool LessThan(FileAndTop a, FileAndTop b)
-// {
-// return OuterInstance.comparator.Compare(a.Current, b.Current) < 0;
-// }
-// }
-//
-// /// <summary>
-// /// Read in a single partition of data </summary>
-// internal int ReadPartition(ByteSequencesReader reader)
-// {
-// long start = DateTime.Now.Millisecond;
-// var scratch = new BytesRef();
-// while ((scratch.Bytes = reader.Read()) != null)
-// {
-// scratch.Length = scratch.Bytes.Length;
-// Buffer.Append(scratch);
-// // Account for the created objects.
-// // (buffer slots do not account to buffer size.)
-// if (RamBufferSize.Bytes < BufferBytesUsed.Get())
-// {
-// break;
-// }
-// }
-// sortInfo.ReadTime += (DateTime.Now.Millisecond - start);
-// return Buffer.Size();
-// }
-//
-// internal class FileAndTop
-// {
-// internal readonly int Fd;
-// internal readonly BytesRef Current;
-//
-// internal FileAndTop(int fd, sbyte[] firstLine)
-// {
-// this.Fd = fd;
-// this.Current = new BytesRef(firstLine);
-// }
-// }
-//
-// /// <summary>
-// /// Utility class to emit length-prefixed byte[] entries to an output stream for sorting.
-// /// Complementary to <seealso cref="ByteSequencesReader"/>.
-// /// </summary>
-// public class ByteSequencesWriter : IDisposable
-// {
-// internal readonly DataOutput Os;
-//
-// /// <summary>
-// /// Constructs a ByteSequencesWriter to the provided File </summary>
-// public ByteSequencesWriter(FileInfo file)
-// : this(new DataOutputStream(new BufferedOutputStream(new FileOutputStream(file))))
-// {
-// }
-//
-// /// <summary>
-// /// Constructs a ByteSequencesWriter to the provided DataOutput </summary>
-// public ByteSequencesWriter(DataOutput os)
-// {
-// this.Os = os;
-// }
-//
-// /// <summary>
-// /// Writes a BytesRef. </summary>
-// /// <seealso cref= #write(byte[], int, int) </seealso>
-// public virtual void Write(BytesRef @ref)
-// {
-// Debug.Assert(@ref != null);
-// Write(@ref.Bytes, @ref.Offset, @ref.Length);
-// }
-//
-// /// <summary>
-// /// Writes a byte array. </summary>
-// /// <seealso cref= #write(byte[], int, int) </seealso>
-// public virtual void Write(sbyte[] bytes)
-// {
-// Write(bytes, 0, bytes.Length);
-// }
-//
-// /// <summary>
-// /// Writes a byte array.
-// /// <p>
-// /// The length is written as a <code>short</code>, followed
-// /// by the bytes.
-// /// </summary>
-// public virtual void Write(sbyte[] bytes, int off, int len)
-// {
-// Debug.Assert(bytes != null);
-// Debug.Assert(off >= 0 && off + len <= bytes.Length);
-// Debug.Assert(len >= 0);
-// Os.WriteShort(len);
-// Os.Write(bytes, off, len);
-// }
-//
-// /// <summary>
-// /// Closes the provided <seealso cref="DataOutput"/> if it is <seealso cref="IDisposable"/>.
-// /// </summary>
-// public void Dispose()
-// {
-// var os = Os as IDisposable;
-// if (os != null)
-// {
-// os.Dispose();
-// }
-// }
-// }
-//
-// /// <summary>
-// /// Utility class to read length-prefixed byte[] entries from an input.
-// /// Complementary to <seealso cref="ByteSequencesWriter"/>.
-// /// </summary>
-// public class ByteSequencesReader : IDisposable
-// {
-// internal readonly DataInput inputStream;
-//
-// /// <summary>
-// /// Constructs a ByteSequencesReader from the provided File </summary>
-// public ByteSequencesReader(FileInfo file)
-// : this(new DataInputStream(new BufferedInputStream(new FileInputStream(file))))
-// {
-// }
-//
-// /// <summary>
-// /// Constructs a ByteSequencesReader from the provided DataInput </summary>
-// public ByteSequencesReader(DataInput inputStream)
-// {
-// this.inputStream = inputStream;
-// }
-//
-// /// <summary>
-// /// Reads the next entry into the provided <seealso cref="BytesRef"/>. The internal
-// /// storage is resized if needed.
-// /// </summary>
-// /// <returns> Returns <code>false</code> if EOF occurred when trying to read
-// /// the header of the next sequence. Returns <code>true</code> otherwise. </returns>
-// /// <exception cref="EOFException"> if the file ends before the full sequence is read. </exception>
-// public virtual bool Read(BytesRef @ref)
-// {
-// short length;
-// try
-// {
-// length = inputStream.ReadShort();
-// }
-// catch (EOFException)
-// {
-// return false;
-// }
-//
-// @ref.Grow(length);
-// @ref.Offset = 0;
-// @ref.Length = length;
-// inputStream.ReadFully(@ref.Bytes, 0, length);
-// return true;
-// }
-//
-// /// <summary>
-// /// Reads the next entry and returns it if successful.
-// /// </summary>
-// /// <seealso cref= #read(BytesRef)
-// /// </seealso>
-// /// <returns> Returns <code>null</code> if EOF occurred before the next entry
-// /// could be read. </returns>
-// /// <exception cref="EOFException"> if the file ends before the full sequence is read. </exception>
-// public virtual sbyte[] Read()
-// {
-// short length;
-// try
-// {
-// length = inputStream.ReadShort();
-// }
-// catch (EOFException e)
-// {
-// return null;
-// }
-//
-// Debug.Assert(length >= 0, "Sanity: sequence length < 0: " + length);
-// sbyte[] result = new sbyte[length];
-// inputStream.ReadFully(result);
-// return result;
-// }
-//
-// /// <summary>
-// /// Closes the provided <seealso cref="DataInput"/> if it is <seealso cref="IDisposable"/>.
-// /// </summary>
-// public void Dispose()
-// {
-// var @is = inputStream as IDisposable;
-// if (@is != null)
-// {
-// @is.Dispose();
-// }
-// }
-// }
-//
-// /// <summary>
-// /// Returns the comparator in use to sort entries </summary>
-// public IComparer<BytesRef> Comparator
-// {
-// get
-// {
-// return comparator;
-// }
-// }
-// }
-//}
\ No newline at end of file
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+
+namespace Lucene.Net.Util
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// On-disk sorting of byte arrays. Each byte array (entry) is composed of the following
+ /// fields:
+ /// <ul>
+ /// <li>(two bytes) length of the following byte array,</li>
+ /// <li>exactly the above count of bytes for the sequence to be sorted.</li>
+ /// </ul>
+ /// </summary>
+ public sealed class OfflineSorter
+ {
+ private bool InstanceFieldsInitialized = false;
+
+ // Allocates the in-memory entry buffer, wiring it to the BufferBytesUsed counter.
+ private void InitializeInstanceFields()
+ {
+     this.Buffer = new BytesRefArray(this.BufferBytesUsed);
+ }
+
+ /// <summary>
+ /// Convenience constant for megabytes </summary>
+ public const long MB = 1024 * 1024;
+ /// <summary>
+ /// Convenience constant for gigabytes </summary>
+ public static readonly long GB = MB * 1024;
+
+ /// <summary>
+ /// Minimum recommended buffer size for sorting.
+ /// </summary>
+ public const long MIN_BUFFER_SIZE_MB = 32;
+
+ /// <summary>
+ /// Absolute minimum required buffer size for sorting.
+ /// </summary>
+ public static readonly long ABSOLUTE_MIN_SORT_BUFFER_SIZE = MB / 2;
+ private const string MIN_BUFFER_SIZE_MSG = "At least 0.5MB RAM buffer is needed";
+
+ /// <summary>
+ /// Maximum number of temporary files before doing an intermediate merge.
+ /// </summary>
+ public const int MAX_TEMPFILES = 128;
+
+ /// <summary>
+ /// A bit more descriptive unit for constructors.
+ /// </summary>
+ /// <seealso cref="Automatic()"/>
+ /// <seealso cref="Megabytes(long)"/>
+ public sealed class BufferSize
+ {
+ internal readonly int Bytes; // validated size in bytes; assigned once by the constructor
+
+ internal BufferSize(long bytes)
+ {
+ if (bytes > int.MaxValue) // must fit in the int-typed Bytes field
+ {
+ throw new System.ArgumentException("Buffer too large for Java (" + (int.MaxValue / MB) + "mb max): " + bytes);
+ }
+
+ if (bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE) // reject anything below the absolute minimum (MB / 2)
+ {
+ throw new System.ArgumentException(MIN_BUFFER_SIZE_MSG + ": " + bytes);
+ }
+
+ this.Bytes = (int)bytes;
+ }
+
+ /// <summary>
+ /// Creates a <see cref="BufferSize"/> in MB. The given
+ /// values must be &gt; 0 and &lt; 2048.
+ /// </summary>
+ public static BufferSize Megabytes(long mb)
+ {
+ return new BufferSize(mb * MB);
+ }
+
+ /// <summary>
+ /// Approximately half of the currently available free heap, but no less
+ /// than <see cref="ABSOLUTE_MIN_SORT_BUFFER_SIZE"/>. However if current heap allocation
+ /// is insufficient or if there is a large portion of unallocated heap-space available
+ /// for sorting consult with max allowed heap size.
+ /// </summary>
+ public static BufferSize Automatic()
+ {
+ var proc = Process.GetCurrentProcess(); // NOTE(review): Process is IDisposable and is never disposed in this method
+
+ // take sizes in "conservative" order
+ long max = proc.PeakVirtualMemorySize64; // max allocated; java has it as Runtime.maxMemory();
+ long total = proc.VirtualMemorySize64; // currently allocated; java has it as Runtime.totalMemory();
+ long free = rt.freeMemory(); // unused portion of currently allocated. NOTE(review): 'rt' is not declared anywhere in this method (leftover of Java's Runtime.freeMemory()), so this line does not compile; a .NET replacement still has to be chosen
+ long totalAvailableBytes = max - total + free;
+
+ // by free mem (attempting to not grow the heap for this)
+ long sortBufferByteSize = free / 2;
+ const long minBufferSizeBytes = MIN_BUFFER_SIZE_MB * MB;
+ if (sortBufferByteSize < minBufferSizeBytes || totalAvailableBytes > 10 * minBufferSizeBytes) // lets see if we need/should to grow the heap
+ {
+ if (totalAvailableBytes / 2 > minBufferSizeBytes) // there is enough mem for a reasonable buffer
+ {
+ sortBufferByteSize = totalAvailableBytes / 2; // grow the heap
+ }
+ else
+ {
+ //heap seems smallish lets be conservative fall back to the free/2
+ sortBufferByteSize = Math.Max(ABSOLUTE_MIN_SORT_BUFFER_SIZE, sortBufferByteSize);
+ }
+ }
+ return new BufferSize(Math.Min((long)int.MaxValue, sortBufferByteSize)); // clamp so the constructor's upper-bound check cannot fire
+ }
+ }
+
+ /// <summary>
+ /// Sort info (debugging mostly).
+ /// </summary>
+ public class SortInfo
+ {
+     internal bool InstanceFieldsInitialized = false;
+
+     /// <summary>
+     /// Seeds the reported buffer size from the owning sorter's RAM buffer size.
+     /// </summary>
+     internal virtual void InitializeInstanceFields()
+     {
+         BufferSize = OuterInstance.RamBufferSize.Bytes;
+     }
+
+     private readonly OfflineSorter OuterInstance;
+
+     /// <summary>
+     /// number of temporary files created when merging partitions </summary>
+     public int TempMergeFiles;
+     /// <summary>
+     /// number of partition merges </summary>
+     public int MergeRounds;
+     /// <summary>
+     /// number of lines of data read </summary>
+     public int Lines;
+     /// <summary>
+     /// time spent merging sorted partitions (in milliseconds) </summary>
+     public long MergeTime;
+     /// <summary>
+     /// time spent sorting data (in milliseconds) </summary>
+     public long SortTime;
+     /// <summary>
+     /// total time spent (in milliseconds) </summary>
+     public long TotalTime;
+     /// <summary>
+     /// time spent in i/o read (in milliseconds) </summary>
+     public long ReadTime;
+     /// <summary>
+     /// read buffer size (in bytes) </summary>
+     public long BufferSize;
+
+     /// <summary>
+     /// create a new SortInfo (with empty statistics) for debugging </summary>
+     /// <param name="outerInstance"> the sorter whose RAM buffer size is copied into this instance </param>
+     public SortInfo(OfflineSorter outerInstance)
+     {
+         this.OuterInstance = outerInstance;
+
+         if (!InstanceFieldsInitialized)
+         {
+             InitializeInstanceFields();
+             InstanceFieldsInitialized = true;
+         }
+     }
+
+     /// <summary>
+     /// Formats the collected statistics on a single line. Millisecond counters are
+     /// shown in seconds and the buffer size in megabytes, each with two decimals.
+     /// </summary>
+     public override string ToString()
+     {
+         // string.Format understands composite placeholders ({0:F2}), not the Java-style
+         // %.2f / %d specifiers, which it would print verbatim while ignoring every argument.
+         // Invariant culture keeps the decimal separator independent of the machine's locale.
+         return string.Format(
+             System.Globalization.CultureInfo.InvariantCulture,
+             "time={0:F2} sec. total ({1:F2} reading, {2:F2} sorting, {3:F2} merging), lines={4}, temp files={5}, merges={6}, soft ram limit={7:F2} MB",
+             TotalTime / 1000.0d,
+             ReadTime / 1000.0d,
+             SortTime / 1000.0d,
+             MergeTime / 1000.0d,
+             Lines,
+             TempMergeFiles,
+             MergeRounds,
+             (double)BufferSize / MB);
+     }
+ }
+
+ // RAM budget for one in-memory partition; ReadPartition stops filling the buffer
+ // once BufferBytesUsed exceeds it.
+ private readonly BufferSize RamBufferSize;
+
+ // Byte counter compared against RamBufferSize in ReadPartition.
+ // NOTE(review): presumably tied to Buffer during instance-field initialization,
+ // which is not visible in this part of the file - confirm.
+ private readonly Counter BufferBytesUsed = Counter.NewCounter();
+ // Entries of the partition currently being read and sorted.
+ private BytesRefArray Buffer;
+ // Statistics of the current run; replaced at the start of every Sort call.
+ private SortInfo sortInfo;
+ // Number of partition files at which Sort performs an intermediate merge.
+ private readonly int MaxTempFiles;
+ // Ordering used when iterating the buffer and when merging partitions.
+ private readonly IComparer<BytesRef> comparator;
+
+ /// <summary>
+ /// Default comparator: sorts in binary (codepoint) order </summary>
+ public static readonly IComparer<BytesRef> DEFAULT_COMPARATOR = BytesRef.UTF8SortedAsUnicodeComparator.Instance;
+
+ /// <summary>
+ /// Creates a sorter that uses <see cref="DEFAULT_COMPARATOR"/>, an automatically
+ /// sized RAM buffer and the default limit on temporary files.
+ /// </summary>
+ /// <seealso cref="BufferSize.Automatic()"/>
+ public OfflineSorter()
+     : this(DEFAULT_COMPARATOR, BufferSize.Automatic(), MAX_TEMPFILES)
+ {
+     // The all-details constructor no longer takes a temp directory (that parameter
+     // is commented out there), so none is passed. Instance fields are initialized
+     // by the chained constructor; nothing is left to do here.
+ }
+
+ /// <summary>
+ /// Creates a sorter that uses a custom comparator together with an automatically
+ /// sized RAM buffer and the default limit on temporary files.
+ /// </summary>
+ /// <param name="comparator"> ordering applied to the entries </param>
+ /// <seealso cref="BufferSize.Automatic()"/>
+ public OfflineSorter(IComparer<BytesRef> comparator)
+     : this(comparator, BufferSize.Automatic(), MAX_TEMPFILES)
+ {
+     // The all-details constructor no longer takes a temp directory (that parameter
+     // is commented out there), so none is passed. Instance fields are initialized
+     // by the chained constructor; nothing is left to do here.
+ }
+
+ /// <summary>
+ /// Creates a sorter with every setting supplied by the caller.
+ /// </summary>
+ /// <param name="comparator"> ordering applied to the entries </param>
+ /// <param name="ramBufferSize"> RAM buffer budget; must not be below ABSOLUTE_MIN_SORT_BUFFER_SIZE bytes </param>
+ /// <param name="maxTempfiles"> number of partition files that triggers an intermediate merge; at least 2 </param>
+ /// <exception cref="System.ArgumentException"> if the buffer or the temp-file limit is too small </exception>
+ public OfflineSorter(IComparer<BytesRef> comparator, BufferSize ramBufferSize, /*DirectoryInfo tempDirectory,*/ int maxTempfiles)
+ {
+     if (!InstanceFieldsInitialized)
+     {
+         InitializeInstanceFields();
+         InstanceFieldsInitialized = true;
+     }
+
+     // Validate the buffer first, then the temp-file limit.
+     var requestedBytes = ramBufferSize.Bytes;
+     if (requestedBytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE)
+     {
+         throw new System.ArgumentException(MIN_BUFFER_SIZE_MSG + ": " + requestedBytes);
+     }
+     if (maxTempfiles < 2)
+     {
+         throw new System.ArgumentException("maxTempFiles must be >= 2");
+     }
+
+     this.comparator = comparator;
+     this.MaxTempFiles = maxTempfiles;
+     this.RamBufferSize = ramBufferSize;
+ }
+
+ /// <summary>
+ /// Sorts the entries of <paramref name="input"/> into <paramref name="output"/>.
+ /// The input is read in partitions; each partition is sorted and written to a
+ /// temporary file, and the temporary files are then merged (or, if there is only
+ /// one, copied) into the output. An existing output file is deleted up front and
+ /// the output is deleted again if the sort fails.
+ /// </summary>
+ /// <param name="input"> file to read the entries from </param>
+ /// <param name="output"> file that receives the sorted entries </param>
+ /// <returns> the statistics collected during this run </returns>
+ public SortInfo Sort(FileInfo input, FileInfo output)
+ {
+     sortInfo = new SortInfo(this);
+
+     // Elapsed time comes from a Stopwatch: DateTime.Now.Millisecond is only the
+     // 0-999 millisecond component of the current time, so the difference of two
+     // readings is not a duration (and can even be negative).
+     var totalTimer = System.Diagnostics.Stopwatch.StartNew();
+
+     output.Delete();
+
+     var merges = new List<FileInfo>();
+     bool success2 = false;
+     try
+     {
+         var inputStream = new ByteSequencesReader(input);
+         bool success = false;
+         try
+         {
+             int lines = 0;
+             while ((lines = ReadPartition(inputStream)) > 0)
+             {
+                 merges.Add(SortPartition(lines));
+                 sortInfo.TempMergeFiles++;
+                 sortInfo.Lines += lines;
+
+                 // Handle intermediate merges.
+                 if (merges.Count == MaxTempFiles)
+                 {
+                     var intermediate = new FileInfo(Path.GetTempFileName());
+                     try
+                     {
+                         MergePartitions(merges, intermediate);
+                     }
+                     finally
+                     {
+                         // The merged partitions are dropped even on failure; the
+                         // intermediate file stays tracked so the outer finally
+                         // block can delete it.
+                         foreach (var file in merges)
+                         {
+                             file.Delete();
+                         }
+                         merges.Clear();
+                         merges.Add(intermediate);
+                     }
+                     sortInfo.TempMergeFiles++;
+                 }
+             }
+             success = true;
+         }
+         finally
+         {
+             if (success)
+             {
+                 IOUtils.Close(inputStream);
+             }
+             else
+             {
+                 IOUtils.CloseWhileHandlingException(inputStream);
+             }
+         }
+
+         // One partition: copy it to the output, then try to remove the temp file.
+         if (merges.Count == 1)
+         {
+             FileInfo single = merges[0];
+             Copy(single, output);
+             try
+             {
+                 File.Delete(single.FullName);
+             }
+             catch (Exception)
+             {
+                 // Best effort only: the finally block below deletes it again.
+             }
+         }
+         else
+         {
+             // otherwise merge the partitions with a priority queue.
+             MergePartitions(merges, output);
+         }
+         success2 = true;
+     }
+     finally
+     {
+         foreach (FileInfo file in merges)
+         {
+             file.Delete();
+         }
+         if (!success2)
+         {
+             output.Delete();
+         }
+     }
+
+     sortInfo.TotalTime = totalTimer.ElapsedMilliseconds;
+     return sortInfo;
+ }
+
+ /// <summary>
+ /// Returns the default temporary directory: the path reported by
+ /// <see cref="Path.GetTempPath()"/>, wrapped in a <see cref="DirectoryInfo"/>.
+ /// </summary>
+ /// <returns> a <see cref="DirectoryInfo"/> for the system's temp folder </returns>
+ public static DirectoryInfo DefaultTempDir()
+ {
+     string systemTempPath = Path.GetTempPath();
+     return new DirectoryInfo(systemTempPath);
+ }
+
+ /// <summary>
+ /// Copies <paramref name="file"/> to the location of <paramref name="output"/>.
+ /// An already existing destination is not overwritten.
+ /// </summary>
+ private static void Copy(FileInfo file, FileInfo output)
+ {
+     string destinationPath = output.FullName;
+     file.CopyTo(destinationPath);
+ }
+
+ /// <summary>
+ /// Sorts the partition currently held in memory, writes it to a new temporary
+ /// file and clears the buffer for the next partition.
+ /// </summary>
+ /// <param name="len"> number of entries in the partition (not used by this method) </param>
+ /// <returns> the temporary file holding the sorted partition </returns>
+ internal FileInfo SortPartition(int len)
+ {
+     var data = this.Buffer;
+     // NOTE(review): the temp file is created in the system temp folder; no
+     // custom temp directory is passed to this class.
+     var tempFile = new FileInfo(Path.GetTempFileName());
+
+     var @out = new ByteSequencesWriter(tempFile);
+     BytesRef spare;
+     try
+     {
+         // Time the creation of the ordered iterator with a Stopwatch
+         // (DateTime.Now.Millisecond is only the 0-999 ms clock component, and the
+         // previous measurement bracketed no work at all).
+         // NOTE(review): assumes the ordering work happens when the iterator is
+         // created - confirm against BytesRefArray.
+         var sortTimer = System.Diagnostics.Stopwatch.StartNew();
+         BytesRefIterator iter = data.Iterator(comparator);
+         sortInfo.SortTime += sortTimer.ElapsedMilliseconds;
+
+         while ((spare = iter.Next()) != null)
+         {
+             // Entries are length-prefixed with a short on disk.
+             Debug.Assert(spare.Length <= short.MaxValue);
+             @out.Write(spare);
+         }
+
+         // Close explicitly so a failure surfaces before the buffer is cleared.
+         @out.Dispose();
+
+         // Clean up the buffer for the next partition.
+         data.Clear();
+         return tempFile;
+     }
+     finally
+     {
+         IOUtils.Close(@out);
+     }
+ }
+
+ /// <summary>
+ /// Merges a list of sorted temporary files (partitions) into one output file and
+ /// records the merge time and merge round in the current statistics.
+ /// </summary>
+ /// <param name="merges"> the sorted partition files to merge </param>
+ /// <param name="outputFile"> file that receives the merged entries </param>
+ internal void MergePartitions(IList<FileInfo> merges, FileInfo outputFile)
+ {
+     // A Stopwatch gives elapsed milliseconds directly; the previous code mixed
+     // DateTime.Now.Millisecond (0-999) with DateTime.UtcNow.Ticks (100 ns units).
+     var mergeTimer = System.Diagnostics.Stopwatch.StartNew();
+
+     var @out = new ByteSequencesWriter(outputFile);
+
+     PriorityQueue<FileAndTop> queue = new PriorityQueueAnonymousInnerClassHelper(this, merges.Count);
+
+     var streams = new ByteSequencesReader[merges.Count];
+     try
+     {
+         // Open streams and read the top for each file
+         for (int i = 0; i < merges.Count; i++)
+         {
+             streams[i] = new ByteSequencesReader(merges[i]);
+             sbyte[] line = streams[i].Read();
+             if (line != null)
+             {
+                 queue.InsertWithOverflow(new FileAndTop(i, line));
+             }
+         }
+
+         // Unix utility sort() uses ordered array of files to pick the next line from, updating
+         // it as it reads new lines. The PQ used here is a more elegant solution and has
+         // a nicer theoretical complexity bound :) The entire sorting process is I/O bound anyway
+         // so it shouldn't make much of a difference (didn't check).
+         FileAndTop top;
+         while ((top = queue.Top()) != null)
+         {
+             @out.Write(top.Current);
+             if (!streams[top.Fd].Read(top.Current))
+             {
+                 // This partition is exhausted: drop it from the queue.
+                 queue.Pop();
+             }
+             else
+             {
+                 // top.Current was refilled in place: restore the queue order.
+                 queue.UpdateTop();
+             }
+         }
+
+         // Statistics live on the sortInfo field of this sorter, not on the SortInfo type.
+         sortInfo.MergeTime += mergeTimer.ElapsedMilliseconds;
+         sortInfo.MergeRounds++;
+     }
+     finally
+     {
+         // The logic below is: if an exception occurs in closing out, it has a priority over exceptions
+         // happening in closing streams.
+         try
+         {
+             IOUtils.Close(streams);
+         }
+         finally
+         {
+             IOUtils.Close(@out);
+         }
+     }
+ }
+
+ /// <summary>
+ /// Priority queue over partition heads, ordered by the owning sorter's comparator.
+ /// </summary>
+ private class PriorityQueueAnonymousInnerClassHelper : PriorityQueue<FileAndTop>
+ {
+     private readonly OfflineSorter sorter;
+
+     public PriorityQueueAnonymousInnerClassHelper(OfflineSorter outerInstance, int size)
+         : base(size)
+     {
+         sorter = outerInstance;
+     }
+
+     /// <summary>
+     /// Returns true when the current entry of <paramref name="a"/> sorts strictly
+     /// before the current entry of <paramref name="b"/>.
+     /// </summary>
+     public override bool LessThan(FileAndTop a, FileAndTop b)
+     {
+         int order = sorter.comparator.Compare(a.Current, b.Current);
+         return order < 0;
+     }
+ }
+
+ /// <summary>
+ /// Reads entries from <paramref name="reader"/> into the in-memory buffer until the
+ /// input is exhausted or the tracked buffer usage exceeds the RAM buffer size.
+ /// </summary>
+ /// <param name="reader"> source of the entries </param>
+ /// <returns> the size of the buffer after reading, as reported by the buffer </returns>
+ internal int ReadPartition(ByteSequencesReader reader)
+ {
+     // Stopwatch instead of DateTime.Now.Millisecond, which is only the 0-999 ms
+     // component of the current time and cannot be used to measure a duration.
+     var readTimer = System.Diagnostics.Stopwatch.StartNew();
+     var scratch = new BytesRef();
+     while ((scratch.Bytes = reader.Read()) != null)
+     {
+         scratch.Length = scratch.Bytes.Length;
+         Buffer.Append(scratch);
+         // Account for the created objects.
+         // (buffer slots do not account to buffer size.)
+         if (RamBufferSize.Bytes < BufferBytesUsed.Get())
+         {
+             break;
+         }
+     }
+     sortInfo.ReadTime += readTimer.ElapsedMilliseconds;
+     return Buffer.Size();
+ }
+
+ /// <summary>
+ /// Pairs the index of a partition stream with the entry most recently read from it.
+ /// </summary>
+ internal class FileAndTop
+ {
+     // Entry currently at the head of the partition.
+     internal readonly BytesRef Current;
+     // Index of the partition stream this entry came from.
+     internal readonly int Fd;
+
+     internal FileAndTop(int fd, sbyte[] firstLine)
+     {
+         Current = new BytesRef(firstLine);
+         Fd = fd;
+     }
+ }
+
+ /// <summary>
+ /// Utility class to emit length-prefixed byte[] entries to an output stream for sorting.
+ /// Complementary to <seealso cref="ByteSequencesReader"/>.
+ /// </summary>
+ public class ByteSequencesWriter : IDisposable
+ {
+     internal readonly DataOutput Os;
+
+     /// <summary>
+     /// Constructs a ByteSequencesWriter to the provided File </summary>
+     // NOTE(review): the stream types used here are carried over from the Java
+     // original - confirm the equivalents available to this project.
+     public ByteSequencesWriter(FileInfo file)
+         : this(new DataOutputStream(new BufferedOutputStream(new FileOutputStream(file))))
+     {
+     }
+
+     /// <summary>
+     /// Constructs a ByteSequencesWriter to the provided DataOutput </summary>
+     public ByteSequencesWriter(DataOutput os)
+     {
+         this.Os = os;
+     }
+
+     /// <summary>
+     /// Writes a BytesRef. </summary>
+     /// <seealso cref="Write(sbyte[], int, int)"/>
+     public virtual void Write(BytesRef @ref)
+     {
+         Debug.Assert(@ref != null);
+         Write(@ref.Bytes, @ref.Offset, @ref.Length);
+     }
+
+     /// <summary>
+     /// Writes a byte array. </summary>
+     /// <seealso cref="Write(sbyte[], int, int)"/>
+     public virtual void Write(sbyte[] bytes)
+     {
+         Write(bytes, 0, bytes.Length);
+     }
+
+     /// <summary>
+     /// Writes a slice of a byte array.
+     /// <para>
+     /// The length is written as a <code>short</code>, followed
+     /// by the bytes.
+     /// </para>
+     /// </summary>
+     /// <param name="bytes"> array holding the entry </param>
+     /// <param name="off"> index of the first byte to write </param>
+     /// <param name="len"> number of bytes to write; must fit in a <code>short</code> </param>
+     /// <exception cref="System.ArgumentOutOfRangeException"> if <paramref name="len"/>
+     /// is negative or larger than <see cref="short.MaxValue"/> </exception>
+     public virtual void Write(sbyte[] bytes, int off, int len)
+     {
+         Debug.Assert(bytes != null);
+         Debug.Assert(off >= 0 && off + len <= bytes.Length);
+         Debug.Assert(len >= 0);
+         // The reader decodes the prefix as a signed short, so a length outside
+         // 0..short.MaxValue would produce a record that cannot be read back.
+         // Asserts vanish in release builds, hence the explicit check.
+         if (len < 0 || len > short.MaxValue)
+         {
+             throw new System.ArgumentOutOfRangeException("len", "entry length must be between 0 and " + short.MaxValue + ": " + len);
+         }
+         Os.WriteShort((short)len);
+         Os.Write(bytes, off, len);
+     }
+
+     /// <summary>
+     /// Closes the provided <seealso cref="DataOutput"/> if it is <seealso cref="IDisposable"/>.
+     /// </summary>
+     public void Dispose()
+     {
+         var os = Os as IDisposable;
+         if (os != null)
+         {
+             os.Dispose();
+         }
+     }
+ }
+
+ /// <summary>
+ /// Utility class to read length-prefixed byte[] entries from an input.
+ /// Complementary to <seealso cref="ByteSequencesWriter"/>.
+ /// </summary>
+ public class ByteSequencesReader : IDisposable
+ {
+     internal readonly DataInput inputStream;
+
+     /// <summary>
+     /// Constructs a ByteSequencesReader from the provided File </summary>
+     // NOTE(review): the stream types used here are carried over from the Java
+     // original - confirm the equivalents available to this project.
+     public ByteSequencesReader(FileInfo file)
+         : this(new DataInputStream(new BufferedInputStream(new FileInputStream(file))))
+     {
+     }
+
+     /// <summary>
+     /// Constructs a ByteSequencesReader from the provided DataInput </summary>
+     public ByteSequencesReader(DataInput inputStream)
+     {
+         this.inputStream = inputStream;
+     }
+
+     /// <summary>
+     /// Reads the next entry into the provided <seealso cref="BytesRef"/>. The internal
+     /// storage is resized if needed.
+     /// </summary>
+     /// <returns> Returns <code>false</code> if EOF occurred when trying to read
+     /// the header of the next sequence. Returns <code>true</code> otherwise. </returns>
+     /// <exception cref="EOFException"> if the file ends before the full sequence is read. </exception>
+     public virtual bool Read(BytesRef @ref)
+     {
+         short length;
+         try
+         {
+             length = inputStream.ReadShort();
+         }
+         catch (EOFException)
+         {
+             // EOF while reading the header means there are no more entries.
+             return false;
+         }
+
+         // Same sanity check as the array-returning overload.
+         Debug.Assert(length >= 0, "Sanity: sequence length < 0: " + length);
+         @ref.Grow(length);
+         @ref.Offset = 0;
+         @ref.Length = length;
+         inputStream.ReadFully(@ref.Bytes, 0, length);
+         return true;
+     }
+
+     /// <summary>
+     /// Reads the next entry and returns it if successful.
+     /// </summary>
+     /// <seealso cref="Read(BytesRef)"/>
+     /// <returns> Returns <code>null</code> if EOF occurred before the next entry
+     /// could be read. </returns>
+     /// <exception cref="EOFException"> if the file ends before the full sequence is read. </exception>
+     public virtual sbyte[] Read()
+     {
+         short length;
+         try
+         {
+             length = inputStream.ReadShort();
+         }
+         catch (EOFException)
+         {
+             // EOF while reading the header means there are no more entries.
+             return null;
+         }
+
+         Debug.Assert(length >= 0, "Sanity: sequence length < 0: " + length);
+         sbyte[] result = new sbyte[length];
+         inputStream.ReadFully(result);
+         return result;
+     }
+
+     /// <summary>
+     /// Closes the provided <seealso cref="DataInput"/> if it is <seealso cref="IDisposable"/>.
+     /// </summary>
+     public void Dispose()
+     {
+         var @is = inputStream as IDisposable;
+         if (@is != null)
+         {
+             @is.Dispose();
+         }
+     }
+ }
+
+ /// <summary>
+ /// Gets the comparator this sorter uses to order entries.
+ /// </summary>
+ public IComparer<BytesRef> Comparator
+ {
+     get { return this.comparator; }
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Lucene.Net.Suggest.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Lucene.Net.Suggest.csproj b/src/Lucene.Net.Suggest/Lucene.Net.Suggest.csproj
new file mode 100644
index 0000000..9b86c9c
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Lucene.Net.Suggest.csproj
@@ -0,0 +1,108 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{FBD2EB4D-EAC9-409C-A23D-64D27DF23576}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.Search</RootNamespace>
+ <AssemblyName>Lucene.Net.Suggest</AssemblyName>
+ <TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ <TargetFrameworkProfile />
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="System.Xml.Linq" />
+ <Reference Include="System.Data.DataSetExtensions" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="Spell\CombineSuggestion.cs" />
+ <Compile Include="Spell\Dictionary.cs" />
+ <Compile Include="Spell\DirectSpellChecker.cs" />
+ <Compile Include="Spell\HighFrequencyDictionary.cs" />
+ <Compile Include="Spell\JaroWinklerDistance.cs" />
+ <Compile Include="Spell\LevensteinDistance.cs" />
+ <Compile Include="Spell\LuceneDictionary.cs" />
+ <Compile Include="Spell\LuceneLevenshteinDistance.cs" />
+ <Compile Include="Spell\NGramDistance.cs" />
+ <Compile Include="Spell\PlainTextDictionary.cs" />
+ <Compile Include="Spell\SpellChecker.cs" />
+ <Compile Include="Spell\StringDistance.cs" />
+ <Compile Include="Spell\SuggestMode.cs" />
+ <Compile Include="Spell\SuggestWord.cs" />
+ <Compile Include="Spell\SuggestWordFrequencyComparator.cs" />
+ <Compile Include="Spell\SuggestWordQueue.cs" />
+ <Compile Include="Spell\SuggestWordScoreComparator.cs" />
+ <Compile Include="Spell\TermFreqIterator.cs" />
+ <Compile Include="Spell\WordBreakSpellChecker.cs" />
+ <Compile Include="Suggest\Analyzing\AnalyzingInfixSuggester.cs" />
+ <Compile Include="Suggest\Analyzing\AnalyzingSuggester.cs" />
+ <Compile Include="Suggest\Analyzing\BlendedInfixSuggester.cs" />
+ <Compile Include="Suggest\Analyzing\FreeTextSuggester.cs" />
+ <Compile Include="Suggest\Analyzing\FSTUtil.cs" />
+ <Compile Include="Suggest\Analyzing\FuzzySuggester.cs" />
+ <Compile Include="Suggest\Analyzing\SuggestStopFilter.cs" />
+ <Compile Include="Suggest\BufferedInputIterator.cs" />
+ <Compile Include="Suggest\BufferingTermFreqIteratorWrapper.cs" />
+ <Compile Include="Suggest\DocumentDictionary.cs" />
+ <Compile Include="Suggest\DocumentValueSourceDictionary.cs" />
+ <Compile Include="Suggest\FileDictionary.cs" />
+ <Compile Include="Suggest\Fst\BytesRefSorter.cs" />
+ <Compile Include="Suggest\Fst\ExternalRefSorter.cs" />
+ <Compile Include="Suggest\Fst\FSTCompletion.cs" />
+ <Compile Include="Suggest\Fst\FSTCompletionBuilder.cs" />
+ <Compile Include="Suggest\Fst\FSTCompletionLookup.cs" />
+ <Compile Include="Suggest\Fst\WFSTCompletionLookup.cs" />
+ <Compile Include="Suggest\InMemorySorter.cs" />
+ <Compile Include="Suggest\InputIterator.cs" />
+ <Compile Include="Suggest\Jaspell\JaspellLookup.cs" />
+ <Compile Include="Suggest\Jaspell\JaspellTernarySearchTrie.cs" />
+ <Compile Include="Suggest\Lookup.cs" />
+ <Compile Include="Suggest\SortedInputIterator.cs" />
+ <Compile Include="Suggest\SortedTermFreqIteratorWrapper.cs" />
+ <Compile Include="Suggest\Tst\TernaryTreeNode.cs" />
+ <Compile Include="Suggest\Tst\TSTAutocomplete.cs" />
+ <Compile Include="Suggest\Tst\TSTLookup.cs" />
+ <Compile Include="Suggest\UnsortedInputIterator.cs" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\Lucene.Net.Core\Lucene.Net.csproj">
+ <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <ItemGroup />
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Properties/AssemblyInfo.cs b/src/Lucene.Net.Suggest/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..55818e7
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Suggest")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Lucene.Net.Suggest")]
+[assembly: AssemblyCopyright("Copyright © 2014")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("0e1499f7-850e-4583-8994-623eb2480200")]
+
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/RectangularArrays.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/RectangularArrays.cs b/src/Lucene.Net.Suggest/RectangularArrays.cs
new file mode 100644
index 0000000..f0cb588
--- /dev/null
+++ b/src/Lucene.Net.Suggest/RectangularArrays.cs
@@ -0,0 +1,29 @@
+//----------------------------------------------------------------------------------------
+// Copyright © 2007 - 2014 Tangible Software Solutions Inc.
+// This class can be used by anyone provided that the copyright notice remains intact.
+//
+// This class provides the logic to simulate Java rectangular arrays, which are jagged
+// arrays with inner arrays of the same length. A size of -1 indicates unknown length.
+//----------------------------------------------------------------------------------------
+internal static partial class RectangularArrays
+{
+    /// <summary>
+    /// Builds a jagged int array with <paramref name="Size1"/> rows, each of length
+    /// <paramref name="Size2"/>. A negative <paramref name="Size1"/> yields
+    /// <c>null</c>; a negative <paramref name="Size2"/> leaves every row unallocated.
+    /// </summary>
+    internal static int[][] ReturnRectangularIntArray(int Size1, int Size2)
+    {
+        // Unknown outer length: there is nothing to allocate.
+        if (Size1 <= -1)
+        {
+            return null;
+        }
+
+        int[][] rows = new int[Size1][];
+
+        // Unknown inner length: hand back the outer array with null rows.
+        if (Size2 <= -1)
+        {
+            return rows;
+        }
+
+        for (int row = 0; row < Size1; row++)
+        {
+            rows[row] = new int[Size2];
+        }
+
+        return rows;
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/CombineSuggestion.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/CombineSuggestion.cs b/src/Lucene.Net.Suggest/Spell/CombineSuggestion.cs
new file mode 100644
index 0000000..de534d5
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/CombineSuggestion.cs
@@ -0,0 +1,47 @@
+namespace Lucene.Net.Search.Spell
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// <para>A suggestion produced by combining one or more of the original query terms.</para>
+ /// </summary>
+ public class CombineSuggestion
+ {
+     /// <summary>
+     /// <para>Indexes, into the array of terms that was passed in, of the terms that make up this combination</para>
+     /// </summary>
+     public readonly int[] originalTermIndexes;
+
+     /// <summary>
+     /// <para>The suggested word combination</para>
+     /// </summary>
+     public readonly SuggestWord suggestion;
+
+     /// <summary>
+     /// Creates a new CombineSuggestion.
+     /// </summary>
+     /// <param name="suggestion"> the combined word being suggested </param>
+     /// <param name="originalTermIndexes"> indexes of the original terms that form
+     /// this combined suggestion </param>
+     public CombineSuggestion(SuggestWord suggestion, int[] originalTermIndexes)
+     {
+         this.originalTermIndexes = originalTermIndexes;
+         this.suggestion = suggestion;
+     }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/Dictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/Dictionary.cs b/src/Lucene.Net.Suggest/Spell/Dictionary.cs
new file mode 100644
index 0000000..e5d91ce
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/Dictionary.cs
@@ -0,0 +1,35 @@
+using Lucene.Net.Search.Suggest;
+
+namespace Lucene.Net.Search.Spell
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// A simple interface representing a Dictionary. A Dictionary
+ /// here is a list of entries, where every entry consists of
+ /// term, weight and payload. The entries are exposed through
+ /// <see cref="EntryIterator"/>.
+ /// </summary>
+ public interface Dictionary
+ {
+
+ /// <summary>
+ /// Gets an iterator over all the entries of this dictionary. </summary>
+ /// <value> an <see cref="InputIterator"/> over the entries </value>
+ InputIterator EntryIterator { get; }
+ }
+}
\ No newline at end of file