You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2015/05/16 20:19:24 UTC
[3/3] lucenenet git commit: put back commented out code
put back commented out code
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/e3833d6c
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/e3833d6c
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/e3833d6c
Branch: refs/heads/master
Commit: e3833d6ccac28ed7879336e826cdeb5e3f9bdc77
Parents: 6133cf8
Author: Laimonas Simutis <la...@gmail.com>
Authored: Sat May 16 14:17:00 2015 -0400
Committer: Laimonas Simutis <la...@gmail.com>
Committed: Sat May 16 14:17:00 2015 -0400
----------------------------------------------------------------------
src/Lucene.Net.Core/Util/OfflineSorter.cs | 482 +++++++++++++++++++++++++
1 file changed, 482 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/e3833d6c/src/Lucene.Net.Core/Util/OfflineSorter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/OfflineSorter.cs b/src/Lucene.Net.Core/Util/OfflineSorter.cs
index afc0159..db0a86b 100644
--- a/src/Lucene.Net.Core/Util/OfflineSorter.cs
+++ b/src/Lucene.Net.Core/Util/OfflineSorter.cs
@@ -34,6 +34,488 @@ namespace Lucene.Net.Util
/// </summary>
public sealed class OfflineSorter
{
+ // LUCENENET TODO: keep this code as it will be used by subprojects once ported
+ // private bool InstanceFieldsInitialized = false;
+ //
+ // private void InitializeInstanceFields()
+ // {
+ // Buffer = new BytesRefArray(BufferBytesUsed);
+ // }
+ //
+ // /// <summary>
+ // /// Convenience constant for megabytes </summary>
+ // public const long MB = 1024 * 1024;
+ // /// <summary>
+ // /// Convenience constant for gigabytes </summary>
+ // public static readonly long GB = MB * 1024;
+ //
+ // /// <summary>
+ // /// Minimum recommended buffer size for sorting.
+ // /// </summary>
+ // public const long MIN_BUFFER_SIZE_MB = 32;
+ //
+ // /// <summary>
+ // /// Absolute minimum required buffer size for sorting.
+ // /// </summary>
+ // public static readonly long ABSOLUTE_MIN_SORT_BUFFER_SIZE = MB / 2;
+ // private const string MIN_BUFFER_SIZE_MSG = "At least 0.5MB RAM buffer is needed";
+ //
+ // /// <summary>
+ // /// Maximum number of temporary files before doing an intermediate merge.
+ // /// </summary>
+ // public const int MAX_TEMPFILES = 128;
+ //
+ // /// <summary>
+ // /// A bit more descriptive unit for constructors.
+ // /// </summary>
+ // /// <seealso cref="BufferSize.Automatic()"/>
+ // /// <seealso cref="BufferSize.Megabytes(long)"/>
+ // public sealed class BufferSize
+ // {
+ // internal readonly int Bytes;
+ //
+ // internal BufferSize(long bytes)
+ // {
+ // if (bytes > int.MaxValue)
+ // {
+ // throw new System.ArgumentException("Buffer too large for Java (" + (int.MaxValue / MB) + "mb max): " + bytes);
+ // }
+ //
+ // if (bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE)
+ // {
+ // throw new System.ArgumentException(MIN_BUFFER_SIZE_MSG + ": " + bytes);
+ // }
+ //
+ // this.Bytes = (int)bytes;
+ // }
+ //
+ // /// <summary>
+ // /// Creates a <seealso cref="BufferSize"/> in MB. The given
+ // /// values must be &gt; 0 and &lt; 2048.
+ // /// </summary>
+ // public static BufferSize Megabytes(long mb)
+ // {
+ // return new BufferSize(mb * MB);
+ // }
+ //
+ // /// <summary>
+ // /// Approximately half of the currently available free heap, but no less
+ // /// than <see cref="ABSOLUTE_MIN_SORT_BUFFER_SIZE"/>. However, if the current heap allocation
+ // /// is insufficient, or if a large portion of unallocated heap space is available
+ // /// for sorting, the maximum allowed heap size is consulted.
+ // /// </summary>
+ // public static BufferSize Automatic()
+ // {
+ // var proc = Process.GetCurrentProcess();
+ //
+ // // take sizes in "conservative" order
+ // long max = proc.PeakVirtualMemorySize64; // max allocated; java has it as Runtime.maxMemory();
+ // long total = proc.VirtualMemorySize64; // currently allocated; java has it as Runtime.totalMemory();
+ // long free = rt.freeMemory(); // unused portion of currently allocated
+ // long totalAvailableBytes = max - total + free;
+ //
+ // // by free mem (attempting to not grow the heap for this)
+ // long sortBufferByteSize = free / 2;
+ // const long minBufferSizeBytes = MIN_BUFFER_SIZE_MB * MB;
+ // if (sortBufferByteSize < minBufferSizeBytes || totalAvailableBytes > 10 * minBufferSizeBytes) // lets see if we need/should to grow the heap
+ // {
+ // if (totalAvailableBytes / 2 > minBufferSizeBytes) // there is enough mem for a reasonable buffer
+ // {
+ // sortBufferByteSize = totalAvailableBytes / 2; // grow the heap
+ // }
+ // else
+ // {
+ // // heap seems smallish; let's be conservative and fall back to free/2
+ // sortBufferByteSize = Math.Max(ABSOLUTE_MIN_SORT_BUFFER_SIZE, sortBufferByteSize);
+ // }
+ // }
+ // return new BufferSize(Math.Min((long)int.MaxValue, sortBufferByteSize));
+ // }
+ // }
+ //
+ // /// <summary>
+ // /// Sort info (debugging mostly).
+ // /// </summary>
+ // public class SortInfo
+ // {
+ // internal bool InstanceFieldsInitialized = false;
+ //
+ // internal virtual void InitializeInstanceFields()
+ // {
+ // BufferSize = OuterInstance.RamBufferSize.Bytes;
+ // }
+ //
+ // private readonly OfflineSorter OuterInstance;
+ //
+ // /// <summary>
+ // /// number of temporary files created when merging partitions </summary>
+ // public int TempMergeFiles;
+ // /// <summary>
+ // /// number of partition merges </summary>
+ // public int MergeRounds;
+ // /// <summary>
+ // /// number of lines of data read </summary>
+ // public int Lines;
+ // /// <summary>
+ // /// time spent merging sorted partitions (in milliseconds) </summary>
+ // public long MergeTime;
+ // /// <summary>
+ // /// time spent sorting data (in milliseconds) </summary>
+ // public long SortTime;
+ // /// <summary>
+ // /// total time spent (in milliseconds) </summary>
+ // public long TotalTime;
+ // /// <summary>
+ // /// time spent in i/o read (in milliseconds) </summary>
+ // public long ReadTime;
+ // /// <summary>
+ // /// read buffer size (in bytes) </summary>
+ // public long BufferSize;
+ //
+ // /// <summary>
+ // /// create a new SortInfo (with empty statistics) for debugging </summary>
+ // public SortInfo(OfflineSorter outerInstance)
+ // {
+ // this.OuterInstance = outerInstance;
+ //
+ // if (!InstanceFieldsInitialized)
+ // {
+ // InitializeInstanceFields();
+ // InstanceFieldsInitialized = true;
+ // }
+ // }
+ //
+ // public override string ToString()
+ // {
+ // return string.Format("time=%.2f sec. total (%.2f reading, %.2f sorting, %.2f merging), lines=%d, temp files=%d, merges=%d, soft ram limit=%.2f MB", TotalTime / 1000.0d, ReadTime / 1000.0d, SortTime / 1000.0d, MergeTime / 1000.0d, Lines, TempMergeFiles, MergeRounds, (double)BufferSize / MB);
+ // }
+ // }
+ //
+ // private readonly BufferSize RamBufferSize;
+ //
+ // private readonly Counter BufferBytesUsed = Counter.NewCounter();
+ // private BytesRefArray Buffer;
+ // private SortInfo sortInfo;
+ // private readonly int MaxTempFiles;
+ // private readonly IComparer<BytesRef> comparator;
+ //
+ // /// <summary>
+ // /// Default comparator: sorts in binary (codepoint) order </summary>
+ // public static readonly IComparer<BytesRef> DEFAULT_COMPARATOR = BytesRef.UTF8SortedAsUnicodeComparator.Instance;
+ //
+ // /// <summary>
+ // /// Default constructor.
+ // /// </summary>
+ // /// <seealso cref="DefaultTempDir()"/>
+ // /// <seealso cref="BufferSize.Automatic()"/>
+ // public OfflineSorter()
+ // : this(DEFAULT_COMPARATOR, BufferSize.Automatic(), DefaultTempDir(), MAX_TEMPFILES)
+ // {
+ // if (!InstanceFieldsInitialized)
+ // {
+ // InitializeInstanceFields();
+ // InstanceFieldsInitialized = true;
+ // }
+ // }
+ //
+ // /// <summary>
+ // /// Default constructor with a custom comparator.
+ // /// </summary>
+ // /// <seealso cref="DefaultTempDir()"/>
+ // /// <seealso cref="BufferSize.Automatic()"/>
+ // public OfflineSorter(IComparer<BytesRef> comparator)
+ // : this(comparator, BufferSize.Automatic(), DefaultTempDir(), MAX_TEMPFILES)
+ // {
+ // if (!InstanceFieldsInitialized)
+ // {
+ // InitializeInstanceFields();
+ // InstanceFieldsInitialized = true;
+ // }
+ // }
+ //
+ // /// <summary>
+ // /// All-details constructor.
+ // /// </summary>
+ // public OfflineSorter(IComparer<BytesRef> comparator, BufferSize ramBufferSize, /*DirectoryInfo tempDirectory,*/ int maxTempfiles)
+ // {
+ // if (!InstanceFieldsInitialized)
+ // {
+ // InitializeInstanceFields();
+ // InstanceFieldsInitialized = true;
+ // }
+ // if (ramBufferSize.Bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE)
+ // {
+ // throw new System.ArgumentException(MIN_BUFFER_SIZE_MSG + ": " + ramBufferSize.Bytes);
+ // }
+ //
+ // if (maxTempfiles < 2)
+ // {
+ // throw new System.ArgumentException("maxTempFiles must be >= 2");
+ // }
+ //
+ // this.RamBufferSize = ramBufferSize;
+ // this.MaxTempFiles = maxTempfiles;
+ // this.comparator = comparator;
+ // }
+ //
+ // /// <summary>
+ // /// Sort input to output, explicit hint for the buffer size. The amount of allocated
+ // /// memory may deviate from the hint (may be smaller or larger).
+ // /// </summary>
+ // public SortInfo Sort(FileInfo input, FileInfo output)
+ // {
+ // sortInfo = new SortInfo(this) {TotalTime = DateTime.Now.Millisecond};
+ //
+ // output.Delete();
+ //
+ // var merges = new List<FileInfo>();
+ // bool success2 = false;
+ // try
+ // {
+ // var inputStream = new ByteSequencesReader(input);
+ // bool success = false;
+ // try
+ // {
+ // int lines = 0;
+ // while ((lines = ReadPartition(inputStream)) > 0)
+ // {
+ // merges.Add(SortPartition(lines));
+ // sortInfo.TempMergeFiles++;
+ // sortInfo.Lines += lines;
+ //
+ // // Handle intermediate merges.
+ // if (merges.Count == MaxTempFiles)
+ // {
+ // var intermediate = new FileInfo(Path.GetTempFileName());
+ // try
+ // {
+ // MergePartitions(merges, intermediate);
+ // }
+ // finally
+ // {
+ // foreach (var file in merges)
+ // {
+ // file.Delete();
+ // }
+ // merges.Clear();
+ // merges.Add(intermediate);
+ // }
+ // sortInfo.TempMergeFiles++;
+ // }
+ // }
+ // success = true;
+ // }
+ // finally
+ // {
+ // if (success)
+ // {
+ // IOUtils.Close(inputStream);
+ // }
+ // else
+ // {
+ // IOUtils.CloseWhileHandlingException(inputStream);
+ // }
+ // }
+ //
+ // // One partition, try to rename or copy if unsuccessful.
+ // if (merges.Count == 1)
+ // {
+ // FileInfo single = merges[0];
+ // Copy(single, output);
+ // try
+ // {
+ // File.Delete(single.FullName);
+ // }
+ // catch (Exception)
+ // {
+ // // ignored
+ // }
+ // }
+ // else
+ // {
+ // // otherwise merge the partitions with a priority queue.
+ // MergePartitions(merges, output);
+ // }
+ // success2 = true;
+ // }
+ // finally
+ // {
+ // foreach (FileInfo file in merges)
+ // {
+ // file.Delete();
+ // }
+ // if (!success2)
+ // {
+ // output.Delete();
+ // }
+ // }
+ //
+ // sortInfo.TotalTime = (DateTime.Now.Millisecond - sortInfo.TotalTime);
+ // return sortInfo;
+ // }
+ //
+ // /// <summary>
+ // /// Returns the default temporary directory. By default, the System's temp folder. If not accessible
+ // /// or not available, an IOException is thrown
+ // /// </summary>
+ // public static DirectoryInfo DefaultTempDir()
+ // {
+ // return new DirectoryInfo(Path.GetTempPath());
+ // }
+ //
+ // /// <summary>
+ // /// Copies one file to another.
+ // /// </summary>
+ // private static void Copy(FileInfo file, FileInfo output)
+ // {
+ // File.Copy(file.FullName, output.FullName);
+ // }
+ //
+ // /// <summary>
+ // /// Sort a single partition in-memory. </summary>
+ // internal FileInfo SortPartition(int len)
+ // {
+ // var data = this.Buffer;
+ // var tempFile = new FileInfo(Path.GetTempFileName());
+ // //var tempFile1 = File.Create(new ());
+ // //FileInfo tempFile = FileInfo.createTempFile("sort", "partition", TempDirectory);
+ //
+ // long start = DateTime.Now.Millisecond;
+ // sortInfo.SortTime += (DateTime.Now.Millisecond - start);
+ //
+ // var @out = new ByteSequencesWriter(tempFile);
+ // BytesRef spare;
+ // try
+ // {
+ // BytesRefIterator iter = Buffer.Iterator(comparator);
+ // while ((spare = iter.Next()) != null)
+ // {
+ // Debug.Assert(spare.Length <= short.MaxValue);
+ // @out.Write(spare);
+ // }
+ //
+ // @out.Dispose();
+ //
+ // // Clean up the buffer for the next partition.
+ // data.Clear();
+ // return tempFile;
+ // }
+ // finally
+ // {
+ // IOUtils.Close(@out);
+ // }
+ // }
+ //
+ // /// <summary>
+ // /// Merge a list of sorted temporary files (partitions) into an output file </summary>
+ // internal void MergePartitions(IList<FileInfo> merges, FileInfo outputFile)
+ // {
+ // long start = DateTime.Now.Millisecond;
+ //
+ // var @out = new ByteSequencesWriter(outputFile);
+ //
+ // PriorityQueue<FileAndTop> queue = new PriorityQueueAnonymousInnerClassHelper(this, merges.Count);
+ //
+ // var streams = new ByteSequencesReader[merges.Count];
+ // try
+ // {
+ // // Open streams and read the top for each file
+ // for (int i = 0; i < merges.Count; i++)
+ // {
+ // streams[i] = new ByteSequencesReader(merges[i]);
+ // sbyte[] line = streams[i].Read();
+ // if (line != null)
+ // {
+ // queue.InsertWithOverflow(new FileAndTop(i, line));
+ // }
+ // }
+ //
+ // // Unix utility sort() uses ordered array of files to pick the next line from, updating
+ // // it as it reads new lines. The PQ used here is a more elegant solution and has
+ // // a nicer theoretical complexity bound :) The entire sorting process is I/O bound anyway
+ // // so it shouldn't make much of a difference (didn't check).
+ // FileAndTop top;
+ // while ((top = queue.Top()) != null)
+ // {
+ // @out.Write(top.Current);
+ // if (!streams[top.Fd].Read(top.Current))
+ // {
+ // queue.Pop();
+ // }
+ // else
+ // {
+ // queue.UpdateTop();
+ // }
+ // }
+ //
+ // SortInfo.MergeTime += DateTime.UtcNow.Ticks - start;
+ // SortInfo.MergeRounds++;
+ // }
+ // finally
+ // {
+ // // The logic below is: if an exception occurs in closing out, it has a priority over exceptions
+ // // happening in closing streams.
+ // try
+ // {
+ // IOUtils.Close(streams);
+ // }
+ // finally
+ // {
+ // IOUtils.Close(@out);
+ // }
+ // }
+ // }
+ //
+ // private class PriorityQueueAnonymousInnerClassHelper : PriorityQueue<FileAndTop>
+ // {
+ // private readonly OfflineSorter OuterInstance;
+ //
+ // public PriorityQueueAnonymousInnerClassHelper(OfflineSorter outerInstance, int size)
+ // : base(size)
+ // {
+ // this.OuterInstance = outerInstance;
+ // }
+ //
+ // public override bool LessThan(FileAndTop a, FileAndTop b)
+ // {
+ // return OuterInstance.comparator.Compare(a.Current, b.Current) < 0;
+ // }
+ // }
+ //
+ // /// <summary>
+ // /// Read in a single partition of data </summary>
+ // internal int ReadPartition(ByteSequencesReader reader)
+ // {
+ // long start = DateTime.Now.Millisecond;
+ // var scratch = new BytesRef();
+ // while ((scratch.Bytes = reader.Read()) != null)
+ // {
+ // scratch.Length = scratch.Bytes.Length;
+ // Buffer.Append(scratch);
+ // // Account for the created objects.
+ // // (buffer slots do not count toward the buffer size.)
+ // if (RamBufferSize.Bytes < BufferBytesUsed.Get())
+ // {
+ // break;
+ // }
+ // }
+ // sortInfo.ReadTime += (DateTime.Now.Millisecond - start);
+ // return Buffer.Size();
+ // }
+ //
+ // internal class FileAndTop
+ // {
+ // internal readonly int Fd;
+ // internal readonly BytesRef Current;
+ //
+ // internal FileAndTop(int fd, sbyte[] firstLine)
+ // {
+ // this.Fd = fd;
+ // this.Current = new BytesRef(firstLine);
+ // }
+ // }
+ //
+
/// <summary>
/// Utility class to emit length-prefixed byte[] entries to an output stream for sorting.
/// Complementary to <seealso cref="ByteSequencesReader"/>.