You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2022/11/26 12:17:10 UTC
[lucenenet] 04/05: Lucene.Net.Util.OfflineSorter: Added back original tests using FileInfo and fixed bugs that were preventing the original behavior
This is an automated email from the ASF dual-hosted git repository.
nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git
commit 0debb1c4d8a9f00d5f6ea205ac44b878f0f3a346
Author: Shad Storhaug <sh...@shadstorhaug.com>
AuthorDate: Wed Nov 23 20:49:06 2022 +0700
Lucene.Net.Util.OfflineSorter: Added back original tests using FileInfo and fixed bugs that were preventing the original behavior
---
src/Lucene.Net.Tests/Util/TestOfflineSorter.cs | 111 +++++++++++++++++++++++--
src/Lucene.Net/Util/OfflineSorter.cs | 28 ++++++-
2 files changed, 131 insertions(+), 8 deletions(-)
diff --git a/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs b/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs
index 55fb91152..50fc13f83 100644
--- a/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs
+++ b/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs
@@ -5,6 +5,7 @@ using System.Collections.Generic;
using System.IO;
using JCG = J2N.Collections.Generic;
using Assert = Lucene.Net.TestFramework.Assert;
+using Lucene.Net.Attributes;
namespace Lucene.Net.Util
{
@@ -72,6 +73,15 @@ namespace Lucene.Net.Util
#pragma warning restore CA1825 // Avoid zero-length array allocations.
}
+ [Test]
+ [LuceneNetSpecific]
+ public virtual void TestEmpty_AsStream()
+ {
+#pragma warning disable CA1825 // Avoid zero-length array allocations.
+ CheckSortAsStream(new OfflineSorter(), new byte[][] { });
+#pragma warning restore CA1825 // Avoid zero-length array allocations.
+ }
+
[Test]
public virtual void TestSingleLine()
{
@@ -80,6 +90,15 @@ namespace Lucene.Net.Util
#pragma warning restore 612, 618
}
+ [Test]
+ [LuceneNetSpecific]
+ public virtual void TestSingleLine_AsStream()
+ {
+#pragma warning disable 612, 618
+ CheckSortAsStream(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.CHARSET_UTF_8) });
+#pragma warning restore 612, 618
+ }
+
[Test]
public virtual void TestIntermediateMerges()
{
@@ -88,6 +107,15 @@ namespace Lucene.Net.Util
Assert.IsTrue(info.MergeRounds > 10);
}
+ [Test]
+ [LuceneNetSpecific]
+ public virtual void TestIntermediateMerges_AsStream()
+ {
+ // Sort 20 mb worth of data with 1mb buffer, binary merging.
+ OfflineSorter.SortInfo info = CheckSortAsStream(new OfflineSorter(OfflineSorter.DEFAULT_COMPARER, OfflineSorter.BufferSize.Megabytes(1), OfflineSorter.DefaultTempDir, 2), GenerateRandom((int)OfflineSorter.MB * 20));
+ Assert.IsTrue(info.MergeRounds > 10);
+ }
+
[Test]
public virtual void TestSmallRandom()
{
@@ -96,6 +124,15 @@ namespace Lucene.Net.Util
Assert.AreEqual(1, sortInfo.MergeRounds);
}
+ [Test]
+ [LuceneNetSpecific]
+ public virtual void TestSmallRandom_AsStream()
+ {
+ // Sort 20 mb worth of data with 1mb buffer.
+ OfflineSorter.SortInfo sortInfo = CheckSortAsStream(new OfflineSorter(OfflineSorter.DEFAULT_COMPARER, OfflineSorter.BufferSize.Megabytes(1), OfflineSorter.DefaultTempDir, OfflineSorter.MAX_TEMPFILES), GenerateRandom((int)OfflineSorter.MB * 20));
+ Assert.AreEqual(1, sortInfo.MergeRounds);
+ }
+
[Test]
[Nightly]
public virtual void TestLargerRandom()
@@ -104,6 +141,15 @@ namespace Lucene.Net.Util
CheckSort(new OfflineSorter(OfflineSorter.DEFAULT_COMPARER, OfflineSorter.BufferSize.Megabytes(16), OfflineSorter.DefaultTempDir, OfflineSorter.MAX_TEMPFILES), GenerateRandom((int)OfflineSorter.MB * 100));
}
+ [Test]
+ [Nightly]
+ [LuceneNetSpecific]
+ public virtual void TestLargerRandom_AsStream()
+ {
+ // Sort 100MB worth of data with 16mb buffer.
+ CheckSortAsStream(new OfflineSorter(OfflineSorter.DEFAULT_COMPARER, OfflineSorter.BufferSize.Megabytes(16), OfflineSorter.DefaultTempDir, OfflineSorter.MAX_TEMPFILES), GenerateRandom((int)OfflineSorter.MB * 100));
+ }
+
private byte[][] GenerateRandom(int howMuchData)
{
JCG.List<byte[]> data = new JCG.List<byte[]>();
@@ -136,10 +182,29 @@ namespace Lucene.Net.Util
/// </summary>
private OfflineSorter.SortInfo CheckSort(OfflineSorter sort, byte[][] data)
{
- using FileStream unsorted = WriteAll("unsorted", data);
+ FileInfo unsorted = WriteAll("unsorted", data);
+
+ Array.Sort(data, unsignedByteOrderComparer);
+ FileInfo golden = WriteAll("golden", data);
+
+ FileInfo sorted = new FileInfo(Path.Combine(tempDir.FullName, "sorted"));
+ OfflineSorter.SortInfo sortInfo = sort.Sort(unsorted, sorted);
+ //System.out.println("Input size [MB]: " + unsorted.Length() / (1024 * 1024));
+ //System.out.println(sortInfo);
+
+ AssertFilesIdentical(golden, sorted);
+ return sortInfo;
+ }
+
+ /// <summary>
+ /// Check sorting data on an instance of <see cref="OfflineSorter"/>.
+ /// </summary>
+ private OfflineSorter.SortInfo CheckSortAsStream(OfflineSorter sort, byte[][] data)
+ {
+ using FileStream unsorted = WriteAllAsStream("unsorted", data);
Array.Sort(data, unsignedByteOrderComparer);
- using FileStream golden = WriteAll("golden", data);
+ using FileStream golden = WriteAllAsStream("golden", data);
string sortedFile = Path.Combine(tempDir.FullName, "sorted");
using FileStream sorted = new FileStream(sortedFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.Read, bufferSize: OfflineSorter.DEFAULT_FILESTREAM_BUFFER_SIZE, FileOptions.DeleteOnClose);
@@ -151,10 +216,33 @@ namespace Lucene.Net.Util
return sortInfo;
}
+
/// <summary>
/// Make sure two files are byte-byte identical.
/// </summary>
- // LUCENENET specific - switched to using FileStream rather than FileInfo
+ private void AssertFilesIdentical(FileInfo golden, FileInfo sorted)
+ {
+ Assert.AreEqual(golden.Length, sorted.Length);
+
+ byte[] buf1 = new byte[64 * 1024];
+ byte[] buf2 = new byte[64 * 1024];
+ int len;
+ using Stream is1 = golden.Open(FileMode.Open, FileAccess.Read, FileShare.Delete);
+ using Stream is2 = sorted.Open(FileMode.Open, FileAccess.Read, FileShare.Delete);
+ while ((len = is1.Read(buf1, 0, buf1.Length)) > 0)
+ {
+ is2.Read(buf2, 0, len);
+ for (int i = 0; i < len; i++)
+ {
+ Assert.AreEqual(buf1[i], buf2[i]);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Make sure two files are byte-for-byte identical.
+ /// </summary>
+ // LUCENENET specific - test using FileStream rather than FileInfo
private void AssertFilesIdentical(FileStream golden, FileStream sorted)
{
Assert.AreEqual(golden.Length, sorted.Length);
@@ -174,8 +262,21 @@ namespace Lucene.Net.Util
}
}
- // LUCENENET specific - switched to using FileStream rather than FileInfo
- private FileStream WriteAll(string name, byte[][] data)
+ private FileInfo WriteAll(string name, byte[][] data)
+ {
+ FileInfo file = new FileInfo(Path.Combine(tempDir.FullName, name));
+ using (file.Create()) { }
+ OfflineSorter.ByteSequencesWriter w = new OfflineSorter.ByteSequencesWriter(file);
+ foreach (byte[] datum in data)
+ {
+ w.Write(datum);
+ }
+ w.Dispose();
+ return file;
+ }
+
+ // LUCENENET specific - test using FileStream rather than FileInfo
+ private FileStream WriteAllAsStream(string name, byte[][] data)
{
FileInfo file = new FileInfo(Path.Combine(tempDir.FullName, name));
var stream = new FileStream(file.FullName, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.Read, bufferSize: OfflineSorter.DEFAULT_FILESTREAM_BUFFER_SIZE, FileOptions.DeleteOnClose);
diff --git a/src/Lucene.Net/Util/OfflineSorter.cs b/src/Lucene.Net/Util/OfflineSorter.cs
index d484aa1dc..5e0d0ac18 100644
--- a/src/Lucene.Net/Util/OfflineSorter.cs
+++ b/src/Lucene.Net/Util/OfflineSorter.cs
@@ -405,11 +405,33 @@ namespace Lucene.Net.Util
if (output is null)
throw new ArgumentNullException(nameof(output));
+ output.Delete();
+
using FileStream inputStream = new FileStream(input.FullName, FileMode.Open, FileAccess.ReadWrite,
FileShare.Read, bufferSize: DEFAULT_FILESTREAM_BUFFER_SIZE, FileOptions.DeleteOnClose | FileOptions.RandomAccess);
- using FileStream outputStream = new FileStream(output.FullName, FileMode.Open, FileAccess.ReadWrite,
- FileShare.Read, bufferSize: DEFAULT_FILESTREAM_BUFFER_SIZE, FileOptions.DeleteOnClose | FileOptions.RandomAccess);
- return Sort(inputStream, outputStream);
+ using FileStream outputStream = new FileStream(output.FullName, FileMode.CreateNew, FileAccess.ReadWrite,
+ FileShare.Read, bufferSize: DEFAULT_FILESTREAM_BUFFER_SIZE, FileOptions.RandomAccess);
+ bool success = false;
+ try
+ {
+ var sort = Sort(inputStream, outputStream);
+ success = true;
+ return sort;
+ }
+ finally
+ {
+ if (!success)
+ {
+ try
+ {
+ outputStream.Dispose();
+ }
+ finally
+ {
+ output.Delete();
+ }
+ }
+ }
}
/// <summary>