You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2022/11/26 12:17:10 UTC

[lucenenet] 04/05: Lucene.Net.Util.OfflineSorter: Added back original tests using FileInfo and fixed bugs that were preventing the original behavior

This is an automated email from the ASF dual-hosted git repository.

nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git

commit 0debb1c4d8a9f00d5f6ea205ac44b878f0f3a346
Author: Shad Storhaug <sh...@shadstorhaug.com>
AuthorDate: Wed Nov 23 20:49:06 2022 +0700

    Lucene.Net.Util.OfflineSorter: Added back original tests using FileInfo and fixed bugs that were preventing the original behavior
---
 src/Lucene.Net.Tests/Util/TestOfflineSorter.cs | 111 +++++++++++++++++++++++--
 src/Lucene.Net/Util/OfflineSorter.cs           |  28 ++++++-
 2 files changed, 131 insertions(+), 8 deletions(-)

diff --git a/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs b/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs
index 55fb91152..50fc13f83 100644
--- a/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs
+++ b/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs
@@ -5,6 +5,7 @@ using System.Collections.Generic;
 using System.IO;
 using JCG = J2N.Collections.Generic;
 using Assert = Lucene.Net.TestFramework.Assert;
+using Lucene.Net.Attributes;
 
 namespace Lucene.Net.Util
 {
@@ -72,6 +73,15 @@ namespace Lucene.Net.Util
 #pragma warning restore CA1825 // Avoid zero-length array allocations.
         }
 
+        [Test]
+        [LuceneNetSpecific]
+        public virtual void TestEmpty_AsStream()
+        {
+#pragma warning disable CA1825 // Avoid zero-length array allocations.
+            CheckSortAsStream(new OfflineSorter(), new byte[][] { }); // stream-based check with no input sequences at all
+#pragma warning restore CA1825 // Avoid zero-length array allocations.
+        }
+
         [Test]
         public virtual void TestSingleLine()
         {
@@ -80,6 +90,15 @@ namespace Lucene.Net.Util
 #pragma warning restore 612, 618
         }
 
+        [Test]
+        [LuceneNetSpecific]
+        public virtual void TestSingleLine_AsStream()
+        {
+#pragma warning disable 612, 618
+            CheckSortAsStream(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.CHARSET_UTF_8) }); // stream-based check with exactly one input sequence
+#pragma warning restore 612, 618
+        }
+
         [Test]
         public virtual void TestIntermediateMerges()
         {
@@ -88,6 +107,15 @@ namespace Lucene.Net.Util
             Assert.IsTrue(info.MergeRounds > 10);
         }
 
+        [Test]
+        [LuceneNetSpecific]
+        public virtual void TestIntermediateMerges_AsStream()
+        {
+            // Sort 20 MB worth of data with a 1 MB buffer, binary merging (last constructor argument is 2), so many merge rounds are expected.
+            OfflineSorter.SortInfo info = CheckSortAsStream(new OfflineSorter(OfflineSorter.DEFAULT_COMPARER, OfflineSorter.BufferSize.Megabytes(1), OfflineSorter.DefaultTempDir, 2), GenerateRandom((int)OfflineSorter.MB * 20));
+            Assert.IsTrue(info.MergeRounds > 10);
+        }
+
         [Test]
         public virtual void TestSmallRandom()
         {
@@ -96,6 +124,15 @@ namespace Lucene.Net.Util
             Assert.AreEqual(1, sortInfo.MergeRounds);
         }
 
+        [Test]
+        [LuceneNetSpecific]
+        public virtual void TestSmallRandom_AsStream()
+        {
+            // Sort 20 MB worth of data with a 1 MB buffer and MAX_TEMPFILES; exactly one merge round is expected.
+            OfflineSorter.SortInfo sortInfo = CheckSortAsStream(new OfflineSorter(OfflineSorter.DEFAULT_COMPARER, OfflineSorter.BufferSize.Megabytes(1), OfflineSorter.DefaultTempDir, OfflineSorter.MAX_TEMPFILES), GenerateRandom((int)OfflineSorter.MB * 20));
+            Assert.AreEqual(1, sortInfo.MergeRounds);
+        }
+
         [Test]
         [Nightly]
         public virtual void TestLargerRandom()
@@ -104,6 +141,15 @@ namespace Lucene.Net.Util
             CheckSort(new OfflineSorter(OfflineSorter.DEFAULT_COMPARER, OfflineSorter.BufferSize.Megabytes(16), OfflineSorter.DefaultTempDir, OfflineSorter.MAX_TEMPFILES), GenerateRandom((int)OfflineSorter.MB * 100));
         }
 
+        [Test]
+        [Nightly]
+        [LuceneNetSpecific]
+        public virtual void TestLargerRandom_AsStream()
+        {
+            // Sort 100 MB worth of data with a 16 MB buffer.
+            CheckSortAsStream(new OfflineSorter(OfflineSorter.DEFAULT_COMPARER, OfflineSorter.BufferSize.Megabytes(16), OfflineSorter.DefaultTempDir, OfflineSorter.MAX_TEMPFILES), GenerateRandom((int)OfflineSorter.MB * 100));
+        }
+
         private byte[][] GenerateRandom(int howMuchData)
         {
             JCG.List<byte[]> data = new JCG.List<byte[]>();
@@ -136,10 +182,29 @@ namespace Lucene.Net.Util
         /// </summary>
         private OfflineSorter.SortInfo CheckSort(OfflineSorter sort, byte[][] data)
         {
-            using FileStream unsorted = WriteAll("unsorted", data);
+            FileInfo unsorted = WriteAll("unsorted", data); // raw input, written before the array is sorted
+
+            Array.Sort(data, unsignedByteOrderComparer); // sorts the caller's array in place
+            FileInfo golden = WriteAll("golden", data); // expected result, written from the sorted array
+
+            FileInfo sorted = new FileInfo(Path.Combine(tempDir.FullName, "sorted")); // path only; the file itself is not created here
+            OfflineSorter.SortInfo sortInfo = sort.Sort(unsorted, sorted);
+            //System.out.println("Input size [MB]: " + unsorted.Length() / (1024 * 1024));
+            //System.out.println(sortInfo);
+
+            AssertFilesIdentical(golden, sorted);
+            return sortInfo;
+        }
+
+        /// <summary>
+        /// Check sorting data on an instance of <seealso cref="OfflineSorter"/>.
+        /// </summary>
+        private OfflineSorter.SortInfo CheckSortAsStream(OfflineSorter sort, byte[][] data)
+        {
+            using FileStream unsorted = WriteAllAsStream("unsorted", data);
 
             Array.Sort(data, unsignedByteOrderComparer);
-            using FileStream golden = WriteAll("golden", data);
+            using FileStream golden = WriteAllAsStream("golden", data);
 
             string sortedFile = Path.Combine(tempDir.FullName, "sorted");
             using FileStream sorted = new FileStream(sortedFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.Read, bufferSize: OfflineSorter.DEFAULT_FILESTREAM_BUFFER_SIZE, FileOptions.DeleteOnClose);
@@ -151,10 +216,33 @@ namespace Lucene.Net.Util
             return sortInfo;
         }
 
+
         /// <summary>
         /// Make sure two files are byte-byte identical.
         /// </summary>
-        // LUCENENET specific - switched to using FileStream rather than FileInfo
+        private void AssertFilesIdentical(FileInfo golden, FileInfo sorted)
+        {
+            Assert.AreEqual(golden.Length, sorted.Length);
+            byte[] buf1 = new byte[64 * 1024];
+            byte[] buf2 = new byte[64 * 1024];
+            int len;
+            using Stream is1 = golden.Open(FileMode.Open, FileAccess.Read, FileShare.Delete);
+            using Stream is2 = sorted.Open(FileMode.Open, FileAccess.Read, FileShare.Delete);
+            while ((len = is1.Read(buf1, 0, buf1.Length)) > 0)
+            {
+                // Stream.Read may return fewer bytes than requested: refill until buf2 holds len bytes (fails if 'sorted' ends early).
+                for (int off = 0, n; off < len; off += n) { Assert.IsTrue((n = is2.Read(buf2, off, len - off)) > 0); }
+                for (int i = 0; i < len; i++)
+                {
+                    Assert.AreEqual(buf1[i], buf2[i]);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Make sure two files are byte-byte identical.
+        /// </summary>
+        // LUCENENET specific - test using FileStream rather than FileInfo
         private void AssertFilesIdentical(FileStream golden, FileStream sorted)
         {
             Assert.AreEqual(golden.Length, sorted.Length);
@@ -174,8 +262,21 @@ namespace Lucene.Net.Util
             }
         }
 
-        // LUCENENET specific - switched to using FileStream rather than FileInfo
-        private FileStream WriteAll(string name, byte[][] data)
+        private FileInfo WriteAll(string name, byte[][] data)
+        {
+            FileInfo file = new FileInfo(Path.Combine(tempDir.FullName, name));
+            using (file.Create()) { }
+            // Write every sequence to the new file; 'using' guarantees the writer is disposed even if a Write throws.
+            using OfflineSorter.ByteSequencesWriter w = new OfflineSorter.ByteSequencesWriter(file);
+            foreach (byte[] datum in data)
+            {
+                w.Write(datum);
+            }
+            return file;
+        }
+
+        // LUCENENET specific - test using FileStream rather than FileInfo
+        private FileStream WriteAllAsStream(string name, byte[][] data)
         {
             FileInfo file = new FileInfo(Path.Combine(tempDir.FullName, name));
             var stream = new FileStream(file.FullName, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.Read, bufferSize: OfflineSorter.DEFAULT_FILESTREAM_BUFFER_SIZE, FileOptions.DeleteOnClose);
diff --git a/src/Lucene.Net/Util/OfflineSorter.cs b/src/Lucene.Net/Util/OfflineSorter.cs
index d484aa1dc..5e0d0ac18 100644
--- a/src/Lucene.Net/Util/OfflineSorter.cs
+++ b/src/Lucene.Net/Util/OfflineSorter.cs
@@ -405,11 +405,33 @@ namespace Lucene.Net.Util
             if (output is null)
                 throw new ArgumentNullException(nameof(output));
 
+            output.Delete();
+
             using FileStream inputStream = new FileStream(input.FullName, FileMode.Open, FileAccess.ReadWrite,
                 FileShare.Read, bufferSize: DEFAULT_FILESTREAM_BUFFER_SIZE, FileOptions.DeleteOnClose | FileOptions.RandomAccess);
-            using FileStream outputStream = new FileStream(output.FullName, FileMode.Open, FileAccess.ReadWrite,
-                FileShare.Read, bufferSize: DEFAULT_FILESTREAM_BUFFER_SIZE, FileOptions.DeleteOnClose | FileOptions.RandomAccess);
-            return Sort(inputStream, outputStream);
+            using FileStream outputStream = new FileStream(output.FullName, FileMode.CreateNew, FileAccess.ReadWrite,
+                FileShare.Read, bufferSize: DEFAULT_FILESTREAM_BUFFER_SIZE, FileOptions.RandomAccess);
+            bool success = false;
+            try
+            {
+                var sort = Sort(inputStream, outputStream);
+                success = true;
+                return sort;
+            }
+            finally
+            {
+                if (!success)
+                {
+                    try
+                    {
+                        outputStream.Dispose();
+                    }
+                    finally
+                    {
+                        output.Delete();
+                    }
+                }
+            }
         }
 
         /// <summary>