You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2020/06/30 21:58:48 UTC
[lucenenet] 01/09: Lucene.Net.Codecs: Fixed testing condition for
BaseTermVectorsFormatTestCase on TermVectorsReaders by throwing
InvalidOperationException (fixes #267)
This is an automated email from the ASF dual-hosted git repository.
nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git
commit ed806713e75875fde44c55e973b26d5b58aac91d
Author: Shad Storhaug <sh...@shadstorhaug.com>
AuthorDate: Mon Jun 29 23:47:30 2020 +0700
Lucene.Net.Codecs: Fixed testing condition for BaseTermVectorsFormatTestCase on TermVectorsReaders by throwing InvalidOperationException (fixes #267)
---
.../SimpleText/SimpleTextTermVectorsReader.cs | 15 +++++--
.../Index/BaseTermVectorsFormatTestCase.cs | 34 ++++++++++-----
src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs | 50 ++++++++++++++++++++--
.../CompressingStoredFieldsIndexWriter.cs | 2 +-
.../Compressing/CompressingTermVectorsReader.cs | 12 +++---
.../Codecs/Lucene3x/Lucene3xTermVectorsReader.cs | 17 +++++---
.../Codecs/Lucene40/Lucene40TermVectorsReader.cs | 15 ++++---
7 files changed, 108 insertions(+), 37 deletions(-)
diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextTermVectorsReader.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextTermVectorsReader.cs
index 20be1d8..1b81575 100644
--- a/src/Lucene.Net.Codecs/SimpleText/SimpleTextTermVectorsReader.cs
+++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextTermVectorsReader.cs
@@ -591,12 +591,19 @@ namespace Lucene.Net.Codecs.SimpleText
public override int NextPosition()
{
- // LUCENENET NOTE: In Java, the assertion is being caught in the test (as an AssertionException).
- // Technically, a "possible" (in fact "probable") scenario like this one, we should be throwing
- // an exception, however doing that causes the checkIndex test to fail. The only logical thing we
- // can do to make this compatible is to remove the assert.
//Debug.Assert((_positions != null && _nextPos < _positions.Length) ||
// _startOffsets != null && _nextPos < _startOffsets.Length);
+
+ // LUCENENET: The above assertion was for control flow when testing. In Java, it would throw an AssertionError, which is
+ // caught by the BaseTermVectorsFormatTestCase.assertEquals(RandomTokenStream tk, FieldType ft, Terms terms) method in the
+ // part that is checking for an error after reading to the end of the enumerator.
+
+ // Since there is no way to turn on assertions in a release build in .NET, we are throwing an InvalidOperationException
+ // in this case, which matches the behavior of Lucene 8. See #267.
+
+ if (((_positions != null && _nextPos < _positions.Length) || _startOffsets != null && _nextPos < _startOffsets.Length) == false)
+ throw new InvalidOperationException("Read past last position");
+
if (_positions != null)
{
return _positions[_nextPos++];
diff --git a/src/Lucene.Net.TestFramework/Index/BaseTermVectorsFormatTestCase.cs b/src/Lucene.Net.TestFramework/Index/BaseTermVectorsFormatTestCase.cs
index df4d339..8bfde1f 100644
--- a/src/Lucene.Net.TestFramework/Index/BaseTermVectorsFormatTestCase.cs
+++ b/src/Lucene.Net.TestFramework/Index/BaseTermVectorsFormatTestCase.cs
@@ -16,6 +16,7 @@ using System.Threading;
using JCG = J2N.Collections.Generic;
using static Lucene.Net.Index.TermsEnum;
using Assert = Lucene.Net.TestFramework.Assert;
+using AssertionError = Lucene.Net.Diagnostics.AssertionException;
using Attribute = Lucene.Net.Util.Attribute;
#if TESTFRAMEWORK_MSTEST
@@ -634,17 +635,28 @@ namespace Lucene.Net.Index
Assert.IsTrue(foundPayload);
}
}
- try
- {
- docsAndPositionsEnum.NextPosition();
- Assert.Fail();
- }
-#pragma warning disable 168
- catch (Exception e)
-#pragma warning restore 168
- {
- // ok
- }
+
+ // LUCENENET specific - In Lucene, there were assertions set up inside TVReaders which throw AssertionError
+ // (provided assertions are enabled), which in turn signaled this class to skip the check by catching AssertionError.
+ // In .NET, assertions are not included in the release and cannot be enabled, so there is nothing to catch.
+ // We have to explicitly exclude the types that rely on this behavior from the check. Otherwise, they would fall
+ // through to Assert.Fail().
+ //
+ // We also have a fake AssertionException for testing mocks. We cannot throw InvalidOperationException in those
+ // cases because that exception is expected in other contexts.
+ Assert.ThrowsAnyOf<InvalidOperationException, AssertionError>(() => docsAndPositionsEnum.NextPosition());
+
+// try
+// {
+// docsAndPositionsEnum.NextPosition();
+// Assert.Fail();
+// }
+//#pragma warning disable 168
+// catch (Exception e)
+//#pragma warning restore 168
+// {
+// // ok
+// }
}
Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
}
diff --git a/src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs b/src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs
index de06202..f0ad9a4 100644
--- a/src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs
+++ b/src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs
@@ -4,6 +4,7 @@ using Lucene.Net.Util.Fst;
using System;
using System.Collections.Generic;
using System.Diagnostics;
+using System.Text;
namespace Lucene.Net.Codecs
{
@@ -433,11 +434,54 @@ namespace Lucene.Net.Codecs
return "BLOCK: " + Prefix.Utf8ToString();
}
+ // LUCENENET specific - to keep the Debug.Assert statement from throwing exceptions
+ // because of invalid UTF8 code in Prefix, we have a wrapper method that falls back
+ // to using PendingBlock.Prefix.ToString() if PendingBlock.ToString()
+ private string ToString(IList<PendingBlock> blocks) // For assert
+ {
+ if (blocks == null)
+ return "null";
+
+
+ if (blocks.Count == 0)
+ return "[]";
+
+ using (var it = blocks.GetEnumerator())
+ {
+ StringBuilder sb = new StringBuilder();
+ sb.Append('[');
+ it.MoveNext();
+ while (true)
+ {
+ var e = it.Current;
+ // There is a chance that the Prefix will contain invalid UTF8,
+ // so we catch that and use the alternative way of displaying it
+ try
+ {
+ sb.Append(e.ToString());
+ }
+ catch (IndexOutOfRangeException)
+ {
+ sb.Append("BLOCK: ");
+ sb.Append(e.Prefix.ToString());
+ }
+ if (!it.MoveNext())
+ {
+ return sb.Append(']').ToString();
+ }
+ sb.Append(',').Append(' ');
+ }
+ }
+ }
+
public void CompileIndex(IList<PendingBlock> floorBlocks, RAMOutputStream scratchBytes)
{
- // LUCENENET TODO: floorBlocks cannot be converted using Arrays.ToString() here.
- // It generates an IndexOutOfRangeException()
- Debug.Assert((IsFloor && floorBlocks != null && floorBlocks.Count != 0) || (!IsFloor && floorBlocks == null), "isFloor=" + IsFloor + " floorBlocks=" + floorBlocks /*Arrays.ToString(floorBlocks)*/);
+ // LUCENENET specific - we use a custom wrapper function to display floorBlocks, since
+ // it might contain garbage that cannot be converted into text. This is compiled out
+ // of the relese, though.
+ Debug.Assert(
+ (IsFloor && floorBlocks != null && floorBlocks.Count != 0) || (!IsFloor && floorBlocks == null),
+ "isFloor=" + IsFloor + " floorBlocks=" + ToString(floorBlocks));
Debug.Assert(scratchBytes.GetFilePointer() == 0);
diff --git a/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs b/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs
index 588cc2b..d45adb3 100644
--- a/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs
+++ b/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs
@@ -211,7 +211,7 @@ namespace Lucene.Net.Codecs.Compressing
{
if (numDocs != totalDocs)
{
- throw new Exception("Expected " + numDocs + " docs, but got " + totalDocs);
+ throw new InvalidOperationException("Expected " + numDocs + " docs, but got " + totalDocs);
}
if (blockChunks > 0)
{
diff --git a/src/Lucene.Net/Codecs/Compressing/CompressingTermVectorsReader.cs b/src/Lucene.Net/Codecs/Compressing/CompressingTermVectorsReader.cs
index 7eedc6f..4235250 100644
--- a/src/Lucene.Net/Codecs/Compressing/CompressingTermVectorsReader.cs
+++ b/src/Lucene.Net/Codecs/Compressing/CompressingTermVectorsReader.cs
@@ -1071,11 +1071,11 @@ namespace Lucene.Net.Codecs.Compressing
{
if (doc == NO_MORE_DOCS)
{
- throw new Exception("DocsEnum exhausted");
+ throw new InvalidOperationException("DocsEnum exhausted");
}
else if (doc == -1)
{
- throw new Exception("DocsEnum not started");
+ throw new InvalidOperationException("DocsEnum not started");
}
}
@@ -1084,11 +1084,11 @@ namespace Lucene.Net.Codecs.Compressing
CheckDoc();
if (i < 0)
{
- throw new Exception("Position enum not started");
+ throw new InvalidOperationException("Position enum not started");
}
else if (i >= termFreq)
{
- throw new Exception("Read past last position");
+ throw new InvalidOperationException("Read past last position");
}
}
@@ -1096,11 +1096,11 @@ namespace Lucene.Net.Codecs.Compressing
{
if (doc != 0)
{
- throw new Exception();
+ throw new InvalidOperationException();
}
else if (i >= termFreq - 1)
{
- throw new Exception("Read past last position");
+ throw new InvalidOperationException("Read past last position");
}
++i;
diff --git a/src/Lucene.Net/Codecs/Lucene3x/Lucene3xTermVectorsReader.cs b/src/Lucene.Net/Codecs/Lucene3x/Lucene3xTermVectorsReader.cs
index f3535d2..373b580 100644
--- a/src/Lucene.Net/Codecs/Lucene3x/Lucene3xTermVectorsReader.cs
+++ b/src/Lucene.Net/Codecs/Lucene3x/Lucene3xTermVectorsReader.cs
@@ -804,13 +804,16 @@ namespace Lucene.Net.Codecs.Lucene3x
public override int NextPosition()
{
- // LUCENENET TODO: on .NET Core 2.0, this assert causes an uncatchable exception.
- // See: https://github.com/Microsoft/vstest/issues/1022
- // Once the issue has been identified and fixed we can remove this conditional
- // compilation for it on .NET Core 2.0.
-#if !NETSTANDARD2_0 && !NETSTANDARD2_1
- Debug.Assert((positions != null && nextPos < positions.Length) || startOffsets != null && nextPos < startOffsets.Length);
-#endif
+ //Debug.Assert((positions != null && nextPos < positions.Length) || startOffsets != null && nextPos < startOffsets.Length);
+
+ // LUCENENET: The above assertion was for control flow when testing. In Java, it would throw an AssertionError, which is
+ // caught by the BaseTermVectorsFormatTestCase.assertEquals(RandomTokenStream tk, FieldType ft, Terms terms) method in the
+ // part that is checking for an error after reading to the end of the enumerator.
+
+ // Since there is no way to turn on assertions in a release build in .NET, we are throwing an InvalidOperationException
+ // in this case, which matches the behavior of Lucene 8. See #267.
+ if (((positions != null && nextPos < positions.Length) || startOffsets != null && nextPos < startOffsets.Length) == false)
+ throw new InvalidOperationException("Read past last position");
if (positions != null)
{
diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40TermVectorsReader.cs b/src/Lucene.Net/Codecs/Lucene40/Lucene40TermVectorsReader.cs
index a4781e7..1cd3d66 100644
--- a/src/Lucene.Net/Codecs/Lucene40/Lucene40TermVectorsReader.cs
+++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40TermVectorsReader.cs
@@ -808,11 +808,16 @@ namespace Lucene.Net.Codecs.Lucene40
public override int NextPosition()
{
- // LUCENENET TODO: BUG - Need to investigate why this assert sometimes fails
- // which will cause the test runner to crash on .NET Core 2.0
-#if !NETSTANDARD2_0 && !NETSTANDARD2_1
- Debug.Assert((positions != null && nextPos < positions.Length) || startOffsets != null && nextPos < startOffsets.Length);
-#endif
+ //Debug.Assert((positions != null && nextPos < positions.Length) || startOffsets != null && nextPos < startOffsets.Length);
+
+ // LUCENENET: The above assertion was for control flow when testing. In Java, it would throw an AssertionError, which is
+ // caught by the BaseTermVectorsFormatTestCase.assertEquals(RandomTokenStream tk, FieldType ft, Terms terms) method in the
+ // part that is checking for an error after reading to the end of the enumerator.
+
+ // Since there is no way to turn on assertions in a release build in .NET, we are throwing an InvalidOperationException
+ // in this case, which matches the behavior of Lucene 8. See #267.
+ if (((positions != null && nextPos < positions.Length) || startOffsets != null && nextPos < startOffsets.Length) == false)
+ throw new InvalidOperationException("Read past last position");
if (positions != null)
{