You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by th...@apache.org on 2011/07/19 01:55:44 UTC
[Lucene.Net] svn commit: r1148109 - in /incubator/lucene.net/trunk:
src/contrib/Analyzers/Contrib.Analyzers.csproj
src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs
src/contrib/Analyzers/Util/ListComparer.cs test/core/TestSupportClass.cs
Author: thoward
Date: Mon Jul 18 23:55:44 2011
New Revision: 1148109
URL: http://svn.apache.org/viewvc?rev=1148109&view=rev
Log:
[LUCENENET-437] Updated ShingleMatrixFilter to use SupportClass.EquatableList, removed ListComparer, and added unit tests for EquatableList displaying various issues.
Removed:
incubator/lucene.net/trunk/src/contrib/Analyzers/Util/ListComparer.cs
Modified:
incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj
incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs
incubator/lucene.net/trunk/test/core/TestSupportClass.cs
Modified: incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj?rev=1148109&r1=1148108&r2=1148109&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj (original)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj Mon Jul 18 23:55:44 2011
@@ -97,7 +97,6 @@
<Compile Include="Shingle\Codec\TokenSettingsCodec.cs" />
<Compile Include="Shingle\Codec\TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs" />
<Compile Include="Util\FloatHelper.cs" />
- <Compile Include="Util\ListComparer.cs" />
<Compile Include="WordlistLoader.cs" />
</ItemGroup>
<ItemGroup>
Modified: incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs?rev=1148109&r1=1148108&r2=1148109&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs Mon Jul 18 23:55:44 2011
@@ -136,7 +136,8 @@ namespace Lucene.Net.Analyzers.Shingle
/// to get the same behaviour.
/// </p>
/// </summary>
- private readonly HashSet<List<Token>> _shinglesSeen = new HashSet<List<Token>>(new ListComparer<Token>());
+ private readonly HashSet<SupportClass.EquatableList<Token>> _shinglesSeen =
+ new HashSet<SupportClass.EquatableList<Token>>();
private readonly TermAttribute _termAtt;
private readonly TypeAttribute _typeAtt;
@@ -382,7 +383,7 @@ namespace Lucene.Net.Analyzers.Shingle
var termLength = 0;
- var shingle = new List<Token>();
+ var shingle = new SupportClass.EquatableList<Token>();
for (int i = 0; i < _currentShingleLength; i++)
{
Modified: incubator/lucene.net/trunk/test/core/TestSupportClass.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/test/core/TestSupportClass.cs?rev=1148109&r1=1148108&r2=1148109&view=diff
==============================================================================
--- incubator/lucene.net/trunk/test/core/TestSupportClass.cs (original)
+++ incubator/lucene.net/trunk/test/core/TestSupportClass.cs Mon Jul 18 23:55:44 2011
@@ -17,6 +17,7 @@
using System;
using System.Collections;
+using System.Linq;
using System.Threading;
using Lucene.Net.Index;
@@ -27,6 +28,7 @@ using Lucene.Net.Util;
using Lucene.Net.Store;
using NUnit.Framework;
+using System.Collections.Generic;
@@ -90,7 +92,7 @@ namespace Lucene.Net._SupportClass
Assert.AreEqual(expected, digest.GetValue());
}
}
-
+
[TestFixture]
public class TestWeakHashTable
{
@@ -1207,7 +1209,138 @@ namespace Lucene.Net._SupportClass
Assert.AreEqual(hitCount, hitCount2,"Error in serialization - different hit counts");
}
}
+
+ [TestFixture]
+ public class TestEquatableList
+ {
+ /// <summary>
+ /// This test shows that System.Collections.Generic.List is not suitable for determining uniqueness
+ /// when used in a HashSet. This is a difference between the Java and .NET BCL Lists. Since the equatable
+ /// behaviour of java.util.List.hashCode() in Java is relied upon for the Java Lucene implementation,
+ /// a correct port must use an equatable list with the same behaviour.
+ ///
+ /// We include this unit test here, to prove the problem with the .NET List type. If this test fails, we
+ /// can remove the SupportClass.EquatableList class, and replace it with System.Collections.Generic.List.
+ /// </summary>
+ [Test]
+ public void System_Collections_Generic_List_Not_Suitable_For_Determining_Uniqueness_In_HashSet()
+ {
+ // reference equality
+ var foo = new Object();
+ var bar = new Object();
+
+ var list1 = new List<Object> {foo, bar};
+ var list2 = new List<Object> {foo, bar};
+
+ var hashSet = new HashSet<List<Object>>();
+
+ Assert.IsTrue(hashSet.Add(list1));
+
+ // note: compare this assertion to the assertion in Suitable_For_Determining_Uniqueness_In_HashSet
+ Assert.IsTrue(hashSet.Add(list2),
+ "BCL List changed equality behaviour and is now suitable for use in HashSet! Yay!");
+ }
+
+ /// <summary>
+ /// This test shows that System.Collections.Generic.List is not suitable for determining uniqueness
+ /// when used in a Hashtable. This is a difference between the Java and .NET BCL Lists. Since the equatable
+ /// behaviour of java.util.List.hashCode() in Java is relied upon for the Java Lucene implementation,
+ /// a correct port must use an equatable list with the same behaviour.
+ ///
+ /// We include this unit test here, to prove the problem with the .NET List type. If this test fails, we
+ /// can remove the SupportClass.EquatableList class, and replace it with System.Collections.Generic.List.
+ /// </summary>
+ [Test]
+ public void System_Collections_Generic_List_Not_Suitable_For_Determining_Uniqueness_In_Hashtable()
+ {
+ // reference equality
+ var foo = new Object();
+ var bar = new Object();
+
+ var list1 = new List<Object> {foo, bar};
+ var list2 = new List<Object> {foo, bar};
+
+ var hashTable = new Hashtable();
+
+ Assert.IsFalse(hashTable.ContainsKey(list1));
+ hashTable.Add(list1, list1);
+
+ // note: compare this assertion to the assertion in Suitable_For_Determining_Uniqueness_In_Hashtable
+ Assert.IsFalse(
+ hashTable.ContainsKey(list2),
+ "BCL List changed behaviour and is now suitable for use as a replacement for Java's List! Yay!");
+ }
+
+ /// <summary>
+ /// There is an interesting problem with .NET's String.GetHashCode() for certain strings.
+ /// This unit test displays the problem, and in the event that this is changed in the
+ /// .NET runtime, the test will fail.
+ ///
+ /// This is one of the reasons that the EquatableList implementation does not use GetHashCode()
+ /// (which is a divergence from the List.equals implementation in Java). EquatableList should have
+ /// the same overall results as Java's List however.
+ ///
+ /// For an explanation of this issue see:
+ /// http://blogs.msdn.com/b/ericlippert/archive/2011/07/12/what-curious-property-does-this-string-have.aspx
+ /// For a description of the GetHashCode implementation see:
+ /// http://www.dotnetperls.com/gethashcode
+ /// For documentation on List.hashCode(), see:
+ /// http://download.oracle.com/javase/6/docs/api/java/util/List.html#hashCode()
+ /// And in the general case, see:
+ /// http://download.oracle.com/javase/6/docs/api/java/lang/Object.html#hashCode()
+ /// </summary>
+ [Test]
+ public void System_String_GetHashCode_Exhibits_Inconsistent_Inequality_For_Some_Values()
+ {
+ var val1 = "\uA0A2\uA0A2";
+ var val2 = string.Empty;
+ Assert.IsFalse(val1.Equals(val2));
+
+ var hash1 = val1.GetHashCode();
+ var hash2 = val2.GetHashCode();
+
+ // note: this is counter-intuitive, but technically allowed by the contract for GetHashCode()
+ Assert.IsTrue(
+ hash1.Equals(hash2),
+ "BCL string.GetHashCode() no longer exhibits inconsistent inequality for certain strings."
+ );
+ }
+
+ [Test]
+ public void Suitable_For_Determining_Uniqueness_In_HashSet()
+ {
+ var foo = new Object();
+ var bar = new Object();
+
+ var list1 = new SupportClass.EquatableList<Object> {foo, bar};
+ var list2 = new SupportClass.EquatableList<Object> {foo, bar};
+
+ Assert.AreEqual(list1, list2);
+
+ var hashSet = new HashSet<List<Object>>();
+
+ Assert.IsTrue(hashSet.Add(list1));
+ Assert.IsFalse(hashSet.Add(list2));
+ }
+
+ [Test]
+ public void Suitable_For_Determining_Uniqueness_In_Hashtable()
+ {
+ var foo = new Object();
+ var bar = new Object();
+
+ var list1 = new SupportClass.EquatableList<Object> { foo, bar };
+ var list2 = new SupportClass.EquatableList<Object> { foo, bar };
+
+ var hashTable = new Hashtable();
+
+ Assert.IsFalse(hashTable.ContainsKey(list1));
+ hashTable.Add(list1, list1);
+
+ Assert.IsTrue(hashTable.ContainsKey(list2));
+ }
+ }
}