You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by th...@apache.org on 2011/07/19 01:55:44 UTC

[Lucene.Net] svn commit: r1148109 - in /incubator/lucene.net/trunk: src/contrib/Analyzers/Contrib.Analyzers.csproj src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs src/contrib/Analyzers/Util/ListComparer.cs test/core/TestSupportClass.cs

Author: thoward
Date: Mon Jul 18 23:55:44 2011
New Revision: 1148109

URL: http://svn.apache.org/viewvc?rev=1148109&view=rev
Log:
[LUCENENET-437] Updated ShingleMatrixFilter to use SupportClass.EquatableList, removed ListComparer, and added unit tests for EquatableList displaying various issues.

Removed:
    incubator/lucene.net/trunk/src/contrib/Analyzers/Util/ListComparer.cs
Modified:
    incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj
    incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs
    incubator/lucene.net/trunk/test/core/TestSupportClass.cs

Modified: incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj?rev=1148109&r1=1148108&r2=1148109&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj (original)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj Mon Jul 18 23:55:44 2011
@@ -97,7 +97,6 @@
     <Compile Include="Shingle\Codec\TokenSettingsCodec.cs" />
     <Compile Include="Shingle\Codec\TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs" />
     <Compile Include="Util\FloatHelper.cs" />
-    <Compile Include="Util\ListComparer.cs" />
     <Compile Include="WordlistLoader.cs" />
   </ItemGroup>
   <ItemGroup>

Modified: incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs?rev=1148109&r1=1148108&r2=1148109&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs Mon Jul 18 23:55:44 2011
@@ -136,7 +136,8 @@ namespace Lucene.Net.Analyzers.Shingle
         /// to get the same behaviour.
         /// </p>
         /// </summary>
-        private readonly HashSet<List<Token>> _shinglesSeen = new HashSet<List<Token>>(new ListComparer<Token>());
+        private readonly HashSet<SupportClass.EquatableList<Token>> _shinglesSeen =
+            new HashSet<SupportClass.EquatableList<Token>>(); 
 
         private readonly TermAttribute _termAtt;
         private readonly TypeAttribute _typeAtt;
@@ -382,7 +383,7 @@ namespace Lucene.Net.Analyzers.Shingle
 
                     var termLength = 0;
 
-                    var shingle = new List<Token>();
+                    var shingle = new SupportClass.EquatableList<Token>();
 
                     for (int i = 0; i < _currentShingleLength; i++)
                     {

Modified: incubator/lucene.net/trunk/test/core/TestSupportClass.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/test/core/TestSupportClass.cs?rev=1148109&r1=1148108&r2=1148109&view=diff
==============================================================================
--- incubator/lucene.net/trunk/test/core/TestSupportClass.cs (original)
+++ incubator/lucene.net/trunk/test/core/TestSupportClass.cs Mon Jul 18 23:55:44 2011
@@ -17,6 +17,7 @@
 
 using System;
 using System.Collections;
+using System.Linq;
 using System.Threading;
 
 using Lucene.Net.Index;
@@ -27,6 +28,7 @@ using Lucene.Net.Util;
 using Lucene.Net.Store;
 
 using NUnit.Framework;
+using System.Collections.Generic;
 
 
 
@@ -90,7 +92,7 @@ namespace Lucene.Net._SupportClass
             Assert.AreEqual(expected, digest.GetValue());
         }
     }
-
+    
     [TestFixture]
     public class TestWeakHashTable
     {
@@ -1207,7 +1209,138 @@ namespace Lucene.Net._SupportClass
             Assert.AreEqual(hitCount, hitCount2,"Error in serialization - different hit counts");
         }
     }
+    
+    [TestFixture]
+    public class TestEquatableList
+    {
+        /// <summary>
+        /// This test shows that System.Collections.Generic.List is not suitable for determining uniqueness 
+        /// when used in a HashSet. This is a difference between the Java and .NET BCL Lists. Since the equatable 
+        /// behaviour of java.util.List.hashCode() in Java is relied upon for the Java Lucene implementation, 
+        /// a correct port must use an equatable list with the same behaviour.
+        /// 
+        /// We include this unit test here, to prove the problem with the .NET List type. If this test fails, we 
+        /// can remove the SupportClass.EquatableList class, and replace it with System.Collections.Generic.List. 
+        /// </summary>
+        [Test]
+        public void System_Collections_Generic_List_Not_Suitable_For_Determining_Uniqueness_In_HashSet()
+        {
+            // reference equality 
+            var foo = new Object();
+            var bar = new Object();
+
+            var list1 = new List<Object> {foo, bar};
+            var list2 = new List<Object> {foo, bar};
+
+            var hashSet = new HashSet<List<Object>>();
+
+            Assert.IsTrue(hashSet.Add(list1));
+
+            // note: compare this assertion to the assertion in Suitable_For_Determining_Uniqueness_In_HashSet
+            Assert.IsTrue(hashSet.Add(list2),
+                "BCL List changed equality behaviour and is now suitable for use in HashSet! Yay!");
+        }
+
+        /// <summary>
+        /// This test shows that System.Collections.Generic.List is not suitable for determining uniqueness 
+        /// when used in a Hashtable. This is a difference between the Java and .NET BCL Lists. Since the equatable 
+        /// behaviour of java.util.List.hashCode() in Java is relied upon for the Java Lucene implementation, 
+        /// a correct port must use an equatable list with the same behaviour.
+        /// 
+        /// We include this unit test here, to prove the problem with the .NET List type. If this test fails, we 
+        /// can remove the SupportClass.EquatableList class, and replace it with System.Collections.Generic.List. 
+        /// </summary>
+        [Test]
+        public void System_Collections_Generic_List_Not_Suitable_For_Determining_Uniqueness_In_Hashtable()
+        {
+            // reference equality 
+            var foo = new Object();
+            var bar = new Object();
+
+            var list1 = new List<Object> {foo, bar};
+            var list2 = new List<Object> {foo, bar};
+
+            var hashTable = new Hashtable();
+
+            Assert.IsFalse(hashTable.ContainsKey(list1));
+            hashTable.Add(list1, list1);
+
+            // note: compare this assertion to the assertion in Suitable_For_Determining_Uniqueness_In_Hashtable
+            Assert.IsFalse(
+                hashTable.ContainsKey(list2),
+                "BCL List changed behaviour and is now suitable for use as a replacement for Java's List! Yay!");
+        }
+
+        /// <summary>
+        /// There is an interesting problem with .NET's String.GetHashCode() for certain strings. 
+        /// This unit test displays the problem, and in the event that this is changed in the 
+        /// .NET runtime, the test will fail. 
+        /// 
+        /// This is one of the reasons that the EquatableList implementation does not use GetHashCode() 
+        /// (which is a divergence from the List.equals implementation in Java). EquatableList should have 
+        /// the same overall results as Java's List however.
+        /// 
+        /// For an explanation of this issue see: 
+        /// http://blogs.msdn.com/b/ericlippert/archive/2011/07/12/what-curious-property-does-this-string-have.aspx
+        /// For a description of the GetHashCode implementation see: 
+        /// http://www.dotnetperls.com/gethashcode
+        /// For documentation on List.hashCode(), see: 
+        /// http://download.oracle.com/javase/6/docs/api/java/util/List.html#hashCode()
+        /// And in the general case, see:
+        /// http://download.oracle.com/javase/6/docs/api/java/lang/Object.html#hashCode()
+        /// </summary>
+        [Test]
+        public void System_String_GetHashCode_Exhibits_Inconsistent_Inequality_For_Some_Values()
+        {
+            var val1 = "\uA0A2\uA0A2";
+            var val2 = string.Empty;
 
+            Assert.IsFalse(val1.Equals(val2));
+
+            var hash1 = val1.GetHashCode();
+            var hash2 = val2.GetHashCode();
+
+            // note: this is counter-intuitive, but technically allowed by the contract for GetHashCode()
+            Assert.IsTrue(
+                hash1.Equals(hash2), 
+                "BCL string.GetHashCode() no longer exhibits inconsistent inequality for certain strings."
+                );
+        }
+
+        [Test]
+        public void Suitable_For_Determining_Uniqueness_In_HashSet()
+        {
+            var foo = new Object();
+            var bar = new Object();
+
+            var list1 = new SupportClass.EquatableList<Object> {foo, bar};
+            var list2 = new SupportClass.EquatableList<Object> {foo, bar};
+
+            Assert.AreEqual(list1, list2);
+
+            var hashSet = new HashSet<List<Object>>();
+
+            Assert.IsTrue(hashSet.Add(list1));
+            Assert.IsFalse(hashSet.Add(list2));
+        }
+
+        [Test]
+        public void Suitable_For_Determining_Uniqueness_In_Hashtable()
+        {
+            var foo = new Object();
+            var bar = new Object();
+
+            var list1 = new SupportClass.EquatableList<Object> { foo, bar };
+            var list2 = new SupportClass.EquatableList<Object> { foo, bar };
+
+            var hashTable = new Hashtable();
+
+            Assert.IsFalse(hashTable.ContainsKey(list1));
+            hashTable.Add(list1, list1);
+
+            Assert.IsTrue(hashTable.ContainsKey(list2));
+        }
+    }
 }