You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2011/07/21 05:19:52 UTC
svn commit: r1149014 - in
/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary:
AbbreviationDictionaryTest.java
CaseInsensitiveAbbreviationDictionaryTest.java
CaseSensitiveAbbreviationDictionaryTest.java
Author: colen
Date: Thu Jul 21 03:19:49 2011
New Revision: 1149014
URL: http://svn.apache.org/viewvc?rev=1149014&view=rev
Log:
OPENNLP-234 Split the abbreviation dictionary test in two, one for each value of the case sensitivity flag. The expected hashCode behaviour is currently unclear: should it change according to the case sensitivity flag?
Added:
incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseInsensitiveAbbreviationDictionaryTest.java (contents, props changed)
- copied, changed from r1149006, incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/AbbreviationDictionaryTest.java
incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseSensitiveAbbreviationDictionaryTest.java (with props)
Removed:
incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/AbbreviationDictionaryTest.java
Copied: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseInsensitiveAbbreviationDictionaryTest.java (from r1149006, incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/AbbreviationDictionaryTest.java)
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseInsensitiveAbbreviationDictionaryTest.java?p2=incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseInsensitiveAbbreviationDictionaryTest.java&p1=incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/AbbreviationDictionaryTest.java&r1=1149006&r2=1149014&rev=1149014&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/AbbreviationDictionaryTest.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseInsensitiveAbbreviationDictionaryTest.java Thu Jul 21 03:19:49 2011
@@ -2,18 +2,27 @@ package opennlp.tools.dictionary;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
-import java.io.StringReader;
+import java.io.InputStream;
import opennlp.tools.util.InvalidFormatException;
import org.junit.Test;
-public class AbbreviationDictionaryTest {
+public class CaseInsensitiveAbbreviationDictionaryTest {
+
+ private AbbreviationDictionary getDict() {
+ return new AbbreviationDictionary(false);
+ }
+
+ private AbbreviationDictionary getDict(InputStream in) throws IOException {
+ return new AbbreviationDictionary(in, false);
+ }
/**
* Tests a basic lookup.
@@ -24,16 +33,18 @@ public class AbbreviationDictionaryTest
String a = "a";
String b = "b";
- AbbreviationDictionary dict = new AbbreviationDictionary();
+ AbbreviationDictionary dict = getDict();
dict.add(a);
assertTrue(dict.contains(a));
- assertTrue(!dict.contains(b));
+ assertFalse(dict.contains(b));
+
+ assertTrue(dict.contains(a.toUpperCase()));
}
-
+
/**
- * Tests a basic lookup.
+ * Tests set.
*/
@Test
public void testSet() {
@@ -41,7 +52,7 @@ public class AbbreviationDictionaryTest
String a = "a";
String a1 = "a";
- AbbreviationDictionary dict = new AbbreviationDictionary();
+ AbbreviationDictionary dict = getDict();
dict.add(a);
dict.add(a1);
@@ -51,14 +62,32 @@ public class AbbreviationDictionaryTest
}
/**
+ * Tests set semantics: entries that differ only in case count as one entry.
+ */
+ @Test
+ public void testSetDiffCase() {
+
+ String a = "a";
+ String a1 = "A";
+
+ AbbreviationDictionary dict = getDict();
+
+ dict.add(a);
+ dict.add(a1);
+
+ assertTrue(dict.contains(a));
+ assertEquals(1, dict.size());
+ }
+
+ /**
* Tests serialization and deserailization of the {@link Dictionary}.
- *
+ *
* @throws IOException
* @throws InvalidFormatException
*/
@Test
public void testSerialization() throws IOException, InvalidFormatException {
- AbbreviationDictionary reference = new AbbreviationDictionary();
+ AbbreviationDictionary reference = getDict();
String a1 = "a1";
String a2 = "a2";
@@ -74,34 +103,12 @@ public class AbbreviationDictionaryTest
reference.serialize(out);
- AbbreviationDictionary recreated = new AbbreviationDictionary(
- new ByteArrayInputStream(out.toByteArray()));
+ AbbreviationDictionary recreated = getDict(new ByteArrayInputStream(
+ out.toByteArray()));
assertTrue(reference.equals(recreated));
}
-
- /**
- * Tests for the {@link Dictionary#parseOneEntryPerLine(java.io.Reader)}
- * method.
- *
- * @throws IOException
- */
- @Test
- public void testParseOneEntryPerLine() throws IOException {
-
- String testDictionary = "1a \n 1b \n 1c\n 1d";
- AbbreviationDictionary dictionay =
- AbbreviationDictionary.parseOneEntryPerLine(new StringReader(testDictionary));
-
- assertTrue(dictionay.size() == 4);
-
- assertTrue(dictionay.contains("1a"));
- assertTrue(dictionay.contains("1b"));
- assertTrue(dictionay.contains("1c"));
- assertTrue(dictionay.contains("1d"));
- }
-
/**
* Tests for the {@link Dictionary#equals(Object)} method.
*/
@@ -110,11 +117,11 @@ public class AbbreviationDictionaryTest
String entry1 = "1a";
String entry2 = "1b";
- AbbreviationDictionary dictA = new AbbreviationDictionary();
+ AbbreviationDictionary dictA = getDict();
dictA.add(entry1);
dictA.add(entry2);
- AbbreviationDictionary dictB = new AbbreviationDictionary();
+ AbbreviationDictionary dictB = getDict();
dictB.add(entry1);
dictB.add(entry2);
@@ -122,50 +129,70 @@ public class AbbreviationDictionaryTest
}
/**
+ * Tests for the {@link Dictionary#equals(Object)} method.
+ */
+ @Test
+ public void testEqualsDifferentCase() {
+
+ AbbreviationDictionary dictA = getDict();
+ dictA.add("1a");
+ dictA.add("1b");
+
+ AbbreviationDictionary dictB = getDict();
+ dictB.add("1A");
+ dictB.add("1B");
+
+ assertTrue(dictA.equals(dictB));
+ }
+
+ /**
* Tests the {@link Dictionary#hashCode()} method.
*/
@Test
public void testHashCode() {
String entry1 = "a1";
- AbbreviationDictionary dictA = new AbbreviationDictionary();
+ AbbreviationDictionary dictA = getDict();
dictA.add(entry1);
- AbbreviationDictionary dictB = new AbbreviationDictionary();
+ AbbreviationDictionary dictB = getDict();
dictB.add(entry1);
assertEquals(dictA.hashCode(), dictB.hashCode());
}
/**
- * Tests the lookup of tokens of different case.
+ * Tests the {@link Dictionary#hashCode()} method.
*/
@Test
- public void testDifferentCaseLookup() {
+ public void testHashCodeDifferentCase() {
+ String entry1 = "a1";
- String entry1 = "1a";
- String entry2 = "1A";
+ AbbreviationDictionary dictA = getDict();
+ dictA.add(entry1);
- AbbreviationDictionary dict = new AbbreviationDictionary(false);
+ AbbreviationDictionary dictB = getDict();
+ dictB.add(entry1.toUpperCase());
- dict.add(entry1);
-
- assertTrue(dict.contains(entry2));
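+ // TODO: should it be equal?? NOTE(review): assertNotSame compares the boxed Integer references, not the int values, so it does not verify that the hash codes differ.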
+ assertNotSame(dictA.hashCode(), dictB.hashCode());
}
-
+
/**
* Tests the lookup of tokens of different case.
*/
@Test
- public void testDifferentCaseLookupCaseSensitive() {
+ public void testDifferentCaseLookup() {
String entry1 = "1a";
String entry2 = "1A";
- AbbreviationDictionary dict = new AbbreviationDictionary(true);
+ // create a case insensitive dictionary
+ AbbreviationDictionary dict = getDict();
dict.add(entry1);
- assertFalse(dict.contains(entry2));
+ // should return true because 1a = 1A in a case insensitive lookup
+ assertTrue(dict.contains(entry2));
}
}
Propchange: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseInsensitiveAbbreviationDictionaryTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseSensitiveAbbreviationDictionaryTest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseSensitiveAbbreviationDictionaryTest.java?rev=1149014&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseSensitiveAbbreviationDictionaryTest.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseSensitiveAbbreviationDictionaryTest.java Thu Jul 21 03:19:49 2011
@@ -0,0 +1,220 @@
+package opennlp.tools.dictionary;
+
+import static org.junit.Assert.*;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringReader;
+
+import opennlp.tools.util.InvalidFormatException;
+
+import org.junit.Test;
+
+public class CaseSensitiveAbbreviationDictionaryTest {
+
+ private AbbreviationDictionary getDict() {
+ return new AbbreviationDictionary(true);
+ }
+
+ private AbbreviationDictionary getDict(InputStream in) throws IOException {
+ return new AbbreviationDictionary(in, true);
+ }
+
+ /**
+ * Tests a basic lookup: an added entry is found; a missing entry and the upper-cased entry are not.
+ */
+ @Test
+ public void testLookup() {
+
+ String a = "a";
+ String b = "b";
+
+ AbbreviationDictionary dict = getDict();
+
+ dict.add(a);
+
+ assertTrue(dict.contains(a));
+ assertFalse(dict.contains(b));
+
+ assertFalse(dict.contains(a.toUpperCase()));
+ }
+
+ /**
+ * Tests set semantics: adding the same entry twice leaves a single entry.
+ */
+ @Test
+ public void testSet() {
+
+ String a = "a";
+ String a1 = "a";
+
+ AbbreviationDictionary dict = getDict();
+
+ dict.add(a);
+ dict.add(a1);
+
+ assertTrue(dict.contains(a));
+ assertEquals(1, dict.size());
+ }
+
+ /**
+ * Tests set semantics: entries that differ only in case are kept as two entries.
+ */
+ @Test
+ public void testSetDiffCase() {
+
+ String a = "a";
+ String a1 = "A";
+
+ AbbreviationDictionary dict = getDict();
+
+ dict.add(a);
+ dict.add(a1);
+
+ assertTrue(dict.contains(a));
+ assertEquals(2, dict.size());
+ }
+
+ /**
+ * Tests serialization and deserialization of the {@link Dictionary}.
+ *
+ * @throws IOException
+ * @throws InvalidFormatException
+ */
+ @Test
+ public void testSerialization() throws IOException, InvalidFormatException {
+ AbbreviationDictionary reference = getDict();
+
+ String a1 = "a1";
+ String a2 = "a2";
+ String a3 = "a3";
+ String a5 = "a5";
+
+ reference.add(a1);
+ reference.add(a2);
+ reference.add(a3);
+ reference.add(a5);
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+
+ reference.serialize(out);
+
+ AbbreviationDictionary recreated = getDict(new ByteArrayInputStream(
+ out.toByteArray()));
+
+ assertTrue(reference.equals(recreated));
+ }
+
+ /**
+ * Tests for the {@link Dictionary#parseOneEntryPerLine(java.io.Reader)}
+ * method.
+ *
+ * @throws IOException
+ */
+ @Test
+ public void testParseOneEntryPerLine() throws IOException {
+ // this test is independent of the case sensitive flag.
+
+ String testDictionary = "1a \n 1b \n 1c\n 1d";
+
+ AbbreviationDictionary dictionay = AbbreviationDictionary
+ .parseOneEntryPerLine(new StringReader(testDictionary));
+
+ assertTrue(dictionay.size() == 4);
+
+ assertTrue(dictionay.contains("1a"));
+ assertTrue(dictionay.contains("1b"));
+ assertTrue(dictionay.contains("1c"));
+ assertTrue(dictionay.contains("1d"));
+ }
+
+ /**
+ * Tests for the {@link Dictionary#equals(Object)} method.
+ */
+ @Test
+ public void testEquals() {
+ String entry1 = "1a";
+ String entry2 = "1b";
+
+ AbbreviationDictionary dictA = getDict();
+ dictA.add(entry1);
+ dictA.add(entry2);
+
+ AbbreviationDictionary dictB = getDict();
+ dictB.add(entry1);
+ dictB.add(entry2);
+
+ assertTrue(dictA.equals(dictB));
+ }
+
+ /**
+ * Tests that dictionaries whose entries differ only in case are not equal.
+ */
+ @Test
+ public void testEqualsDifferentCase() {
+
+ AbbreviationDictionary dictA = getDict();
+ dictA.add("1a");
+ dictA.add("1b");
+
+ AbbreviationDictionary dictB = getDict();
+ dictB.add("1A");
+ dictB.add("1B");
+
+ // the entries differ only in case, so case sensitive dictionaries must not be equal
+ assertFalse(dictA.equals(dictB));
+ }
+
+ /**
+ * Tests that two dictionaries holding the same single entry have equal hash codes.
+ */
+ @Test
+ public void testHashCode() {
+ String entry1 = "a1";
+
+ AbbreviationDictionary dictA = getDict();
+ dictA.add(entry1);
+
+ AbbreviationDictionary dictB = getDict();
+ dictB.add(entry1);
+
+ assertEquals(dictA.hashCode(), dictB.hashCode());
+ }
+
+ /**
+ * Checks the hash codes of two dictionaries whose single entry differs only in case.
+ */
+ @Test
+ public void testHashCodeDifferentCase() {
+ String entry1 = "a1";
+
+ AbbreviationDictionary dictA = getDict();
+ dictA.add(entry1);
+
+ AbbreviationDictionary dictB = getDict();
+ dictB.add(entry1.toUpperCase());
+
+ // TODO: should it be equal?? NOTE(review): assertNotSame compares the boxed Integer references, not the int values, so it does not verify that the hash codes differ.
+ assertNotSame(dictA.hashCode(), dictB.hashCode());
+ }
+
+ /**
+ * Tests the lookup of tokens of different case.
+ */
+ @Test
+ public void testDifferentCaseLookup() {
+
+ String entry1 = "1a";
+ String entry2 = "1A";
+
+ // create a case sensitive dictionary
+ AbbreviationDictionary dict = getDict();
+
+ dict.add(entry1);
+
+ // should return false because 1a != 1A in a case sensitive lookup
+ assertFalse(dict.contains(entry2));
+ }
+}
Propchange: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseSensitiveAbbreviationDictionaryTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain