You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2011/07/21 05:19:52 UTC

svn commit: r1149014 - in /incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary: AbbreviationDictionaryTest.java CaseInsensitiveAbbreviationDictionaryTest.java CaseSensitiveAbbreviationDictionaryTest.java

Author: colen
Date: Thu Jul 21 03:19:49 2011
New Revision: 1149014

URL: http://svn.apache.org/viewvc?rev=1149014&view=rev
Log:
OPENNLP-234 Split the abbreviation dictionary test in two, one for each value of the case sensitivity flag. The expected hashCode behaviour is currently unclear: should it change according to the case sensitivity flag?

Added:
    incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseInsensitiveAbbreviationDictionaryTest.java   (contents, props changed)
      - copied, changed from r1149006, incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/AbbreviationDictionaryTest.java
    incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseSensitiveAbbreviationDictionaryTest.java   (with props)
Removed:
    incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/AbbreviationDictionaryTest.java

Copied: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseInsensitiveAbbreviationDictionaryTest.java (from r1149006, incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/AbbreviationDictionaryTest.java)
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseInsensitiveAbbreviationDictionaryTest.java?p2=incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseInsensitiveAbbreviationDictionaryTest.java&p1=incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/AbbreviationDictionaryTest.java&r1=1149006&r2=1149014&rev=1149014&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/AbbreviationDictionaryTest.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseInsensitiveAbbreviationDictionaryTest.java Thu Jul 21 03:19:49 2011
@@ -2,18 +2,27 @@ package opennlp.tools.dictionary;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotSame;
 import static org.junit.Assert.assertTrue;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
-import java.io.StringReader;
+import java.io.InputStream;
 
 import opennlp.tools.util.InvalidFormatException;
 
 import org.junit.Test;
 
-public class AbbreviationDictionaryTest {
+public class CaseInsensitiveAbbreviationDictionaryTest {
+
+  private AbbreviationDictionary getDict() {
+    return new AbbreviationDictionary(false);
+  }
+
+  private AbbreviationDictionary getDict(InputStream in) throws IOException {
+    return new AbbreviationDictionary(in, false);
+  }
 
   /**
    * Tests a basic lookup.
@@ -24,16 +33,18 @@ public class AbbreviationDictionaryTest 
     String a = "a";
     String b = "b";
 
-    AbbreviationDictionary dict = new AbbreviationDictionary();
+    AbbreviationDictionary dict = getDict();
 
     dict.add(a);
 
     assertTrue(dict.contains(a));
-    assertTrue(!dict.contains(b));
+    assertFalse(dict.contains(b));
+    
+    assertTrue(dict.contains(a.toUpperCase()));
   }
-  
+
   /**
-   * Tests a basic lookup.
+   * Tests set.
    */
   @Test
   public void testSet() {
@@ -41,7 +52,7 @@ public class AbbreviationDictionaryTest 
     String a = "a";
     String a1 = "a";
 
-    AbbreviationDictionary dict = new AbbreviationDictionary();
+    AbbreviationDictionary dict = getDict();
 
     dict.add(a);
     dict.add(a1);
@@ -51,14 +62,32 @@ public class AbbreviationDictionaryTest 
   }
   
   /**
+   * Tests that entries differing only in case count as a single entry.
+   */
+  @Test
+  public void testSetDiffCase() {
+
+    String a = "a";
+    String a1 = "A";
+
+    AbbreviationDictionary dict = getDict();
+
+    dict.add(a);
+    dict.add(a1);
+
+    assertTrue(dict.contains(a));
+    assertEquals(1, dict.size());
+  }
+
+  /**
    * Tests serialization and deserailization of the {@link Dictionary}.
-   *
+   * 
    * @throws IOException
    * @throws InvalidFormatException
    */
   @Test
   public void testSerialization() throws IOException, InvalidFormatException {
-    AbbreviationDictionary reference = new AbbreviationDictionary();
+    AbbreviationDictionary reference = getDict();
 
     String a1 = "a1";
     String a2 = "a2";
@@ -74,34 +103,12 @@ public class AbbreviationDictionaryTest 
 
     reference.serialize(out);
 
-    AbbreviationDictionary recreated = new AbbreviationDictionary(
-        new ByteArrayInputStream(out.toByteArray()));
+    AbbreviationDictionary recreated = getDict(new ByteArrayInputStream(
+        out.toByteArray()));
 
     assertTrue(reference.equals(recreated));
   }
-  
-  /**
-   * Tests for the {@link Dictionary#parseOneEntryPerLine(java.io.Reader)}
-   * method.
-   *
-   * @throws IOException
-   */
-  @Test
-  public void testParseOneEntryPerLine() throws IOException {
-
-    String testDictionary = "1a \n 1b \n 1c\n 1d";
 
-    AbbreviationDictionary dictionay =
-      AbbreviationDictionary.parseOneEntryPerLine(new StringReader(testDictionary));
-
-    assertTrue(dictionay.size() == 4);
-
-    assertTrue(dictionay.contains("1a"));
-    assertTrue(dictionay.contains("1b"));
-    assertTrue(dictionay.contains("1c"));
-    assertTrue(dictionay.contains("1d"));
-  }
-  
   /**
    * Tests for the {@link Dictionary#equals(Object)} method.
    */
@@ -110,11 +117,11 @@ public class AbbreviationDictionaryTest 
     String entry1 = "1a";
     String entry2 = "1b";
 
-    AbbreviationDictionary dictA = new AbbreviationDictionary();
+    AbbreviationDictionary dictA = getDict();
     dictA.add(entry1);
     dictA.add(entry2);
 
-    AbbreviationDictionary dictB = new AbbreviationDictionary();
+    AbbreviationDictionary dictB = getDict();
     dictB.add(entry1);
     dictB.add(entry2);
 
@@ -122,50 +129,70 @@ public class AbbreviationDictionaryTest 
   }
   
   /**
+   * Tests that {@link Dictionary#equals(Object)} ignores the case of the entries.
+   */
+  @Test
+  public void testEqualsDifferentCase() {
+
+    AbbreviationDictionary dictA = getDict();
+    dictA.add("1a");
+    dictA.add("1b");
+
+    AbbreviationDictionary dictB = getDict();
+    dictB.add("1A");
+    dictB.add("1B");
+
+    assertTrue(dictA.equals(dictB));
+  }
+
+  /**
    * Tests the {@link Dictionary#hashCode()} method.
    */
   @Test
   public void testHashCode() {
     String entry1 = "a1";
 
-    AbbreviationDictionary dictA = new AbbreviationDictionary();
+    AbbreviationDictionary dictA = getDict();
     dictA.add(entry1);
 
-    AbbreviationDictionary dictB = new AbbreviationDictionary();
+    AbbreviationDictionary dictB = getDict();
     dictB.add(entry1);
 
     assertEquals(dictA.hashCode(), dictB.hashCode());
   }
   
   /**
-   * Tests the lookup of tokens of different case.
+   * Tests the {@link Dictionary#hashCode()} method.
    */
   @Test
-  public void testDifferentCaseLookup() {
+  public void testHashCodeDifferentCase() {
+    String entry1 = "a1";
 
-    String entry1 = "1a";
-    String entry2 = "1A";
+    AbbreviationDictionary dictA = getDict();
+    dictA.add(entry1);
 
-    AbbreviationDictionary dict = new AbbreviationDictionary(false);
+    AbbreviationDictionary dictB = getDict();
+    dictB.add(entry1.toUpperCase());
 
-    dict.add(entry1);
-
-    assertTrue(dict.contains(entry2));
+    // TODO: should it be equal?? (note: assertNotSame checks identity of the boxed values, not int equality)
+    assertNotSame(dictA.hashCode(), dictB.hashCode());
   }
-  
+
   /**
    * Tests the lookup of tokens of different case.
    */
   @Test
-  public void testDifferentCaseLookupCaseSensitive() {
+  public void testDifferentCaseLookup() {
 
     String entry1 = "1a";
     String entry2 = "1A";
 
-    AbbreviationDictionary dict = new AbbreviationDictionary(true);
+    // create a case insensitive dictionary
+    AbbreviationDictionary dict = getDict();
 
     dict.add(entry1);
 
-    assertFalse(dict.contains(entry2));
+    // should return true because 1a = 1A in a case insensitive lookup
+    assertTrue(dict.contains(entry2));
   }
 }

Propchange: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseInsensitiveAbbreviationDictionaryTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseSensitiveAbbreviationDictionaryTest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseSensitiveAbbreviationDictionaryTest.java?rev=1149014&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseSensitiveAbbreviationDictionaryTest.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseSensitiveAbbreviationDictionaryTest.java Thu Jul 21 03:19:49 2011
@@ -0,0 +1,220 @@
+package opennlp.tools.dictionary;
+
+import static org.junit.Assert.*;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringReader;
+
+import opennlp.tools.util.InvalidFormatException;
+
+import org.junit.Test;
+
+public class CaseSensitiveAbbreviationDictionaryTest {
+
+  private AbbreviationDictionary getDict() {
+    return new AbbreviationDictionary(true);
+  }
+
+  private AbbreviationDictionary getDict(InputStream in) throws IOException {
+    return new AbbreviationDictionary(in, true);
+  }
+
+  /**
+   * Tests a basic lookup.
+   */
+  @Test
+  public void testLookup() {
+
+    String a = "a";
+    String b = "b";
+
+    AbbreviationDictionary dict = getDict();
+
+    dict.add(a);
+
+    assertTrue(dict.contains(a));
+    assertFalse(dict.contains(b));
+    
+    assertFalse(dict.contains(a.toUpperCase()));
+  }
+
+  /**
+   * Tests set semantics: adding the same entry twice stores it only once.
+   */
+  @Test
+  public void testSet() {
+
+    String a = "a";
+    String a1 = "a";
+
+    AbbreviationDictionary dict = getDict();
+
+    dict.add(a);
+    dict.add(a1);
+
+    assertTrue(dict.contains(a));
+    assertEquals(1, dict.size());
+  }
+  
+  /**
+   * Tests that entries differing only in case are stored as distinct entries.
+   */
+  @Test
+  public void testSetDiffCase() {
+
+    String a = "a";
+    String a1 = "A";
+
+    AbbreviationDictionary dict = getDict();
+
+    dict.add(a);
+    dict.add(a1);
+
+    assertTrue(dict.contains(a));
+    assertEquals(2, dict.size());
+  }
+
+  /**
+   * Tests serialization and deserialization of the {@link Dictionary}.
+   * 
+   * @throws IOException
+   * @throws InvalidFormatException
+   */
+  @Test
+  public void testSerialization() throws IOException, InvalidFormatException {
+    AbbreviationDictionary reference = getDict();
+
+    String a1 = "a1";
+    String a2 = "a2";
+    String a3 = "a3";
+    String a5 = "a5";
+
+    reference.add(a1);
+    reference.add(a2);
+    reference.add(a3);
+    reference.add(a5);
+
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+
+    reference.serialize(out);
+
+    AbbreviationDictionary recreated = getDict(new ByteArrayInputStream(
+        out.toByteArray()));
+
+    assertTrue(reference.equals(recreated));
+  }
+
+  /**
+   * Tests for the {@link Dictionary#parseOneEntryPerLine(java.io.Reader)}
+   * method.
+   * 
+   * @throws IOException
+   */
+  @Test
+  public void testParseOneEntryPerLine() throws IOException {
+    // this test is independent of the case sensitive flag.
+    
+    String testDictionary = "1a \n 1b \n 1c\n 1d";
+
+    AbbreviationDictionary dictionay = AbbreviationDictionary
+        .parseOneEntryPerLine(new StringReader(testDictionary));
+
+    assertTrue(dictionay.size() == 4);
+
+    assertTrue(dictionay.contains("1a"));
+    assertTrue(dictionay.contains("1b"));
+    assertTrue(dictionay.contains("1c"));
+    assertTrue(dictionay.contains("1d"));
+  }
+
+  /**
+   * Tests for the {@link Dictionary#equals(Object)} method.
+   */
+  @Test
+  public void testEquals() {
+    String entry1 = "1a";
+    String entry2 = "1b";
+
+    AbbreviationDictionary dictA = getDict();
+    dictA.add(entry1);
+    dictA.add(entry2);
+
+    AbbreviationDictionary dictB = getDict();
+    dictB.add(entry1);
+    dictB.add(entry2);
+
+    assertTrue(dictA.equals(dictB));
+  }
+  
+  /**
+   * Tests that {@link Dictionary#equals(Object)} distinguishes entries that differ only in case.
+   */
+  @Test
+  public void testEqualsDifferentCase() {
+
+    AbbreviationDictionary dictA = getDict();
+    dictA.add("1a");
+    dictA.add("1b");
+
+    AbbreviationDictionary dictB = getDict();
+    dictB.add("1A");
+    dictB.add("1B");
+
+    // the dictionaries must not be equal in a case sensitive dict
+    assertFalse(dictA.equals(dictB));
+  }
+
+  /**
+   * Tests the {@link Dictionary#hashCode()} method.
+   */
+  @Test
+  public void testHashCode() {
+    String entry1 = "a1";
+
+    AbbreviationDictionary dictA = getDict();
+    dictA.add(entry1);
+
+    AbbreviationDictionary dictB = getDict();
+    dictB.add(entry1);
+
+    assertEquals(dictA.hashCode(), dictB.hashCode());
+  }
+  
+  /**
+   * Tests the {@link Dictionary#hashCode()} method.
+   */
+  @Test
+  public void testHashCodeDifferentCase() {
+    String entry1 = "a1";
+
+    AbbreviationDictionary dictA = getDict();
+    dictA.add(entry1);
+
+    AbbreviationDictionary dictB = getDict();
+    dictB.add(entry1.toUpperCase());
+
+    // TODO: should it be equal?? (note: assertNotSame checks identity of the boxed values, not int equality)
+    assertNotSame(dictA.hashCode(), dictB.hashCode());
+  }
+
+  /**
+   * Tests the lookup of tokens of different case.
+   */
+  @Test
+  public void testDifferentCaseLookup() {
+
+    String entry1 = "1a";
+    String entry2 = "1A";
+
+    // create a case sensitive dictionary
+    AbbreviationDictionary dict = getDict();
+
+    dict.add(entry1);
+
+    // should return false because 1a != 1A in a case sensitive lookup
+    assertFalse(dict.contains(entry2));
+  }
+}

Propchange: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/CaseSensitiveAbbreviationDictionaryTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain