You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2011/07/21 22:26:40 UTC

svn commit: r1149342 - in /incubator/opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/dictionary/ test/java/opennlp/tools/dictionary/

Author: colen
Date: Thu Jul 21 20:26:39 2011
New Revision: 1149342

URL: http://svn.apache.org/viewvc?rev=1149342&view=rev
Log:
OPENNLP-225 Added asStringSet method to Dictionary.

Added:
    incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseInsensitiveTest.java   (with props)
    incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseSensitiveTest.java   (with props)
Modified:
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java?rev=1149342&r1=1149341&r2=1149342&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java Thu Jul 21 20:26:39 2011
@@ -23,6 +23,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.Reader;
+import java.util.AbstractSet;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Set;
@@ -278,4 +279,57 @@ public class Dictionary implements Itera
 
     return dictionary;
   }
+
+  /**
+   * Gets this dictionary as a {@code Set<String>}. Only {@code iterator()},
+   * {@code size()} and {@code contains(Object)} methods are implemented.
+   * 
+   * If this dictionary's entries have multiple tokens, only the first token
+   * of each entry is returned by the iterator.
+   * 
+   * @return a Set backed by the entries of this dictionary
+   */
+  public Set<String> asStringSet() {
+    return new AbstractSet<String>() {
+
+      public Iterator<String> iterator() {
+        final Iterator<StringListWrapper> entries = entrySet.iterator();
+
+        return new Iterator<String>() {
+
+          public boolean hasNext() {
+            return entries.hasNext();
+          }
+
+          public String next() {
+            return entries.next().getStringList().getToken(0); // first token only
+          }
+
+          public void remove() {
+            throw new UnsupportedOperationException(); // removal via the iterator is not supported
+          }
+        };
+      }
+
+      @Override
+      public int size() {
+        return entrySet.size();
+      }
+
+      @Override
+      public boolean contains(Object obj) {
+        boolean result = false;
+
+        if (obj instanceof String) {
+          String str = (String) obj;
+
+          result = entrySet.contains(new StringListWrapper(new StringList(str),
+              caseSensitive));
+
+        }
+
+        return result;
+      }
+    };
+  }
 }

Added: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseInsensitiveTest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseInsensitiveTest.java?rev=1149342&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseInsensitiveTest.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseInsensitiveTest.java Thu Jul 21 20:26:39 2011
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package opennlp.tools.dictionary;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import opennlp.tools.util.StringList;
+
+import org.junit.Test;
+
+public class DictionaryAsSetCaseInsensitiveTest {
+
+  private Dictionary getDict() {
+    return new Dictionary(false);
+  }
+  
+  private StringList asSL(String str) {
+    return new StringList(str);
+  }
+
+  /**
+   * Tests a basic lookup, including a lookup of the same token in upper case.
+   */
+  @Test
+  public void testLookup() {
+
+    String a = "a";
+    String b = "b";
+
+    Dictionary dict = getDict();
+
+    dict.put(asSL(a));
+    
+    Set<String> set = dict.asStringSet();
+
+    assertTrue(set.contains(a));
+    assertFalse(set.contains(b));
+    
+    assertTrue(set.contains(a.toUpperCase()));
+  }
+
+  /**
+   * Tests that putting the same entry twice yields a set of size one.
+   */
+  @Test
+  public void testSet() {
+
+    String a = "a";
+    String a1 = "a";
+
+    Dictionary dict = getDict();
+
+    dict.put(asSL(a));
+    dict.put(asSL(a1));
+    
+    Set<String> set = dict.asStringSet();
+
+    assertTrue(set.contains(a));
+    assertEquals(1, set.size());
+  }
+  
+  /**
+   * Tests that entries differing only in case count as a single element.
+   */
+  @Test
+  public void testSetDiffCase() {
+
+    String a = "a";
+    String a1 = "A";
+
+    Dictionary dict = getDict();
+
+    dict.put(asSL(a));
+    dict.put(asSL(a1));
+    
+    Set<String> set = dict.asStringSet();
+
+    assertTrue(set.contains(a));
+    assertEquals(1, set.size());
+  }
+
+  /**
+   * Tests {@code equals} on the sets of two dictionaries with the same entries.
+   */
+  @Test
+  public void testEquals() {
+    String entry1 = "1a";
+    String entry2 = "1b";
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL(entry1));
+    dictA.put(asSL(entry2));
+    
+    Set<String> setA = dictA.asStringSet();
+
+    Dictionary dictB = getDict();
+    dictB.put(asSL(entry1));
+    dictB.put(asSL(entry2));
+    
+    Set<String> setB = dictB.asStringSet();
+
+    assertTrue(setA.equals(setB));
+  }
+  
+  /**
+   * Tests that sets whose entries differ only in case are reported as equal.
+   */
+  @Test
+  public void testEqualsDifferentCase() {
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL("1a"));
+    dictA.put(asSL("1b"));
+    
+    Set<String> setA = dictA.asStringSet();
+
+    Dictionary dictB = getDict();
+    dictB.put(asSL("1A"));
+    dictB.put(asSL("1B"));
+    
+    Set<String> setB = dictB.asStringSet();
+
+    assertTrue(setA.equals(setB));
+  }
+
+  /**
+   * Tests that sets with the same entries have the same {@code hashCode}.
+   */
+  @Test
+  public void testHashCode() {
+    String entry1 = "a1";
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL(entry1));
+    
+    Set<String> setA = dictA.asStringSet();
+
+    Dictionary dictB = getDict();
+    dictB.put(asSL(entry1));
+
+    Set<String> setB = dictB.asStringSet();
+
+    assertEquals(setA.hashCode(), setB.hashCode());
+  }
+  
+  /**
+   * Tests {@code hashCode} of sets whose entries differ only in case.
+   */
+  @Test
+  public void testHashCodeDifferentCase() {
+    String entry1 = "a1";
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL(entry1));
+    
+    Set<String> setA = dictA.asStringSet();
+
+    Dictionary dictB = getDict();
+    dictB.put(asSL(entry1.toUpperCase()));
+    
+    Set<String> setB = dictB.asStringSet();
+
+    // TODO: should it be equal?? NOTE(review): assertNotSame compares boxed identity, not int values - confirm intent
+    assertNotSame(setA.hashCode(), setB.hashCode());
+  }
+
+  /**
+   * Tests the lookup of tokens of different case.
+   */
+  @Test
+  public void testDifferentCaseLookup() {
+
+    String entry1 = "1a";
+    String entry2 = "1A";
+
+    // create a case insensitive dictionary
+    Dictionary dict = getDict();
+
+    dict.put(asSL(entry1));
+    
+    Set<String> set = dict.asStringSet();
+
+    assertTrue(set.contains(entry2));
+  }
+  
+  /**
+   * Tests that the iterator yields each entry once when case variants are added.
+   */
+  @Test
+  public void testIterator() {
+    
+    String entry1 = "1a";
+    String entry2 = "1b";
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL(entry1));
+    dictA.put(asSL(entry2));
+    dictA.put(asSL(entry1.toUpperCase()));
+    dictA.put(asSL(entry2.toUpperCase()));
+    
+    Iterator<String> it = dictA.asStringSet().iterator();
+    List<String> elements = new ArrayList<String>();
+    while (it.hasNext()) {
+      elements.add(it.next());
+    }
+    
+    assertEquals(2, elements.size());
+    assertTrue(elements.contains(entry1));
+    assertTrue(elements.contains(entry2));
+    
+  }
+}

Propchange: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseInsensitiveTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseSensitiveTest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseSensitiveTest.java?rev=1149342&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseSensitiveTest.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseSensitiveTest.java Thu Jul 21 20:26:39 2011
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package opennlp.tools.dictionary;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import opennlp.tools.util.StringList;
+
+import org.junit.Test;
+
+public class DictionaryAsSetCaseSensitiveTest {
+
+  private Dictionary getDict() {
+    return new Dictionary(true);
+  }
+  
+  private StringList asSL(String str) {
+    return new StringList(str);
+  }
+
+  /**
+   * Tests a basic lookup; a lookup of the same token in upper case must fail.
+   */
+  @Test
+  public void testLookup() {
+
+    String a = "a";
+    String b = "b";
+
+    Dictionary dict = getDict();
+
+    dict.put(asSL(a));
+    
+    Set<String> set = dict.asStringSet();
+
+    assertTrue(set.contains(a));
+    assertFalse(set.contains(b));
+    
+    assertFalse(set.contains(a.toUpperCase()));
+  }
+
+  /**
+   * Tests that putting the same entry twice yields a set of size one.
+   */
+  @Test
+  public void testSet() {
+
+    String a = "a";
+    String a1 = "a";
+
+    Dictionary dict = getDict();
+
+    dict.put(asSL(a));
+    dict.put(asSL(a1));
+    
+    Set<String> set = dict.asStringSet();
+
+    assertTrue(set.contains(a));
+    assertEquals(1, set.size());
+  }
+  
+  /**
+   * Tests that entries differing only in case are kept as two elements.
+   */
+  @Test
+  public void testSetDiffCase() {
+
+    String a = "a";
+    String a1 = "A";
+
+    Dictionary dict = getDict();
+
+    dict.put(asSL(a));
+    dict.put(asSL(a1));
+    
+    Set<String> set = dict.asStringSet();
+
+    assertTrue(set.contains(a));
+    assertEquals(2, set.size());
+  }
+
+  /**
+   * Tests {@code equals} on the sets of two dictionaries with the same entries.
+   */
+  @Test
+  public void testEquals() {
+    String entry1 = "1a";
+    String entry2 = "1b";
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL(entry1));
+    dictA.put(asSL(entry2));
+    
+    Set<String> setA = dictA.asStringSet();
+
+    Dictionary dictB = getDict();
+    dictB.put(asSL(entry1));
+    dictB.put(asSL(entry2));
+    
+    Set<String> setB = dictB.asStringSet();
+
+    assertTrue(setA.equals(setB));
+  }
+  
+  /**
+   * Tests that sets whose entries differ only in case are not equal.
+   */
+  @Test
+  public void testEqualsDifferentCase() {
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL("1a"));
+    dictA.put(asSL("1b"));
+    
+    Set<String> setA = dictA.asStringSet();
+
+    Dictionary dictB = getDict();
+    dictB.put(asSL("1A"));
+    dictB.put(asSL("1B"));
+    
+    Set<String> setB = dictB.asStringSet();
+
+    // the sets must not be equal in a case sensitive dict
+    assertFalse(setA.equals(setB));
+  }
+
+  /**
+   * Tests that sets with the same entries have the same {@code hashCode}.
+   */
+  @Test
+  public void testHashCode() {
+    String entry1 = "a1";
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL(entry1));
+    
+    Set<String> setA = dictA.asStringSet();
+
+    Dictionary dictB = getDict();
+    dictB.put(asSL(entry1));
+
+    Set<String> setB = dictB.asStringSet();
+
+    assertEquals(setA.hashCode(), setB.hashCode());
+  }
+  
+  /**
+   * Tests {@code hashCode} of sets whose entries differ only in case.
+   */
+  @Test
+  public void testHashCodeDifferentCase() {
+    String entry1 = "a1";
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL(entry1));
+    
+    Set<String> setA = dictA.asStringSet();
+
+    Dictionary dictB = getDict();
+    dictB.put(asSL(entry1.toUpperCase()));
+    
+    Set<String> setB = dictB.asStringSet();
+
+    // TODO: should it be equal?? NOTE(review): assertNotSame compares boxed identity, not int values - confirm intent
+    assertNotSame(setA.hashCode(), setB.hashCode());
+  }
+
+  /**
+   * Tests the lookup of tokens of different case.
+   */
+  @Test
+  public void testDifferentCaseLookup() {
+
+    String entry1 = "1a";
+    String entry2 = "1A";
+
+    // create a case sensitive dictionary
+    Dictionary dict = getDict();
+
+    dict.put(asSL(entry1));
+    
+    Set<String> set = dict.asStringSet();
+
+    // should return false because 1a != 1A in a case sensitive lookup
+    assertFalse(set.contains(entry2));
+  }
+  
+  /**
+   * Tests that the iterator returns every entry, including the case variants.
+   */
+  @Test
+  public void testIterator() {
+    
+    String entry1 = "1a";
+    String entry2 = "1b";
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL(entry1));
+    dictA.put(asSL(entry2));
+    dictA.put(asSL(entry1.toUpperCase()));
+    dictA.put(asSL(entry2.toUpperCase()));
+    
+    Iterator<String> it = dictA.asStringSet().iterator();
+    List<String> elements = new ArrayList<String>();
+    while (it.hasNext()) {
+      elements.add(it.next());
+    }
+    
+    assertEquals(4, elements.size());
+    assertTrue(elements.contains(entry1));
+    assertTrue(elements.contains(entry2));
+    assertTrue(elements.contains(entry1.toUpperCase()));
+    assertTrue(elements.contains(entry2.toUpperCase()));
+    
+  }
+}

Propchange: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseSensitiveTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain