You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2011/07/21 22:26:40 UTC
svn commit: r1149342 - in /incubator/opennlp/trunk/opennlp-tools/src:
main/java/opennlp/tools/dictionary/ test/java/opennlp/tools/dictionary/
Author: colen
Date: Thu Jul 21 20:26:39 2011
New Revision: 1149342
URL: http://svn.apache.org/viewvc?rev=1149342&view=rev
Log:
OPENNLP-225 Added asStringSet method to Dictionary.
Added:
incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseInsensitiveTest.java (with props)
incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseSensitiveTest.java (with props)
Modified:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java?rev=1149342&r1=1149341&r2=1149342&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java Thu Jul 21 20:26:39 2011
@@ -23,6 +23,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
+import java.util.AbstractSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
@@ -278,4 +279,57 @@ public class Dictionary implements Itera
return dictionary;
}
+
+  /**
+   * Exposes this dictionary as a read-only {@code Set<String>} view. Only
+   * {@code iterator()}, {@code size()} and {@code contains(Object)} are
+   * implemented, and the iterator does not support {@code remove()}.
+   *
+   * If an entry of this dictionary consists of multiple tokens, only the
+   * first token of that entry is returned by the iterator of the Set.
+   *
+   * @return a Set view of the entries of this dictionary
+   */
+  public Set<String> asStringSet() {
+    return new AbstractSet<String>() {
+
+      @Override
+      public Iterator<String> iterator() {
+        final Iterator<StringListWrapper> wrappers = entrySet.iterator();
+
+        return new Iterator<String>() {
+
+          public boolean hasNext() {
+            return wrappers.hasNext();
+          }
+
+          public String next() {
+            // every entry contributes only its first token
+            return wrappers.next().getStringList().getToken(0);
+          }
+
+          public void remove() {
+            throw new UnsupportedOperationException();
+          }
+        };
+      }
+
+      @Override
+      public int size() {
+        return entrySet.size();
+      }
+
+      @Override
+      public boolean contains(Object obj) {
+        if (!(obj instanceof String)) {
+          return false;
+        }
+
+        // the string is looked up as a single token entry, wrapped with
+        // the caseSensitive flag of this dictionary
+        StringList key = new StringList((String) obj);
+        return entrySet.contains(new StringListWrapper(key, caseSensitive));
+      }
+    };
+  }
}
Added: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseInsensitiveTest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseInsensitiveTest.java?rev=1149342&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseInsensitiveTest.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseInsensitiveTest.java Thu Jul 21 20:26:39 2011
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package opennlp.tools.dictionary;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import opennlp.tools.util.StringList;
+
+import org.junit.Test;
+
+public class DictionaryAsSetCaseInsensitiveTest {
+
+  private Dictionary getDict() {
+    return new Dictionary(false);
+  }
+
+  private StringList asSL(String str) {
+    return new StringList(str);
+  }
+
+  /**
+   * Tests a basic lookup, which must ignore the case of the token.
+   */
+  @Test
+  public void testLookup() {
+
+    String a = "a";
+    String b = "b";
+
+    Dictionary dict = getDict();
+
+    dict.put(asSL(a));
+
+    Set<String> set = dict.asStringSet();
+
+    assertTrue(set.contains(a));
+    assertFalse(set.contains(b));
+
+    assertTrue(set.contains(a.toUpperCase()));
+  }
+
+  /**
+   * Tests that putting the same entry twice results in a single element.
+   */
+  @Test
+  public void testSet() {
+
+    String a = "a";
+    String a1 = "a";
+
+    Dictionary dict = getDict();
+
+    dict.put(asSL(a));
+    dict.put(asSL(a1));
+
+    Set<String> set = dict.asStringSet();
+
+    assertTrue(set.contains(a));
+    assertEquals(1, set.size());
+  }
+
+  /**
+   * Tests that entries which differ only in case result in a single element.
+   */
+  @Test
+  public void testSetDiffCase() {
+
+    String a = "a";
+    String a1 = "A";
+
+    Dictionary dict = getDict();
+
+    dict.put(asSL(a));
+    dict.put(asSL(a1));
+
+    Set<String> set = dict.asStringSet();
+
+    assertTrue(set.contains(a));
+    assertEquals(1, set.size());
+  }
+
+  /**
+   * Tests the {@code equals(Object)} method of the Set view.
+   */
+  @Test
+  public void testEquals() {
+    String entry1 = "1a";
+    String entry2 = "1b";
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL(entry1));
+    dictA.put(asSL(entry2));
+
+    Set<String> setA = dictA.asStringSet();
+
+    Dictionary dictB = getDict();
+    dictB.put(asSL(entry1));
+    dictB.put(asSL(entry2));
+
+    Set<String> setB = dictB.asStringSet();
+
+    assertTrue(setA.equals(setB));
+  }
+
+  /**
+   * Tests that the {@code equals(Object)} method of the Set view ignores case.
+   */
+  @Test
+  public void testEqualsDifferentCase() {
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL("1a"));
+    dictA.put(asSL("1b"));
+
+    Set<String> setA = dictA.asStringSet();
+
+    Dictionary dictB = getDict();
+    dictB.put(asSL("1A"));
+    dictB.put(asSL("1B"));
+
+    Set<String> setB = dictB.asStringSet();
+
+    assertTrue(setA.equals(setB));
+  }
+
+  /**
+   * Tests the {@code hashCode()} method of the Set view.
+   */
+  @Test
+  public void testHashCode() {
+    String entry1 = "a1";
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL(entry1));
+
+    Set<String> setA = dictA.asStringSet();
+
+    Dictionary dictB = getDict();
+    dictB.put(asSL(entry1));
+
+    Set<String> setB = dictB.asStringSet();
+
+    assertEquals(setA.hashCode(), setB.hashCode());
+  }
+
+  /**
+   * Tests the {@code hashCode()} of Set views whose entries differ in case.
+   */
+  @Test
+  public void testHashCodeDifferentCase() {
+    String entry1 = "a1";
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL(entry1));
+
+    Set<String> setA = dictA.asStringSet();
+
+    Dictionary dictB = getDict();
+    dictB.put(asSL(entry1.toUpperCase()));
+
+    Set<String> setB = dictB.asStringSet();
+    // TODO: should it be equal?? Sets that differ only in case are equal().
+    // compare the int values; assertNotSame would compare boxed Integers
+    assertFalse(setA.hashCode() == setB.hashCode());
+  }
+
+  /**
+   * Tests the lookup of tokens of different case.
+   */
+  @Test
+  public void testDifferentCaseLookup() {
+
+    String entry1 = "1a";
+    String entry2 = "1A";
+
+    // create a case insensitive dictionary
+    Dictionary dict = getDict();
+
+    dict.put(asSL(entry1));
+
+    Set<String> set = dict.asStringSet();
+
+    assertTrue(set.contains(entry2));
+  }
+
+  /**
+   * Tests the iterator implementation.
+   */
+  @Test
+  public void testIterator() {
+
+    String entry1 = "1a";
+    String entry2 = "1b";
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL(entry1));
+    dictA.put(asSL(entry2));
+    dictA.put(asSL(entry1.toUpperCase()));
+    dictA.put(asSL(entry2.toUpperCase()));
+
+    Iterator<String> it = dictA.asStringSet().iterator();
+    List<String> elements = new ArrayList<String>();
+    while (it.hasNext()) {
+      elements.add(it.next());
+    }
+
+    assertEquals(2, elements.size());
+    assertTrue(elements.contains(entry1));
+    assertTrue(elements.contains(entry2));
+
+  }
+}
Propchange: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseInsensitiveTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseSensitiveTest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseSensitiveTest.java?rev=1149342&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseSensitiveTest.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseSensitiveTest.java Thu Jul 21 20:26:39 2011
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package opennlp.tools.dictionary;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import opennlp.tools.util.StringList;
+
+import org.junit.Test;
+
+public class DictionaryAsSetCaseSensitiveTest {
+
+  private Dictionary getDict() {
+    return new Dictionary(true);
+  }
+
+  private StringList asSL(String str) {
+    return new StringList(str);
+  }
+
+  /**
+   * Tests a basic lookup, which must respect the case of the token.
+   */
+  @Test
+  public void testLookup() {
+
+    String a = "a";
+    String b = "b";
+
+    Dictionary dict = getDict();
+
+    dict.put(asSL(a));
+
+    Set<String> set = dict.asStringSet();
+
+    assertTrue(set.contains(a));
+    assertFalse(set.contains(b));
+
+    assertFalse(set.contains(a.toUpperCase()));
+  }
+
+  /**
+   * Tests that putting the same entry twice results in a single element.
+   */
+  @Test
+  public void testSet() {
+
+    String a = "a";
+    String a1 = "a";
+
+    Dictionary dict = getDict();
+
+    dict.put(asSL(a));
+    dict.put(asSL(a1));
+
+    Set<String> set = dict.asStringSet();
+
+    assertTrue(set.contains(a));
+    assertEquals(1, set.size());
+  }
+
+  /**
+   * Tests that entries which differ only in case result in two elements.
+   */
+  @Test
+  public void testSetDiffCase() {
+
+    String a = "a";
+    String a1 = "A";
+
+    Dictionary dict = getDict();
+
+    dict.put(asSL(a));
+    dict.put(asSL(a1));
+
+    Set<String> set = dict.asStringSet();
+
+    assertTrue(set.contains(a));
+    assertEquals(2, set.size());
+  }
+
+  /**
+   * Tests the {@code equals(Object)} method of the Set view.
+   */
+  @Test
+  public void testEquals() {
+    String entry1 = "1a";
+    String entry2 = "1b";
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL(entry1));
+    dictA.put(asSL(entry2));
+
+    Set<String> setA = dictA.asStringSet();
+
+    Dictionary dictB = getDict();
+    dictB.put(asSL(entry1));
+    dictB.put(asSL(entry2));
+
+    Set<String> setB = dictB.asStringSet();
+
+    assertTrue(setA.equals(setB));
+  }
+
+  /**
+   * Tests that the {@code equals(Object)} method of the Set view respects case.
+   */
+  @Test
+  public void testEqualsDifferentCase() {
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL("1a"));
+    dictA.put(asSL("1b"));
+
+    Set<String> setA = dictA.asStringSet();
+
+    Dictionary dictB = getDict();
+    dictB.put(asSL("1A"));
+    dictB.put(asSL("1B"));
+
+    Set<String> setB = dictB.asStringSet();
+
+    // the sets must not be equal in a case sensitive dict
+    assertFalse(setA.equals(setB));
+  }
+
+  /**
+   * Tests the {@code hashCode()} method of the Set view.
+   */
+  @Test
+  public void testHashCode() {
+    String entry1 = "a1";
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL(entry1));
+
+    Set<String> setA = dictA.asStringSet();
+
+    Dictionary dictB = getDict();
+    dictB.put(asSL(entry1));
+
+    Set<String> setB = dictB.asStringSet();
+
+    assertEquals(setA.hashCode(), setB.hashCode());
+  }
+
+  /**
+   * Tests the {@code hashCode()} of Set views whose entries differ in case.
+   */
+  @Test
+  public void testHashCodeDifferentCase() {
+    String entry1 = "a1";
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL(entry1));
+
+    Set<String> setA = dictA.asStringSet();
+
+    Dictionary dictB = getDict();
+    dictB.put(asSL(entry1.toUpperCase()));
+
+    Set<String> setB = dictB.asStringSet();
+    // the sets are not equal() in a case sensitive dict, so the hashes may differ
+    // compare the int values; assertNotSame would compare boxed Integers
+    assertFalse(setA.hashCode() == setB.hashCode());
+  }
+
+  /**
+   * Tests the lookup of tokens of different case.
+   */
+  @Test
+  public void testDifferentCaseLookup() {
+
+    String entry1 = "1a";
+    String entry2 = "1A";
+
+    // create a case sensitive dictionary
+    Dictionary dict = getDict();
+
+    dict.put(asSL(entry1));
+
+    Set<String> set = dict.asStringSet();
+
+    // should return false because 1a != 1A in a case sensitive lookup
+    assertFalse(set.contains(entry2));
+  }
+
+  /**
+   * Tests the iterator implementation.
+   */
+  @Test
+  public void testIterator() {
+
+    String entry1 = "1a";
+    String entry2 = "1b";
+
+    Dictionary dictA = getDict();
+    dictA.put(asSL(entry1));
+    dictA.put(asSL(entry2));
+    dictA.put(asSL(entry1.toUpperCase()));
+    dictA.put(asSL(entry2.toUpperCase()));
+
+    Iterator<String> it = dictA.asStringSet().iterator();
+    List<String> elements = new ArrayList<String>();
+    while (it.hasNext()) {
+      elements.add(it.next());
+    }
+
+    assertEquals(4, elements.size());
+    assertTrue(elements.contains(entry1));
+    assertTrue(elements.contains(entry2));
+    assertTrue(elements.contains(entry1.toUpperCase()));
+    assertTrue(elements.contains(entry2.toUpperCase()));
+
+  }
+}
Propchange: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryAsSetCaseSensitiveTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain