You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2013/01/29 16:31:26 UTC

svn commit: r1439943 - in /jena/trunk/jena-arq/src: main/java/org/apache/jena/atlas/lib/Trie.java test/java/org/apache/jena/atlas/lib/TestTrie.java

Author: rvesse
Date: Tue Jan 29 15:31:26 2013
New Revision: 1439943

URL: http://svn.apache.org/viewvc?rev=1439943&view=rev
Log:
Modify Trie to make it sparse and lazily initialized as far as possible.

Add longestMatch(), shortestMatch() and partialSearch() methods for different kinds of Trie lookup.

Expand test coverage for Tries

Modified:
    jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/lib/Trie.java
    jena/trunk/jena-arq/src/test/java/org/apache/jena/atlas/lib/TestTrie.java

Modified: jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/lib/Trie.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/lib/Trie.java?rev=1439943&r1=1439942&r2=1439943&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/lib/Trie.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/lib/Trie.java Tue Jan 29 15:31:26 2013
@@ -28,10 +28,14 @@ import java.util.Map;
  * value type optimized for fast prefix search and match. If you do not need
  * prefix search then you should typically use a standard {@link Map} instead.
  * <p>
- * A Trie cannot store null values since null is used as an internal marker to indicate
- * that there is no value associated with a key.  This is necessary because the nature
- * of the data structure means that adding a key potentially adds multiple keys many of
- * which will not be associated with a value.
+ * The empty or null string may be used as a special key to refer to the root
+ * node of the trie.
+ * </p>
+ * <p>
+ * A Trie cannot store null values since null is used as an internal marker to
+ * indicate that there is no value associated with a key. This is necessary
+ * because the nature of the data structure means that adding a key potentially
+ * adds multiple keys many of which will not be associated with a value.
  * </p>
  * 
  * @param <T>
@@ -70,6 +74,8 @@ public final class Trie<T> {
      */
     private TrieNode<T> moveToNode(String key) {
         TrieNode<T> current = this.root;
+        if (key == null)
+            return current;
         for (int i = 0; i < key.length(); i++) {
             current = current.moveToChild(key.charAt(i));
         }
@@ -85,6 +91,8 @@ public final class Trie<T> {
      */
     private TrieNode<T> find(String key) {
         TrieNode<T> current = this.root;
+        if (key == null)
+            return current;
         for (int i = 0; i < key.length(); i++) {
             current = current.getChild(key.charAt(i));
             if (current == null)
@@ -179,7 +187,9 @@ public final class Trie<T> {
 
     /**
      * Performs a prefix search and returns all values mapped under the given
-     * prefix
+     * prefix. The entirety of the prefix must be matches, if you only want part
+     * of the prefix to be matched use the {@link #partialSearch(String)} method
+     * instead.
      * 
      * @param prefix
      *            Prefix
@@ -193,11 +203,102 @@ public final class Trie<T> {
     }
 
     /**
+     * Performs a search and returns any value associated with any partial or
+     * whole prefix of the key
+     * 
+     * @param key
+     *            Key
+     * @return List of values associated with any partial prefix of the key
+     */
+    public List<T> partialSearch(String key) {
+        List<T> values = new ArrayList<T>();
+        TrieNode<T> current = this.root;
+        if (key == null) {
+            if (current.hasValue())
+                values.add(current.getValue());
+        } else {
+            for (int i = 0; i < key.length(); i++) {
+                if (current.hasValue())
+                    values.add(current.getValue());
+                current = current.getChild(key.charAt(i));
+                if (current == null)
+                    return Collections.unmodifiableList(values);
+            }
+            
+            // If we reach here current is the complete key match
+            // so make sure to include it in the values list
+            if (current.hasValue()) {
+                values.add(current.getValue());
+            }
+            
+        }
+        return Collections.unmodifiableList(values);
+    }
+
+    /**
+     * Finds the shortest match for a given key i.e. returns the value
+     * associated with the shortest prefix of the key that has a value
+     * 
+     * @param key
+     *            Key
+     * @return Shortest Match or null if no possible matches
+     */
+    public T shortestMatch(String key) {
+        TrieNode<T> current = this.root;
+        if (key == null)
+            return current.getValue();
+        for (int i = 0; i < key.length(); i++) {
+            if (current.hasValue())
+                break;
+            current = current.getChild(key.charAt(i));
+            if (current == null)
+                return null;
+        }
+        return current.getValue();
+    }
+
+    /**
+     * Finds the longest match for a given key i.e. returns the value associated
+     * with the longest prefix of the key that has a value
+     * 
+     * @param key
+     *            Key
+     * @return Longest Match or null if no possible matches
+     */
+    public T longestMatch(String key) {
+        T value = null;
+        TrieNode<T> current = this.root;
+        if (key == null) {
+            return current.getValue();
+        } else {
+            for (int i = 0; i < key.length(); i++) {
+                if (current == null)
+                    return value;
+                if (current.hasValue())
+                    value = current.getValue();
+                current = current.getChild(key.charAt(i));
+            }
+        }
+        // If we reach here current is the complete key match
+        // so return its value if it has one
+        if (current.hasValue()) {
+            return current.getValue();
+        }
+        return value;
+    }
+
+    /**
      * Represents a node in the Trie
+     * <p>
+     * The implementation is designed to be sparse such that we delay creation
+     * of things at both leafs and interior nodes until they are actually needed
+     * </p>
      * 
      */
     private static class TrieNode<T> {
-        private Map<Character, TrieNode<T>> children = new HashMap<Character, TrieNode<T>>();
+        private Map<Character, TrieNode<T>> children = null;
+        private Character singletonChildChar = null;
+        private TrieNode<T> singletonChild = null;
         private T value;
 
         /**
@@ -246,7 +347,13 @@ public final class Trie<T> {
          * @return Child
          */
         public TrieNode<T> getChild(Character c) {
-            return this.children.get(c);
+            if (this.children != null) {
+                return this.children.get(c);
+            } else if (c.equals(this.singletonChildChar)) {
+                return this.singletonChild;
+            } else {
+                return null;
+            }
         }
 
         /**
@@ -257,10 +364,22 @@ public final class Trie<T> {
          * @return Child
          */
         public TrieNode<T> moveToChild(Character c) {
-            TrieNode<T> n = this.children.get(c);
+            TrieNode<T> n = this.getChild(c);
             if (n == null) {
                 n = new TrieNode<T>(null);
-                this.children.put(c, n);
+                if (this.children != null) {
+                    // Add to existing map
+                    this.children.put(c, n);
+                } else if (this.singletonChildChar != null) {
+                    // Need to lazily create map
+                    this.children = new HashMap<Character, Trie.TrieNode<T>>();
+                    this.children.put(this.singletonChildChar, this.singletonChild);
+                    this.children.put(c, n);
+                } else {
+                    // Singleton child
+                    this.singletonChildChar = c;
+                    this.singletonChild = n;
+                }
             }
             return n;
         }
@@ -275,8 +394,12 @@ public final class Trie<T> {
             if (this.hasValue()) {
                 values.add(this.value);
             }
-            for (TrieNode<T> child : this.children.values()) {
-                values.addAll(child.getValues());
+            if (this.children != null) {
+                for (TrieNode<T> child : this.children.values()) {
+                    values.addAll(child.getValues());
+                }
+            } else if (this.singletonChild != null) {
+                values.addAll(this.singletonChild.getValues());
             }
             return values;
         }

Modified: jena/trunk/jena-arq/src/test/java/org/apache/jena/atlas/lib/TestTrie.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/org/apache/jena/atlas/lib/TestTrie.java?rev=1439943&r1=1439942&r2=1439943&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/org/apache/jena/atlas/lib/TestTrie.java (original)
+++ jena/trunk/jena-arq/src/test/java/org/apache/jena/atlas/lib/TestTrie.java Tue Jan 29 15:31:26 2013
@@ -77,8 +77,31 @@ public class TestTrie {
     }
     
     /**
+     * Adding a null key is permitted - provides access to root of Trie
+     */
+    @Test
+    public void trie_add_05() {
+        Trie<Integer> trie = new Trie<Integer>();
+        trie.add(null, 123);
+        Assert.assertTrue(trie.contains(null));
+        Assert.assertEquals((Integer)123, trie.get(null));
+    }
+    
+    /**
+     * Adding an empty key is permitted - provides access to root of Trie 
+     */
+    @Test
+    public void trie_add_06() {
+        Trie<Integer> trie = new Trie<Integer>();
+        trie.add("", 123);
+        Assert.assertTrue(trie.contains(""));
+        Assert.assertEquals((Integer)123, trie.get(""));
+    }
+    
+    /**
      * Test for non-existent key
      */
+    @Test
     public void trie_contains_01() {
         Trie<Integer> trie = new Trie<Integer>();
         Assert.assertFalse(trie.contains("test"));
@@ -100,6 +123,26 @@ public class TestTrie {
         Assert.assertFalse(trie.contains("t"));
         Assert.assertTrue(trie.contains("t", false));
     }
+    
+    /**
+     * Test for non-existent null key
+     */
+    @Test
+    public void trie_contains_03() {
+        Trie<Integer> trie = new Trie<Integer>();
+        Assert.assertFalse(trie.contains(null));
+        Assert.assertTrue(trie.contains(null, false));
+    }
+    
+    /**
+     * Test for empty key
+     */
+    @Test
+    public void trie_contains_04() {
+        Trie<Integer> trie = new Trie<Integer>();
+        Assert.assertFalse(trie.contains(""));
+        Assert.assertTrue(trie.contains("", false));
+    }
 
     /**
      * Removing a key value
@@ -144,6 +187,34 @@ public class TestTrie {
     }
     
     /**
+     * Test for removing null key - provides access to trie root
+     */
+    @Test
+    public void trie_remove_03() {
+        Trie<Integer> trie = new Trie<Integer>();
+        trie.add(null, 123);
+        Assert.assertTrue(trie.contains(null));
+        Assert.assertEquals((Integer)123, trie.get(null));
+        
+        trie.remove(null);
+        Assert.assertFalse(trie.contains(null));
+    }
+    
+    /**
+     * Test for removing null key - provides access to trie root
+     */
+    @Test
+    public void trie_remove_04() {
+        Trie<Integer> trie = new Trie<Integer>();
+        trie.add("", 123);
+        Assert.assertTrue(trie.contains(""));
+        Assert.assertEquals((Integer)123, trie.get(""));
+        
+        trie.remove("");
+        Assert.assertFalse(trie.contains(""));
+    }
+    
+    /**
      * Test prefix search
      */
     @Test
@@ -163,5 +234,113 @@ public class TestTrie {
         //Prefix search on "testingly" should return no values
         matches = trie.prefixSearch("testingly");
         Assert.assertEquals(0, matches.size());
+        
+        //Prefix search on null key should give two values
+        matches = trie.prefixSearch(null);
+        Assert.assertEquals(2, matches.size());
+    }
+    
+    /**
+     * Test partial search
+     */
+    @Test
+    public void trie_partial_search_01() {
+        Trie<Integer> trie = new Trie<Integer>();
+        trie.add("test", 123);
+        trie.add("testing", 456);
+        
+        //Partial search on "test" should return one values
+        List<Integer> matches = trie.partialSearch("test");
+        Assert.assertEquals(1, matches.size());
+        
+        //Prefix search on "testi" should return one values
+        matches = trie.partialSearch("testi");
+        Assert.assertEquals(1, matches.size());
+        
+        //Prefix search on "testingly" should return two values
+        matches = trie.partialSearch("testingly");
+        Assert.assertEquals(2, matches.size());
+        
+        //Prefix search on null key should give no values
+        matches = trie.partialSearch(null);
+        Assert.assertEquals(0, matches.size());
+    }
+    
+    /**
+     * Test longest match
+     */
+    @Test
+    public void trie_longest_match_01() {
+        Trie<Integer> trie = new Trie<Integer>();
+        trie.add("test", 123);
+        trie.add("testing", 456);
+        Assert.assertTrue(trie.contains("test"));
+        Assert.assertTrue(trie.contains("testing"));
+        
+        Assert.assertEquals((Integer)456, trie.longestMatch("testing"));
+    }
+    
+    /**
+     * Test longest match
+     */
+    @Test
+    public void trie_longest_match_02() {
+        Trie<Integer> trie = new Trie<Integer>();
+        trie.add("test", 123);
+        trie.add("testing", 456);
+        
+        Assert.assertEquals((Integer)456, trie.longestMatch("testingly"));
+    }
+    
+    /**
+     * Test longest match
+     */
+    @Test
+    public void trie_longest_match_03() {
+        Trie<Integer> trie = new Trie<Integer>();
+        trie.add("test", 123);
+        trie.add("testing", 456);
+        trie.remove("testing");
+        
+        Assert.assertEquals((Integer)123, trie.longestMatch("testing"));
+    }
+    
+    /**
+     * Test shortest match
+     */
+    @Test
+    public void trie_shortest_match_01() {
+        Trie<Integer> trie = new Trie<Integer>();
+        trie.add("test", 123);
+        trie.add("testing", 456);
+        Assert.assertTrue(trie.contains("test"));
+        Assert.assertTrue(trie.contains("testing"));
+        
+        Assert.assertEquals((Integer)123, trie.shortestMatch("testing"));
+    }
+    
+    /**
+     * Test shortest match
+     */
+    @Test
+    public void trie_shortest_match_02() {
+        Trie<Integer> trie = new Trie<Integer>();
+        trie.add("test", 123);
+        trie.add("testing", 456);
+        
+        Assert.assertEquals((Integer)123, trie.shortestMatch("testingly"));
+    }
+    
+    /**
+     * Test shortest match
+     */
+    @Test
+    public void trie_shortest_match_03() {
+        Trie<Integer> trie = new Trie<Integer>();
+        trie.add("test", 123);
+        trie.add("testing", 456);
+        trie.remove("test");
+        
+        Assert.assertEquals((Integer)456, trie.shortestMatch("testing"));
     }
 }