You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2011/03/01 14:04:02 UTC

svn commit: r1075804 - in /lucene/dev/trunk/solr: ./ src/java/org/apache/solr/spelling/suggest/ src/java/org/apache/solr/spelling/suggest/jaspell/ src/java/org/apache/solr/spelling/suggest/tst/ src/test-files/solr/conf/ src/test/org/apache/solr/spellin...

Author: ab
Date: Tue Mar  1 13:04:01 2011
New Revision: 1075804

URL: http://svn.apache.org/viewvc?rev=1075804&view=rev
Log:
SOLR-2375 Store & Load functionality for Suggester Lookup implementations.

Added:
    lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/PersistenceTest.java   (with props)
Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookup.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellTernarySearchTrie.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookup.java
    lucene/dev/trunk/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml
    lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1075804&r1=1075803&r2=1075804&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Tue Mar  1 13:04:01 2011
@@ -175,6 +175,10 @@ Other Changes
   using Generics where applicable in method/object declarations, and
   adding @SuppressWarnings("unchecked") when appropriate (hossman)
 
+* SOLR-2375: Suggester Lookup implementations now store trie data
+  and load it back on init. This means that large tries don't have to be
+  rebuilt on every commit or core reload. (ab)
+
 Documentation
 ----------------------
 

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java?rev=1075804&r1=1075803&r2=1075804&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java Tue Mar  1 13:04:01 2011
@@ -83,6 +83,8 @@ public class Suggester extends SolrSpell
     if (lookupImpl == null) {
       lookupImpl = JaspellLookup.class.getName();
     }
+    lookup = (Lookup) core.getResourceLoader().newInstance(lookupImpl);
+    lookup.init(config, core);
     String store = (String)config.get(STORE_DIR);
     if (store != null) {
       storeDir = new File(store);
@@ -91,6 +93,13 @@ public class Suggester extends SolrSpell
       }
       if (!storeDir.exists()) {
         storeDir.mkdirs();
+      } else {
+        // attempt reload of the stored lookup
+        try {
+          lookup.load(storeDir);
+        } catch (IOException e) {
+          LOG.warn("Loading stored lookup data failed", e);
+        }
       }
     }
     return name;
@@ -107,17 +116,17 @@ public class Suggester extends SolrSpell
         dictionary = new FileDictionary(new InputStreamReader(
                 core.getResourceLoader().openResource(sourceLocation), "UTF-8"));
       } catch (UnsupportedEncodingException e) {
-        e.printStackTrace();
+        // should not happen
+        LOG.error("should not happen", e);
       }
     }
-    lookup = (Lookup) core.getResourceLoader().newInstance(lookupImpl);
     try {
       lookup.build(dictionary);
       if (storeDir != null) {
         lookup.store(storeDir);
       }
     } catch (Exception e) {
-      e.printStackTrace();
+      LOG.error("Error while building or storing Suggester data", e);
     }
   }
 

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookup.java?rev=1075804&r1=1075803&r2=1075804&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookup.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookup.java Tue Mar  1 13:04:01 2011
@@ -1,6 +1,10 @@
 package org.apache.solr.spelling.suggest.jaspell;
 
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
 import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -9,6 +13,7 @@ import org.apache.solr.common.util.Named
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.spelling.suggest.Lookup;
 import org.apache.solr.spelling.suggest.UnsortedTermFreqIteratorWrapper;
+import org.apache.solr.spelling.suggest.jaspell.JaspellTernarySearchTrie.TSTNode;
 import org.apache.solr.util.SortedIterator;
 import org.apache.solr.util.TermFreqIterator;
 import org.slf4j.Logger;
@@ -16,7 +21,7 @@ import org.slf4j.LoggerFactory;
 
 public class JaspellLookup extends Lookup {
   private static final Logger LOG = LoggerFactory.getLogger(JaspellLookup.class);
-  JaspellTernarySearchTrie trie;
+  JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie();
   private boolean usePrefix = true;
   private int editDistance = 2;
 
@@ -89,14 +94,89 @@ public class JaspellLookup extends Looku
     return res;
   }
 
+  public static final String FILENAME = "jaspell.dat";
+  private static final byte LO_KID = 0x01;
+  private static final byte EQ_KID = 0x02;
+  private static final byte HI_KID = 0x04;
+  private static final byte HAS_VALUE = 0x08;
+ 
+  
   @Override
   public boolean load(File storeDir) throws IOException {
-    return false;
+    File data = new File(storeDir, FILENAME);
+    if (!data.exists() || !data.canRead()) {
+      return false;
+    }
+    DataInputStream in = new DataInputStream(new FileInputStream(data));
+    TSTNode root = trie.new TSTNode('\0', null);
+    try {
+      readRecursively(in, root);
+      trie.setRoot(root);
+    } finally {
+      in.close();
+    }
+    return true;
+  }
+  
+  private void readRecursively(DataInputStream in, TSTNode node) throws IOException {
+    node.splitchar = in.readChar();
+    byte mask = in.readByte();
+    if ((mask & HAS_VALUE) != 0) {
+      node.data = new Float(in.readFloat());
+    }
+    if ((mask & LO_KID) != 0) {
+      TSTNode kid = trie.new TSTNode('\0', node);
+      node.relatives[TSTNode.LOKID] = kid;
+      readRecursively(in, kid);
+    }
+    if ((mask & EQ_KID) != 0) {
+      TSTNode kid = trie.new TSTNode('\0', node);
+      node.relatives[TSTNode.EQKID] = kid;
+      readRecursively(in, kid);
+    }
+    if ((mask & HI_KID) != 0) {
+      TSTNode kid = trie.new TSTNode('\0', node);
+      node.relatives[TSTNode.HIKID] = kid;
+      readRecursively(in, kid);
+    }
   }
 
   @Override
   public boolean store(File storeDir) throws IOException {
-    return false;    
+    if (!storeDir.exists() || !storeDir.isDirectory() || !storeDir.canWrite()) {
+      return false;
+    }
+    TSTNode root = trie.getRoot();
+    if (root == null) { // empty tree
+      return false;
+    }
+    File data = new File(storeDir, FILENAME);
+    DataOutputStream out = new DataOutputStream(new FileOutputStream(data));
+    try {
+      writeRecursively(out, root);
+      out.flush();
+    } finally {
+      out.close();
+    }
+    return true;
+  }
+  
+  private void writeRecursively(DataOutputStream out, TSTNode node) throws IOException {
+    if (node == null) {
+      return;
+    }
+    out.writeChar(node.splitchar);
+    byte mask = 0;
+    if (node.relatives[TSTNode.LOKID] != null) mask |= LO_KID;
+    if (node.relatives[TSTNode.EQKID] != null) mask |= EQ_KID;
+    if (node.relatives[TSTNode.HIKID] != null) mask |= HI_KID;
+    if (node.data != null) mask |= HAS_VALUE;
+    out.writeByte(mask);
+    if (node.data != null) {
+      out.writeFloat((Float)node.data);
+    }
+    writeRecursively(out, node.relatives[TSTNode.LOKID]);
+    writeRecursively(out, node.relatives[TSTNode.EQKID]);
+    writeRecursively(out, node.relatives[TSTNode.HIKID]);
   }
-
 }

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellTernarySearchTrie.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellTernarySearchTrie.java?rev=1075804&r1=1075803&r2=1075804&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellTernarySearchTrie.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellTernarySearchTrie.java Tue Mar  1 13:04:01 2011
@@ -127,6 +127,16 @@ public class JaspellTernarySearchTrie {
    */
   public JaspellTernarySearchTrie() {
   }
+  
+  // for loading
+  void setRoot(TSTNode newRoot) {
+    rootNode = newRoot;
+  }
+  
+  // for saving
+  TSTNode getRoot() {
+    return rootNode;
+  }
 
   /**
    * Constructs a Ternary Search Trie and loads data from a <code>File</code>

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookup.java?rev=1075804&r1=1075803&r2=1075804&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookup.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookup.java Tue Mar  1 13:04:01 2011
@@ -1,6 +1,10 @@
 package org.apache.solr.spelling.suggest.tst;
 
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
 import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -13,8 +17,8 @@ import org.apache.solr.util.SortedIterat
 import org.apache.solr.util.TermFreqIterator;
 
 public class TSTLookup extends Lookup {
-  TernaryTreeNode root;
-  TSTAutocomplete autocomplete;
+  TernaryTreeNode root = new TernaryTreeNode();
+  TSTAutocomplete autocomplete = new TSTAutocomplete();
 
   @Override
   public void init(NamedList config, SolrCore core) {
@@ -23,7 +27,6 @@ public class TSTLookup extends Lookup {
   @Override
   public void build(TermFreqIterator tfit) throws IOException {
     root = new TernaryTreeNode();
-    autocomplete = new TSTAutocomplete();
     // buffer first
     if (!(tfit instanceof SortedIterator)) {
       // make sure it's sorted
@@ -48,7 +51,16 @@ public class TSTLookup extends Lookup {
 
   @Override
   public Object get(String key) {
-    throw new UnsupportedOperationException("get() is not supported here");
+    List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
+    if (list == null || list.isEmpty()) {
+      return null;
+    }
+    for (TernaryTreeNode n : list) {
+      if (n.token.equals(key)) {
+        return n.val;
+      }
+    }
+    return null;
   }
 
   @Override
@@ -75,15 +87,94 @@ public class TSTLookup extends Lookup {
     }
     return res;
   }
+  
+  public static final String FILENAME = "tst.dat";
+  
+  private static final byte LO_KID = 0x01;
+  private static final byte EQ_KID = 0x02;
+  private static final byte HI_KID = 0x04;
+  private static final byte HAS_TOKEN = 0x08;
+  private static final byte HAS_VALUE = 0x10;
 
   @Override
-  public boolean load(File storeDir) throws IOException {
-    return false;
+  public synchronized boolean load(File storeDir) throws IOException {
+    File data = new File(storeDir, FILENAME);
+    if (!data.exists() || !data.canRead()) {
+      return false;
+    }
+    DataInputStream in = new DataInputStream(new FileInputStream(data));
+    root = new TernaryTreeNode();
+    try {
+      readRecursively(in, root);
+    } finally {
+      in.close();
+    }
+    return true;
+  }
+  
+  // pre-order traversal
+  private void readRecursively(DataInputStream in, TernaryTreeNode node) throws IOException {
+    node.splitchar = in.readChar();
+    byte mask = in.readByte();
+    if ((mask & HAS_TOKEN) != 0) {
+      node.token = in.readUTF();
+    }
+    if ((mask & HAS_VALUE) != 0) {
+      node.val = new Float(in.readFloat());
+    }
+    if ((mask & LO_KID) != 0) {
+      node.loKid = new TernaryTreeNode();
+      readRecursively(in, node.loKid);
+    }
+    if ((mask & EQ_KID) != 0) {
+      node.eqKid = new TernaryTreeNode();
+      readRecursively(in, node.eqKid);
+    }
+    if ((mask & HI_KID) != 0) {
+      node.hiKid = new TernaryTreeNode();
+      readRecursively(in, node.hiKid);
+    }
   }
 
   @Override
-  public boolean store(File storeDir) throws IOException {
-    return false;
+  public synchronized boolean store(File storeDir) throws IOException {
+    if (!storeDir.exists() || !storeDir.isDirectory() || !storeDir.canWrite()) {
+      return false;
+    }
+    File data = new File(storeDir, FILENAME);
+    DataOutputStream out = new DataOutputStream(new FileOutputStream(data));
+    try {
+      writeRecursively(out, root);
+      out.flush();
+    } finally {
+      out.close();
+    }
+    return true;
+  }
+  
+  // pre-order traversal
+  private void writeRecursively(DataOutputStream out, TernaryTreeNode node) throws IOException {
+    // write out the current node
+    out.writeChar(node.splitchar);
+    // prepare a mask of kids
+    byte mask = 0;
+    if (node.eqKid != null) mask |= EQ_KID;
+    if (node.loKid != null) mask |= LO_KID;
+    if (node.hiKid != null) mask |= HI_KID;
+    if (node.token != null) mask |= HAS_TOKEN;
+    if (node.val != null) mask |= HAS_VALUE;
+    out.writeByte(mask);
+    if (node.token != null) out.writeUTF(node.token);
+    if (node.val != null) out.writeFloat((Float)node.val);
+    // recurse and write kids
+    if (node.loKid != null) {
+      writeRecursively(out, node.loKid);
+    }
+    if (node.eqKid != null) {
+      writeRecursively(out, node.eqKid);
+    }
+    if (node.hiKid != null) {
+      writeRecursively(out, node.hiKid);
+    }
   }
-
 }

Modified: lucene/dev/trunk/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml?rev=1075804&r1=1075803&r2=1075804&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml (original)
+++ lucene/dev/trunk/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml Tue Mar  1 13:04:01 2011
@@ -37,6 +37,7 @@
       <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
       <str name="lookupImpl">org.apache.solr.spelling.suggest.jaspell.JaspellLookup</str>
       <str name="field">suggest</str>
+      <str name="storeDir">suggest</str>
       <str name="buildOnCommit">true</str>
 
       <!-- Suggester properties -->

Added: lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/PersistenceTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/PersistenceTest.java?rev=1075804&view=auto
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/PersistenceTest.java (added)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/PersistenceTest.java Tue Mar  1 13:04:01 2011
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.spelling.suggest;
+
+import java.io.File;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.spelling.suggest.jaspell.JaspellLookup;
+import org.apache.solr.spelling.suggest.tst.TSTLookup;
+import org.junit.Test;
+
+public class PersistenceTest extends SolrTestCaseJ4 {
+  
+  public static final String[] keys = new String[] {
+    "one",
+    "two",
+    "three",
+    "four",
+    "oneness",
+    "onerous",
+    "onesimus",
+    "twofold",
+    "twonk",
+    "thrive",
+    "through",
+    "threat",
+    "foundation",
+    "fourier",
+    "fourty"
+  };
+
+  @Test
+  public void testTSTPersistence() throws Exception {
+    TSTLookup lookup = new TSTLookup();
+    for (String k : keys) {
+      lookup.add(k, new Float(k.length()));
+    }
+    File storeDir = new File(TEST_HOME);
+    lookup.store(storeDir);
+    lookup = new TSTLookup();
+    lookup.load(storeDir);
+    for (String k : keys) {
+      Float val = (Float)lookup.get(k);
+      assertNotNull(k, val);
+      assertEquals(k, k.length(), val.intValue());
+    }
+  }
+  
+  @Test
+  public void testJaspellPersistence() throws Exception {
+    JaspellLookup lookup = new JaspellLookup();
+    for (String k : keys) {
+      lookup.add(k, new Float(k.length()));
+    }
+    File storeDir = new File(TEST_HOME);
+    lookup.store(storeDir);
+    lookup = new JaspellLookup();
+    lookup.load(storeDir);
+    for (String k : keys) {
+      Float val = (Float)lookup.get(k);
+      assertNotNull(k, val);
+      assertEquals(k, k.length(), val.intValue());
+    }
+  }
+  
+}

Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java?rev=1075804&r1=1075803&r2=1075804&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java Tue Mar  1 13:04:01 2011
@@ -27,6 +27,7 @@ import org.apache.solr.util.TermFreqIter
 import org.junit.BeforeClass;
 import org.junit.Test;
 
+import java.io.File;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -65,6 +66,29 @@ public class SuggesterTest extends SolrT
   
   @Test
   public void testReload() throws Exception {
+    String leaveData = System.getProperty("solr.test.leavedatadir");
+    if (leaveData == null) leaveData = "";
+    System.setProperty("solr.test.leavedatadir", "true");
+    addDocs();
+    assertU(commit());
+    File data = dataDir;
+    String config = configString;
+    deleteCore();
+    dataDir = data;
+    configString = config;
+    initCore();
+    assertQ(req("qt","/suggest", "q","ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR, "true"),
+            "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/int[@name='numFound'][.='2']",
+            "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/arr[@name='suggestion']/str[1][.='acquire']",
+            "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/arr[@name='suggestion']/str[2][.='accommodate']"
+        );
+    
+    // restore the property
+    System.setProperty("solr.test.leavedatadir", leaveData);
+  }
+  
+  @Test
+  public void testRebuild() throws Exception {
     addDocs();
     assertU(commit());
     assertQ(req("qt","/suggest", "q","ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR, "true"),