You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2011/03/01 14:04:02 UTC
svn commit: r1075804 - in /lucene/dev/trunk/solr: ./
src/java/org/apache/solr/spelling/suggest/
src/java/org/apache/solr/spelling/suggest/jaspell/
src/java/org/apache/solr/spelling/suggest/tst/ src/test-files/solr/conf/
src/test/org/apache/solr/spelling/suggest/
Author: ab
Date: Tue Mar 1 13:04:01 2011
New Revision: 1075804
URL: http://svn.apache.org/viewvc?rev=1075804&view=rev
Log:
SOLR-2375 Store & Load functionality for Suggester Lookup implementations.
Added:
lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/PersistenceTest.java (with props)
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java
lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookup.java
lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellTernarySearchTrie.java
lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookup.java
lucene/dev/trunk/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml
lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1075804&r1=1075803&r2=1075804&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Tue Mar 1 13:04:01 2011
@@ -175,6 +175,10 @@ Other Changes
using Generics where applicable in method/object declarations, and
adding @SuppressWarnings("unchecked") when appropriate (hossman)
+* SOLR-2375: Suggester Lookup implementations now store trie data
+ and load it back on init. This means that large tries don't have to be
+ rebuilt on every commit or core reload. (ab)
+
Documentation
----------------------
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java?rev=1075804&r1=1075803&r2=1075804&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/Suggester.java Tue Mar 1 13:04:01 2011
@@ -83,6 +83,8 @@ public class Suggester extends SolrSpell
if (lookupImpl == null) {
lookupImpl = JaspellLookup.class.getName();
}
+ lookup = (Lookup) core.getResourceLoader().newInstance(lookupImpl);
+ lookup.init(config, core);
String store = (String)config.get(STORE_DIR);
if (store != null) {
storeDir = new File(store);
@@ -91,6 +93,13 @@ public class Suggester extends SolrSpell
}
if (!storeDir.exists()) {
storeDir.mkdirs();
+ } else {
+ // attempt reload of the stored lookup
+ try {
+ lookup.load(storeDir);
+ } catch (IOException e) {
+ LOG.warn("Loading stored lookup data failed", e);
+ }
}
}
return name;
@@ -107,17 +116,17 @@ public class Suggester extends SolrSpell
dictionary = new FileDictionary(new InputStreamReader(
core.getResourceLoader().openResource(sourceLocation), "UTF-8"));
} catch (UnsupportedEncodingException e) {
- e.printStackTrace();
+ // should not happen
+ LOG.error("should not happen", e);
}
}
- lookup = (Lookup) core.getResourceLoader().newInstance(lookupImpl);
try {
lookup.build(dictionary);
if (storeDir != null) {
lookup.store(storeDir);
}
} catch (Exception e) {
- e.printStackTrace();
+ LOG.error("Error while building or storing Suggester data", e);
}
}
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookup.java?rev=1075804&r1=1075803&r2=1075804&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookup.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookup.java Tue Mar 1 13:04:01 2011
@@ -1,6 +1,10 @@
package org.apache.solr.spelling.suggest.jaspell;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -9,6 +13,7 @@ import org.apache.solr.common.util.Named
import org.apache.solr.core.SolrCore;
import org.apache.solr.spelling.suggest.Lookup;
import org.apache.solr.spelling.suggest.UnsortedTermFreqIteratorWrapper;
+import org.apache.solr.spelling.suggest.jaspell.JaspellTernarySearchTrie.TSTNode;
import org.apache.solr.util.SortedIterator;
import org.apache.solr.util.TermFreqIterator;
import org.slf4j.Logger;
@@ -16,7 +21,7 @@ import org.slf4j.LoggerFactory;
public class JaspellLookup extends Lookup {
private static final Logger LOG = LoggerFactory.getLogger(JaspellLookup.class);
- JaspellTernarySearchTrie trie;
+ JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie();
private boolean usePrefix = true;
private int editDistance = 2;
@@ -89,14 +94,89 @@ public class JaspellLookup extends Looku
return res;
}
+ public static final String FILENAME = "jaspell.dat";
+ private static final byte LO_KID = 0x01;
+ private static final byte EQ_KID = 0x02;
+ private static final byte HI_KID = 0x04;
+ private static final byte HAS_VALUE = 0x08;
+
+
@Override
public boolean load(File storeDir) throws IOException {
- return false;
+ File data = new File(storeDir, FILENAME);
+ if (!data.exists() || !data.canRead()) {
+ return false;
+ }
+ DataInputStream in = new DataInputStream(new FileInputStream(data));
+ TSTNode root = trie.new TSTNode('\0', null);
+ try {
+ readRecursively(in, root);
+ trie.setRoot(root);
+ } finally {
+ in.close();
+ }
+ return true;
+ }
+
+ private void readRecursively(DataInputStream in, TSTNode node) throws IOException {
+ node.splitchar = in.readChar();
+ byte mask = in.readByte();
+ if ((mask & HAS_VALUE) != 0) {
+ node.data = new Float(in.readFloat());
+ }
+ if ((mask & LO_KID) != 0) {
+ TSTNode kid = trie.new TSTNode('\0', node);
+ node.relatives[TSTNode.LOKID] = kid;
+ readRecursively(in, kid);
+ }
+ if ((mask & EQ_KID) != 0) {
+ TSTNode kid = trie.new TSTNode('\0', node);
+ node.relatives[TSTNode.EQKID] = kid;
+ readRecursively(in, kid);
+ }
+ if ((mask & HI_KID) != 0) {
+ TSTNode kid = trie.new TSTNode('\0', node);
+ node.relatives[TSTNode.HIKID] = kid;
+ readRecursively(in, kid);
+ }
}
@Override
public boolean store(File storeDir) throws IOException {
- return false;
+ if (!storeDir.exists() || !storeDir.isDirectory() || !storeDir.canWrite()) {
+ return false;
+ }
+ TSTNode root = trie.getRoot();
+ if (root == null) { // empty tree
+ return false;
+ }
+ File data = new File(storeDir, FILENAME);
+ DataOutputStream out = new DataOutputStream(new FileOutputStream(data));
+ try {
+ writeRecursively(out, root);
+ out.flush();
+ } finally {
+ out.close();
+ }
+ return true;
+ }
+
+ private void writeRecursively(DataOutputStream out, TSTNode node) throws IOException {
+ if (node == null) {
+ return;
+ }
+ out.writeChar(node.splitchar);
+ byte mask = 0;
+ if (node.relatives[TSTNode.LOKID] != null) mask |= LO_KID;
+ if (node.relatives[TSTNode.EQKID] != null) mask |= EQ_KID;
+ if (node.relatives[TSTNode.HIKID] != null) mask |= HI_KID;
+ if (node.data != null) mask |= HAS_VALUE;
+ out.writeByte(mask);
+ if (node.data != null) {
+ out.writeFloat((Float)node.data);
+ }
+ writeRecursively(out, node.relatives[TSTNode.LOKID]);
+ writeRecursively(out, node.relatives[TSTNode.EQKID]);
+ writeRecursively(out, node.relatives[TSTNode.HIKID]);
}
-
}
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellTernarySearchTrie.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellTernarySearchTrie.java?rev=1075804&r1=1075803&r2=1075804&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellTernarySearchTrie.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellTernarySearchTrie.java Tue Mar 1 13:04:01 2011
@@ -127,6 +127,16 @@ public class JaspellTernarySearchTrie {
*/
public JaspellTernarySearchTrie() {
}
+
+ // for loading
+ void setRoot(TSTNode newRoot) {
+ rootNode = newRoot;
+ }
+
+ // for saving
+ TSTNode getRoot() {
+ return rootNode;
+ }
/**
* Constructs a Ternary Search Trie and loads data from a <code>File</code>
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookup.java?rev=1075804&r1=1075803&r2=1075804&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookup.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookup.java Tue Mar 1 13:04:01 2011
@@ -1,6 +1,10 @@
package org.apache.solr.spelling.suggest.tst;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -13,8 +17,8 @@ import org.apache.solr.util.SortedIterat
import org.apache.solr.util.TermFreqIterator;
public class TSTLookup extends Lookup {
- TernaryTreeNode root;
- TSTAutocomplete autocomplete;
+ TernaryTreeNode root = new TernaryTreeNode();
+ TSTAutocomplete autocomplete = new TSTAutocomplete();
@Override
public void init(NamedList config, SolrCore core) {
@@ -23,7 +27,6 @@ public class TSTLookup extends Lookup {
@Override
public void build(TermFreqIterator tfit) throws IOException {
root = new TernaryTreeNode();
- autocomplete = new TSTAutocomplete();
// buffer first
if (!(tfit instanceof SortedIterator)) {
// make sure it's sorted
@@ -48,7 +51,16 @@ public class TSTLookup extends Lookup {
@Override
public Object get(String key) {
- throw new UnsupportedOperationException("get() is not supported here");
+ List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
+ if (list == null || list.isEmpty()) {
+ return null;
+ }
+ for (TernaryTreeNode n : list) {
+ if (n.token.equals(key)) {
+ return n.val;
+ }
+ }
+ return null;
}
@Override
@@ -75,15 +87,94 @@ public class TSTLookup extends Lookup {
}
return res;
}
+
+ public static final String FILENAME = "tst.dat";
+
+ private static final byte LO_KID = 0x01;
+ private static final byte EQ_KID = 0x02;
+ private static final byte HI_KID = 0x04;
+ private static final byte HAS_TOKEN = 0x08;
+ private static final byte HAS_VALUE = 0x10;
@Override
- public boolean load(File storeDir) throws IOException {
- return false;
+ public synchronized boolean load(File storeDir) throws IOException {
+ File data = new File(storeDir, FILENAME);
+ if (!data.exists() || !data.canRead()) {
+ return false;
+ }
+ DataInputStream in = new DataInputStream(new FileInputStream(data));
+ root = new TernaryTreeNode();
+ try {
+ readRecursively(in, root);
+ } finally {
+ in.close();
+ }
+ return true;
+ }
+
+ // pre-order traversal
+ private void readRecursively(DataInputStream in, TernaryTreeNode node) throws IOException {
+ node.splitchar = in.readChar();
+ byte mask = in.readByte();
+ if ((mask & HAS_TOKEN) != 0) {
+ node.token = in.readUTF();
+ }
+ if ((mask & HAS_VALUE) != 0) {
+ node.val = new Float(in.readFloat());
+ }
+ if ((mask & LO_KID) != 0) {
+ node.loKid = new TernaryTreeNode();
+ readRecursively(in, node.loKid);
+ }
+ if ((mask & EQ_KID) != 0) {
+ node.eqKid = new TernaryTreeNode();
+ readRecursively(in, node.eqKid);
+ }
+ if ((mask & HI_KID) != 0) {
+ node.hiKid = new TernaryTreeNode();
+ readRecursively(in, node.hiKid);
+ }
}
@Override
- public boolean store(File storeDir) throws IOException {
- return false;
+ public synchronized boolean store(File storeDir) throws IOException {
+ if (!storeDir.exists() || !storeDir.isDirectory() || !storeDir.canWrite()) {
+ return false;
+ }
+ File data = new File(storeDir, FILENAME);
+ DataOutputStream out = new DataOutputStream(new FileOutputStream(data));
+ try {
+ writeRecursively(out, root);
+ out.flush();
+ } finally {
+ out.close();
+ }
+ return true;
+ }
+
+ // pre-order traversal
+ private void writeRecursively(DataOutputStream out, TernaryTreeNode node) throws IOException {
+ // write out the current node
+ out.writeChar(node.splitchar);
+ // prepare a mask of kids
+ byte mask = 0;
+ if (node.eqKid != null) mask |= EQ_KID;
+ if (node.loKid != null) mask |= LO_KID;
+ if (node.hiKid != null) mask |= HI_KID;
+ if (node.token != null) mask |= HAS_TOKEN;
+ if (node.val != null) mask |= HAS_VALUE;
+ out.writeByte(mask);
+ if (node.token != null) out.writeUTF(node.token);
+ if (node.val != null) out.writeFloat((Float)node.val);
+ // recurse and write kids
+ if (node.loKid != null) {
+ writeRecursively(out, node.loKid);
+ }
+ if (node.eqKid != null) {
+ writeRecursively(out, node.eqKid);
+ }
+ if (node.hiKid != null) {
+ writeRecursively(out, node.hiKid);
+ }
}
-
}
Modified: lucene/dev/trunk/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml?rev=1075804&r1=1075803&r2=1075804&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml (original)
+++ lucene/dev/trunk/solr/src/test-files/solr/conf/solrconfig-spellchecker.xml Tue Mar 1 13:04:01 2011
@@ -37,6 +37,7 @@
<str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
<str name="lookupImpl">org.apache.solr.spelling.suggest.jaspell.JaspellLookup</str>
<str name="field">suggest</str>
+ <str name="storeDir">suggest</str>
<str name="buildOnCommit">true</str>
<!-- Suggester properties -->
Added: lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/PersistenceTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/PersistenceTest.java?rev=1075804&view=auto
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/PersistenceTest.java (added)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/PersistenceTest.java Tue Mar 1 13:04:01 2011
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.spelling.suggest;
+
+import java.io.File;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.spelling.suggest.jaspell.JaspellLookup;
+import org.apache.solr.spelling.suggest.tst.TSTLookup;
+import org.junit.Test;
+
+public class PersistenceTest extends SolrTestCaseJ4 {
+
+ public static final String[] keys = new String[] {
+ "one",
+ "two",
+ "three",
+ "four",
+ "oneness",
+ "onerous",
+ "onesimus",
+ "twofold",
+ "twonk",
+ "thrive",
+ "through",
+ "threat",
+ "foundation",
+ "fourier",
+ "fourty"
+ };
+
+ @Test
+ public void testTSTPersistence() throws Exception {
+ TSTLookup lookup = new TSTLookup();
+ for (String k : keys) {
+ lookup.add(k, new Float(k.length()));
+ }
+ File storeDir = new File(TEST_HOME);
+ lookup.store(storeDir);
+ lookup = new TSTLookup();
+ lookup.load(storeDir);
+ for (String k : keys) {
+ Float val = (Float)lookup.get(k);
+ assertNotNull(k, val);
+ assertEquals(k, k.length(), val.intValue());
+ }
+ }
+
+ @Test
+ public void testJaspellPersistence() throws Exception {
+ JaspellLookup lookup = new JaspellLookup();
+ for (String k : keys) {
+ lookup.add(k, new Float(k.length()));
+ }
+ File storeDir = new File(TEST_HOME);
+ lookup.store(storeDir);
+ lookup = new JaspellLookup();
+ lookup.load(storeDir);
+ for (String k : keys) {
+ Float val = (Float)lookup.get(k);
+ assertNotNull(k, val);
+ assertEquals(k, k.length(), val.intValue());
+ }
+ }
+
+}
Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java?rev=1075804&r1=1075803&r2=1075804&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTest.java Tue Mar 1 13:04:01 2011
@@ -27,6 +27,7 @@ import org.apache.solr.util.TermFreqIter
import org.junit.BeforeClass;
import org.junit.Test;
+import java.io.File;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -65,6 +66,29 @@ public class SuggesterTest extends SolrT
@Test
public void testReload() throws Exception {
+ String leaveData = System.getProperty("solr.test.leavedatadir");
+ if (leaveData == null) leaveData = "";
+ System.setProperty("solr.test.leavedatadir", "true");
+ addDocs();
+ assertU(commit());
+ File data = dataDir;
+ String config = configString;
+ deleteCore();
+ dataDir = data;
+ configString = config;
+ initCore();
+ assertQ(req("qt","/suggest", "q","ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR, "true"),
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/int[@name='numFound'][.='2']",
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/arr[@name='suggestion']/str[1][.='acquire']",
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='ac']/arr[@name='suggestion']/str[2][.='accommodate']"
+ );
+
+ // restore the property
+ System.setProperty("solr.test.leavedatadir", leaveData);
+ }
+
+ @Test
+ public void testRebuild() throws Exception {
addDocs();
assertU(commit());
assertQ(req("qt","/suggest", "q","ac", SpellingParams.SPELLCHECK_COUNT, "2", SpellingParams.SPELLCHECK_ONLY_MORE_POPULAR, "true"),