You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by to...@apache.org on 2015/09/02 09:58:32 UTC
svn commit: r1700721 - in /jackrabbit/oak/trunk/oak-solr-core/src:
main/resources/solr/oak/conf/ test/java/org/apache/jackrabbit/oak/jcr/query/
test/resources/org/apache/jackrabbit/oak/query/ test/resources/solr/oak/conf/
Author: tommaso
Date: Wed Sep 2 07:58:32 2015
New Revision: 1700721
URL: http://svn.apache.org/r1700721
Log:
OAK-3331 - support multiple words spellchecking in solr
Removed:
jackrabbit/oak/trunk/oak-solr-core/src/test/resources/org/apache/jackrabbit/oak/query/sql1.txt
Modified:
jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/schema.xml
jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml
jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java
jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/schema.xml
jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/solrconfig.xml
Modified: jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/schema.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/schema.xml?rev=1700721&r1=1700720&r2=1700721&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/schema.xml (original)
+++ jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/schema.xml Wed Sep 2 07:58:32 2015
@@ -79,6 +79,18 @@
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>
+ <fieldtype name="spellcheck" class="solr.TextField"> <!-- analysis chains for text used as the spellcheck dictionary -->
+ <analyzer type="index">
+ <tokenizer class="solr.ClassicTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.ShingleFilterFactory" minShingleSize="2" maxShingleSize="3"
+ outputUnigrams="true" outputUnigramsIfNoShingles="true" tokenSeparator=" " fillerToken="*"/> <!-- index single words plus 2- and 3-word shingles joined by a space, so short phrases become terms of their own -->
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.KeywordTokenizerFactory"/> <!-- keyword tokenizer: the whole input stays one token, so a multi-word query can be compared against the indexed shingles -->
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldtype>
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
@@ -118,6 +130,7 @@
<field name=":indexed" type="tdate" indexed="true" stored="false" default="NOW" docValues="true"/>
<field name=":suggest-weight" type="tint" indexed="false" stored="false" default="1" docValues="true"/>
<field name=":suggest" type="string" indexed="true" stored="true" multiValued="true" />
+ <field name=":spellcheck" type="spellcheck" indexed="true" stored="false" multiValued="true" />
<field name="path_collapsed" type="string" indexed="true" stored="false"/>
<field name="path_depth" type="tint" indexed="true" stored="false"/>
<field name="_version_" type="long" indexed="true" stored="true"/>
@@ -136,6 +149,8 @@
<copyField source="path_exact" dest="path_child"/>
<copyField source="path_exact" dest=":path"/>
<copyField source="*" dest="catch_all"/>
+ <copyField source="jcr:title" dest=":spellcheck"/>
+ <copyField source="jcr:description" dest=":spellcheck"/>
<copyField source="jcr:title" dest=":suggest"/>
<copyField source="jcr:description" dest=":suggest"/>
Modified: jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml?rev=1700721&r1=1700720&r2=1700721&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml (original)
+++ jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml Wed Sep 2 07:58:32 2015
@@ -1176,12 +1176,12 @@
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
- <str name="queryAnalyzerFieldType">text_general</str>
+ <str name="queryAnalyzerFieldType">spellcheck</str>
<!-- a spellchecker built from a field of the main index -->
<lst name="spellchecker">
<str name="name">default</str>
- <str name="field">catch_all</str>
+ <str name="field">:spellcheck</str>
<str name="classname">solr.DirectSolrSpellChecker</str>
<str name="distanceMeasure">internal</str>
<!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
@@ -1202,7 +1202,6 @@
<requestHandler name="/spellcheck" class="solr.SearchHandler" startup="lazy">
<lst name="defaults">
- <str name="df">catch_all</str>
<str name="spellcheck.dictionary">default</str>
<str name="spellcheck">on</str>
<str name="spellcheck.extendedResults">true</str>
Modified: jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java?rev=1700721&r1=1700720&r2=1700721&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java (original)
+++ jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java Wed Sep 2 07:58:32 2015
@@ -38,9 +38,9 @@ public class SpellcheckTest extends Abst
Session session = superuser;
QueryManager qm = session.getWorkspace().getQueryManager();
Node n1 = testRootNode.addNode("node1");
- n1.setProperty("text", "hello hello hello alt");
+ n1.setProperty("jcr:title", "hello hello hello alt");
Node n2 = testRootNode.addNode("node2");
- n2.setProperty("text", "hold");
+ n2.setProperty("jcr:title", "hold");
session.save();
String sql = "SELECT [rep:spellcheck()] FROM nt:base WHERE [jcr:path] = '/' AND SPELLCHECK('helo')";
@@ -54,9 +54,9 @@ public class SpellcheckTest extends Abst
Session session = superuser;
QueryManager qm = session.getWorkspace().getQueryManager();
Node n1 = testRootNode.addNode("node1");
- n1.setProperty("text", "hello hello hello alt");
+ n1.setProperty("jcr:title", "hello hello hello alt");
Node n2 = testRootNode.addNode("node2");
- n2.setProperty("text", "hold");
+ n2.setProperty("jcr:title", "hold");
session.save();
String xpath = "/jcr:root[rep:spellcheck('helo')]/(rep:spellcheck())";
@@ -66,6 +66,26 @@ public class SpellcheckTest extends Abst
assertEquals("[hello, hold]", result);
}
+ public void testSpellcheckMultipleWords() throws Exception { // a misspelled multi-word phrase must be corrected as one phrase
+ Session session = superuser;
+ QueryManager qm = session.getWorkspace().getQueryManager();
+ Node n1 = testRootNode.addNode("node1"); // four nodes, each carrying its own jcr:title sentence
+ n1.setProperty("jcr:title", "it is always a good idea to go visiting ontario");
+ Node n2 = testRootNode.addNode("node2");
+ n2.setProperty("jcr:title", "ontario is a nice place to live in");
+ Node n3 = testRootNode.addNode("node3");
+ n3.setProperty("jcr:title", "I flied to ontario for voting for the major polls"); // set on n3 itself so node2's title is not overwritten
+ Node n4 = testRootNode.addNode("node4");
+ n4.setProperty("jcr:title", "I will go voting in ontario, I always voted since I've been allowed to"); // the only title containing the phrase "voting in ontario"
+ session.save();
+
+ String xpath = "/jcr:root[rep:spellcheck('votin in ontari')]/(rep:spellcheck())"; // two of the three words are misspelled
+ Query q = qm.createQuery(xpath, Query.XPATH);
+ String result = getResult(q.execute(), "rep:spellcheck()");
+ assertNotNull(result);
+ assertEquals("[voting in ontario]", result); // exactly one suggestion: the whole corrected phrase
+ }
+
static String getResult(QueryResult result, String propertyName) throws RepositoryException {
StringBuilder buff = new StringBuilder();
RowIterator it = result.getRows();
Modified: jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/schema.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/schema.xml?rev=1700721&r1=1700720&r2=1700721&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/schema.xml (original)
+++ jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/schema.xml Wed Sep 2 07:58:32 2015
@@ -85,6 +85,18 @@
<filter class="solr.TrimFilterFactory"/>
</analyzer>
</fieldtype>
+ <fieldtype name="spellcheck" class="solr.TextField"> <!-- analysis chains for text used as the spellcheck dictionary -->
+ <analyzer type="index">
+ <tokenizer class="solr.ClassicTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.ShingleFilterFactory" minShingleSize="2" maxShingleSize="3"
+ outputUnigrams="true" outputUnigramsIfNoShingles="true" tokenSeparator=" " fillerToken="*"/> <!-- index single words plus 2- and 3-word shingles joined by a space, so short phrases become terms of their own -->
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.KeywordTokenizerFactory"/> <!-- keyword tokenizer: the whole input stays one token, so a multi-word query can be compared against the indexed shingles -->
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldtype>
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
@@ -130,6 +142,7 @@
<field name=":indexed" type="tdate" indexed="true" stored="false" default="NOW" docValues="true"/>
<field name=":suggest-weight" type="tint" indexed="false" stored="false" default="1" docValues="true"/>
<field name=":suggest" type="string" indexed="true" stored="true" multiValued="true" />
+ <field name=":spellcheck" type="spellcheck" indexed="true" stored="false" multiValued="true" />
<field name="path_collapsed" type="string" indexed="true" stored="true"/>
<field name="path_depth" type="tint" indexed="true" stored="false"/>
<field name="_version_" type="long" indexed="true" stored="true"/>
@@ -147,6 +160,8 @@
<copyField source="path_exact" dest="path_child"/>
<copyField source="path_exact" dest=":path"/>
<copyField source="*" dest="catch_all"/>
+ <copyField source="jcr:title" dest=":spellcheck"/>
+ <copyField source="jcr:description" dest=":spellcheck"/>
<copyField source="jcr:title" dest=":suggest"/>
<copyField source="jcr:description" dest=":suggest"/>
</schema>
Modified: jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/solrconfig.xml?rev=1700721&r1=1700720&r2=1700721&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/solrconfig.xml (original)
+++ jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/solrconfig.xml Wed Sep 2 07:58:32 2015
@@ -1219,7 +1219,7 @@ current implementation relies on the upd
-->
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
- <str name="queryAnalyzerFieldType">text_general</str>
+ <str name="queryAnalyzerFieldType">spellcheck</str>
<!-- Multiple "Spell Checkers" can be declared and used by this
component
@@ -1228,7 +1228,7 @@ current implementation relies on the upd
<!-- a spellchecker built from a field of the main index -->
<lst name="spellchecker">
<str name="name">default</str>
- <str name="field">catch_all</str>
+ <str name="field">:spellcheck</str>
<str name="classname">solr.DirectSolrSpellChecker</str>
<!-- the spellcheck distance measure used, the default is the internal levenshtein -->
<str name="distanceMeasure">internal</str>
@@ -1247,15 +1247,15 @@ current implementation relies on the upd
</lst>
<!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
- <lst name="spellchecker">
- <str name="name">wordbreak</str>
- <str name="classname">solr.WordBreakSolrSpellChecker</str>
- <str name="field">name</str>
- <str name="combineWords">true</str>
- <str name="breakWords">true</str>
- <int name="maxChanges">10</int>
- <str name="buildOnCommit">true</str>
- </lst>
+ <!--<lst name="spellchecker">-->
+ <!--<str name="name">wordbreak</str>-->
+ <!--<str name="classname">solr.WordBreakSolrSpellChecker</str>-->
+ <!--<str name="field">name</str>-->
+ <!--<str name="combineWords">true</str>-->
+ <!--<str name="breakWords">true</str>-->
+ <!--<int name="maxChanges">10</int>-->
+ <!--<str name="buildOnCommit">true</str>-->
+ <!--</lst>-->
<!-- a spellchecker that uses a different distance measure -->
<!--
@@ -1311,13 +1311,12 @@ current implementation relies on the upd
-->
<requestHandler name="/spellcheck" class="solr.SearchHandler" startup="lazy">
<lst name="defaults">
- <str name="df">catch_all</str>
+ <!--<str name="df">catch_all</str>-->
<!-- Solr will use suggestions from both the 'default' spellchecker
and from the 'wordbreak' spellchecker and combine them.
collations (re-written queries) can include a combination of
corrections from both spellcheckers -->
<str name="spellcheck.dictionary">default</str>
- <str name="spellcheck.dictionary">wordbreak</str>
<str name="spellcheck">on</str>
<str name="spellcheck.extendedResults">true</str>
<str name="spellcheck.count">10</str>