You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by to...@apache.org on 2015/09/02 09:58:32 UTC

svn commit: r1700721 - in /jackrabbit/oak/trunk/oak-solr-core/src: main/resources/solr/oak/conf/ test/java/org/apache/jackrabbit/oak/jcr/query/ test/resources/org/apache/jackrabbit/oak/query/ test/resources/solr/oak/conf/

Author: tommaso
Date: Wed Sep  2 07:58:32 2015
New Revision: 1700721

URL: http://svn.apache.org/r1700721
Log:
OAK-3331 - support multiple words spellchecking in solr

Removed:
    jackrabbit/oak/trunk/oak-solr-core/src/test/resources/org/apache/jackrabbit/oak/query/sql1.txt
Modified:
    jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/schema.xml
    jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml
    jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java
    jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/schema.xml
    jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/solrconfig.xml

Modified: jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/schema.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/schema.xml?rev=1700721&r1=1700720&r2=1700721&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/schema.xml (original)
+++ jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/schema.xml Wed Sep  2 07:58:32 2015
@@ -79,6 +79,18 @@
                 <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
             </analyzer>
         </fieldType>
+        <fieldtype name="spellcheck" class="solr.TextField">
+            <analyzer type="index">
+                <tokenizer class="solr.ClassicTokenizerFactory"/>
+                <filter class="solr.LowerCaseFilterFactory"/>
+                <filter class="solr.ShingleFilterFactory" minShingleSize="2"  maxShingleSize="3"
+                        outputUnigrams="true" outputUnigramsIfNoShingles="true" tokenSeparator=" " fillerToken="*"/>
+            </analyzer>
+            <analyzer type="query">
+                <tokenizer class="solr.KeywordTokenizerFactory"/>
+                <filter class="solr.LowerCaseFilterFactory"/>
+            </analyzer>
+        </fieldtype>
 
         <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
 
@@ -118,6 +130,7 @@
         <field name=":indexed" type="tdate" indexed="true" stored="false" default="NOW" docValues="true"/>
         <field name=":suggest-weight" type="tint" indexed="false" stored="false" default="1" docValues="true"/>
         <field name=":suggest" type="string" indexed="true" stored="true" multiValued="true" />
+        <field name=":spellcheck" type="spellcheck" indexed="true" stored="false" multiValued="true" />
         <field name="path_collapsed" type="string" indexed="true" stored="false"/>
         <field name="path_depth" type="tint" indexed="true" stored="false"/>
         <field name="_version_" type="long" indexed="true" stored="true"/>
@@ -136,6 +149,8 @@
     <copyField source="path_exact" dest="path_child"/>
     <copyField source="path_exact" dest=":path"/>
     <copyField source="*" dest="catch_all"/>
+    <copyField source="jcr:title" dest=":spellcheck"/>
+    <copyField source="jcr:description" dest=":spellcheck"/>
     <copyField source="jcr:title" dest=":suggest"/>
     <copyField source="jcr:description" dest=":suggest"/>
 

Modified: jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml?rev=1700721&r1=1700720&r2=1700721&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml (original)
+++ jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml Wed Sep  2 07:58:32 2015
@@ -1176,12 +1176,12 @@
 
     <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
 
-        <str name="queryAnalyzerFieldType">text_general</str>
+        <str name="queryAnalyzerFieldType">spellcheck</str>
 
         <!-- a spellchecker built from a field of the main index -->
         <lst name="spellchecker">
             <str name="name">default</str>
-            <str name="field">catch_all</str>
+            <str name="field">:spellcheck</str>
             <str name="classname">solr.DirectSolrSpellChecker</str>
             <str name="distanceMeasure">internal</str>
             <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
@@ -1202,7 +1202,6 @@
 
     <requestHandler name="/spellcheck" class="solr.SearchHandler" startup="lazy">
         <lst name="defaults">
-            <str name="df">catch_all</str>
             <str name="spellcheck.dictionary">default</str>
             <str name="spellcheck">on</str>
             <str name="spellcheck.extendedResults">true</str>

Modified: jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java?rev=1700721&r1=1700720&r2=1700721&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java (original)
+++ jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java Wed Sep  2 07:58:32 2015
@@ -38,9 +38,9 @@ public class SpellcheckTest extends Abst
         Session session = superuser;
         QueryManager qm = session.getWorkspace().getQueryManager();
         Node n1 = testRootNode.addNode("node1");
-        n1.setProperty("text", "hello hello hello alt");
+        n1.setProperty("jcr:title", "hello hello hello alt");
         Node n2 = testRootNode.addNode("node2");
-        n2.setProperty("text", "hold");
+        n2.setProperty("jcr:title", "hold");
         session.save();
 
         String sql = "SELECT [rep:spellcheck()] FROM nt:base WHERE [jcr:path] = '/' AND SPELLCHECK('helo')";
@@ -54,9 +54,9 @@ public class SpellcheckTest extends Abst
         Session session = superuser;
         QueryManager qm = session.getWorkspace().getQueryManager();
         Node n1 = testRootNode.addNode("node1");
-        n1.setProperty("text", "hello hello hello alt");
+        n1.setProperty("jcr:title", "hello hello hello alt");
         Node n2 = testRootNode.addNode("node2");
-        n2.setProperty("text", "hold");
+        n2.setProperty("jcr:title", "hold");
         session.save();
 
         String xpath = "/jcr:root[rep:spellcheck('helo')]/(rep:spellcheck())";
@@ -66,6 +66,31 @@ public class SpellcheckTest extends Abst
         assertEquals("[hello, hold]", result);
     }
 
+    /**
+     * Verifies that a misspelled multi-word phrase is corrected as a whole phrase.
+     * Four nodes each get their own jcr:title; the query 'votin in ontari' is
+     * expected to yield the single suggestion 'voting in ontario'.
+     */
+    public void testSpellcheckMultipleWords() throws Exception {
+        Session session = superuser;
+        QueryManager qm = session.getWorkspace().getQueryManager();
+        Node n1 = testRootNode.addNode("node1");
+        n1.setProperty("jcr:title", "it is always a good idea to go visiting ontario");
+        Node n2 = testRootNode.addNode("node2");
+        n2.setProperty("jcr:title", "ontario is a nice place to live in");
+        Node n3 = testRootNode.addNode("node3");
+        n3.setProperty("jcr:title", "I flied to ontario for voting for the major polls");
+        Node n4 = testRootNode.addNode("node4");
+        n4.setProperty("jcr:title", "I will go voting in ontario, I always voted since I've been allowed to");
+        session.save();
+
+        String xpath = "/jcr:root[rep:spellcheck('votin in ontari')]/(rep:spellcheck())";
+        Query q = qm.createQuery(xpath, Query.XPATH);
+        String result = getResult(q.execute(), "rep:spellcheck()");
+        assertNotNull(result);
+        assertEquals("[voting in ontario]", result);
+    }
+
     static String getResult(QueryResult result, String propertyName) throws RepositoryException {
         StringBuilder buff = new StringBuilder();
         RowIterator it = result.getRows();

Modified: jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/schema.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/schema.xml?rev=1700721&r1=1700720&r2=1700721&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/schema.xml (original)
+++ jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/schema.xml Wed Sep  2 07:58:32 2015
@@ -85,6 +85,18 @@
                 <filter class="solr.TrimFilterFactory"/>
             </analyzer>
         </fieldtype>
+        <fieldtype name="spellcheck" class="solr.TextField">
+            <analyzer type="index">
+                <tokenizer class="solr.ClassicTokenizerFactory"/>
+                <filter class="solr.LowerCaseFilterFactory"/>
+                <filter class="solr.ShingleFilterFactory" minShingleSize="2"  maxShingleSize="3"
+                        outputUnigrams="true" outputUnigramsIfNoShingles="true" tokenSeparator=" " fillerToken="*"/>
+            </analyzer>
+            <analyzer type="query">
+                <tokenizer class="solr.KeywordTokenizerFactory"/>
+                <filter class="solr.LowerCaseFilterFactory"/>
+            </analyzer>
+        </fieldtype>
 
         <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
 
@@ -130,6 +142,7 @@
         <field name=":indexed" type="tdate" indexed="true" stored="false" default="NOW" docValues="true"/>
         <field name=":suggest-weight" type="tint" indexed="false" stored="false" default="1"  docValues="true"/>
         <field name=":suggest" type="string" indexed="true" stored="true" multiValued="true" />
+        <field name=":spellcheck" type="spellcheck" indexed="true" stored="false" multiValued="true" />
         <field name="path_collapsed" type="string" indexed="true" stored="true"/>
         <field name="path_depth" type="tint" indexed="true" stored="false"/>
         <field name="_version_" type="long" indexed="true" stored="true"/>
@@ -147,6 +160,8 @@
     <copyField source="path_exact" dest="path_child"/>
     <copyField source="path_exact" dest=":path"/>
     <copyField source="*" dest="catch_all"/>
+    <copyField source="jcr:title" dest=":spellcheck"/>
+    <copyField source="jcr:description" dest=":spellcheck"/>
     <copyField source="jcr:title" dest=":suggest"/>
     <copyField source="jcr:description" dest=":suggest"/>
 </schema>

Modified: jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/solrconfig.xml?rev=1700721&r1=1700720&r2=1700721&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/solrconfig.xml (original)
+++ jackrabbit/oak/trunk/oak-solr-core/src/test/resources/solr/oak/conf/solrconfig.xml Wed Sep  2 07:58:32 2015
@@ -1219,7 +1219,7 @@ current implementation relies on the upd
     -->
     <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
 
-        <str name="queryAnalyzerFieldType">text_general</str>
+        <str name="queryAnalyzerFieldType">spellcheck</str>
 
         <!-- Multiple "Spell Checkers" can be declared and used by this
            component
@@ -1228,7 +1228,7 @@ current implementation relies on the upd
         <!-- a spellchecker built from a field of the main index -->
         <lst name="spellchecker">
             <str name="name">default</str>
-            <str name="field">catch_all</str>
+            <str name="field">:spellcheck</str>
             <str name="classname">solr.DirectSolrSpellChecker</str>
             <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
             <str name="distanceMeasure">internal</str>
@@ -1247,15 +1247,15 @@ current implementation relies on the upd
         </lst>
 
         <!-- a spellchecker that can break or combine words.  See "/spell" handler below for usage -->
-        <lst name="spellchecker">
-            <str name="name">wordbreak</str>
-            <str name="classname">solr.WordBreakSolrSpellChecker</str>
-            <str name="field">name</str>
-            <str name="combineWords">true</str>
-            <str name="breakWords">true</str>
-            <int name="maxChanges">10</int>
-            <str name="buildOnCommit">true</str>
-        </lst>
+        <!--<lst name="spellchecker">-->
+            <!--<str name="name">wordbreak</str>-->
+            <!--<str name="classname">solr.WordBreakSolrSpellChecker</str>-->
+            <!--<str name="field">name</str>-->
+            <!--<str name="combineWords">true</str>-->
+            <!--<str name="breakWords">true</str>-->
+            <!--<int name="maxChanges">10</int>-->
+            <!--<str name="buildOnCommit">true</str>-->
+        <!--</lst>-->
 
         <!-- a spellchecker that uses a different distance measure -->
         <!--
@@ -1311,13 +1311,12 @@ current implementation relies on the upd
     -->
     <requestHandler name="/spellcheck" class="solr.SearchHandler" startup="lazy">
         <lst name="defaults">
-            <str name="df">catch_all</str>
+            <!--<str name="df">catch_all</str>-->
             <!-- Solr will use suggestions from both the 'default' spellchecker
         and from the 'wordbreak' spellchecker and combine them.
         collations (re-written queries) can include a combination of
         corrections from both spellcheckers -->
             <str name="spellcheck.dictionary">default</str>
-            <str name="spellcheck.dictionary">wordbreak</str>
             <str name="spellcheck">on</str>
             <str name="spellcheck.extendedResults">true</str>
             <str name="spellcheck.count">10</str>