You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by to...@apache.org on 2014/04/28 16:45:32 UTC
svn commit: r1590660 - in /jackrabbit/oak/trunk/oak-solr-core/src/main: java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java resources/solr/oak/conf/solrconfig.xml

Author: tommaso
Date: Mon Apr 28 14:45:32 2014
New Revision: 1590660

URL: http://svn.apache.org/r1590660
Log:
OAK-1774 - escaped field names in full text query expansion

Modified:
    jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java
    jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml

Modified: jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java?rev=1590660&r1=1590659&r2=1590660&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java (original)
+++ jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java Mon Apr 28 14:45:32 2014
@@ -297,7 +297,7 @@ public class SolrQueryIndex implements F
                 if (p == null) {
                     p = configuration.getCatchAllField();
                 }
-                fullTextString.append(p);
+                fullTextString.append(partialEscape(p));
                 fullTextString.append(':');
                 String termText = term.getText();
                 if (termText.indexOf(' ') > 0) {

Modified: jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml?rev=1590660&r1=1590659&r2=1590660&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml (original)
+++ jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml Mon Apr 28 14:45:32 2014
@@ -196,6 +196,7 @@
         <!--
         <mergeFactor>10</mergeFactor>
         -->
+
         <!-- Expert: Merge Scheduler
              The Merge Scheduler in Lucene controls how merges are
              performed.  The ConcurrentMergeScheduler (Lucene 2.3 default)
@@ -794,8 +795,9 @@
           -->
         <lst name="defaults">
             <str name="echoParams">explicit</str>
+            <str name="wt">json</str>
             <int name="rows">10</int>
-            <str name="df">text</str>
+            <str name="df">catch_all</str>
         </lst>
         <!-- In addition to defaults, "appends" params can be specified
              to identify values which should be appended to the list of
@@ -882,102 +884,7 @@
     </requestHandler>
 
 
-    <!-- A Robust Example
 
-         This example SearchHandler declaration shows off usage of the
-         SearchHandler with many defaults declared
-
-         Note that multiple instances of the same Request Handler
-         (SearchHandler) can be registered multiple times with different
-         names (and different init parameters)
-      -->
-    <requestHandler name="/browse" class="solr.SearchHandler">
-        <lst name="defaults">
-            <str name="echoParams">explicit</str>
-
-            <!-- VelocityResponseWriter settings -->
-            <str name="wt">velocity</str>
-            <str name="v.template">browse</str>
-            <str name="v.layout">layout</str>
-            <str name="title">Solritas</str>
-
-            <!-- Query settings -->
-            <str name="defType">edismax</str>
-            <str name="qf">
-                text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
-                title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
-            </str>
-            <str name="df">text</str>
-            <str name="mm">100%</str>
-            <str name="q.alt">*:*</str>
-            <str name="rows">10</str>
-            <str name="fl">*,score</str>
-
-            <str name="mlt.qf">
-                text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
-                title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
-            </str>
-            <str name="mlt.fl">text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename</str>
-            <int name="mlt.count">3</int>
-
-            <!-- Faceting defaults -->
-            <str name="facet">on</str>
-            <str name="facet.field">cat</str>
-            <str name="facet.field">manu_exact</str>
-            <str name="facet.field">content_type</str>
-            <str name="facet.field">author_s</str>
-            <str name="facet.query">ipod</str>
-            <str name="facet.query">GB</str>
-            <str name="facet.mincount">1</str>
-            <str name="facet.pivot">cat,inStock</str>
-            <str name="facet.range.other">after</str>
-            <str name="facet.range">price</str>
-            <int name="f.price.facet.range.start">0</int>
-            <int name="f.price.facet.range.end">600</int>
-            <int name="f.price.facet.range.gap">50</int>
-            <str name="facet.range">popularity</str>
-            <int name="f.popularity.facet.range.start">0</int>
-            <int name="f.popularity.facet.range.end">10</int>
-            <int name="f.popularity.facet.range.gap">3</int>
-            <str name="facet.range">manufacturedate_dt</str>
-            <str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
-            <str name="f.manufacturedate_dt.facet.range.end">NOW</str>
-            <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
-            <str name="f.manufacturedate_dt.facet.range.other">before</str>
-            <str name="f.manufacturedate_dt.facet.range.other">after</str>
-
-            <!-- Highlighting defaults -->
-            <str name="hl">on</str>
-            <str name="hl.fl">content features title name</str>
-            <str name="hl.encoder">html</str>
-            <str name="hl.simple.pre">&lt;b&gt;</str>
-            <str name="hl.simple.post">&lt;/b&gt;</str>
-            <str name="f.title.hl.fragsize">0</str>
-            <str name="f.title.hl.alternateField">title</str>
-            <str name="f.name.hl.fragsize">0</str>
-            <str name="f.name.hl.alternateField">name</str>
-            <str name="f.content.hl.snippets">3</str>
-            <str name="f.content.hl.fragsize">200</str>
-            <str name="f.content.hl.alternateField">content</str>
-            <str name="f.content.hl.maxAlternateFieldLength">750</str>
-
-            <!-- Spell checking defaults -->
-            <str name="spellcheck">on</str>
-            <str name="spellcheck.extendedResults">false</str>
-            <str name="spellcheck.count">5</str>
-            <str name="spellcheck.alternativeTermCount">2</str>
-            <str name="spellcheck.maxResultsForSuggest">5</str>
-            <str name="spellcheck.collate">true</str>
-            <str name="spellcheck.collateExtendedResults">true</str>
-            <str name="spellcheck.maxCollationTries">5</str>
-            <str name="spellcheck.maxCollations">3</str>
-        </lst>
-
-        <!-- append spellchecking to our list of components -->
-        <arr name="last-components">
-            <str>spellcheck</str>
-        </arr>
-    </requestHandler>
 
 
     <!-- Update Request Handler.
@@ -1154,14 +1061,6 @@
         <!-- <str name="healthcheckFile">server-enabled.txt</str> -->
     </requestHandler>
 
-    <!-- Echo the request contents back to the client -->
-    <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
-        <lst name="defaults">
-            <str name="echoParams">explicit</str>
-            <str name="echoHandler">true</str>
-        </lst>
-    </requestHandler>
-
     <!-- Solr Replication
 
          The SolrReplicationHandler supports replicating indexes from a
@@ -1247,392 +1146,7 @@
 
          http://wiki.apache.org/solr/SpellCheckComponent
       -->
-    <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
-
-        <str name="queryAnalyzerFieldType">text_general</str>
-
-        <!-- Multiple "Spell Checkers" can be declared and used by this
-             component
-          -->
-
-        <!-- a spellchecker built from a field of the main index -->
-        <lst name="spellchecker">
-            <str name="name">default</str>
-            <str name="field">text</str>
-            <str name="classname">solr.DirectSolrSpellChecker</str>
-            <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
-            <str name="distanceMeasure">internal</str>
-            <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
-            <float name="accuracy">0.5</float>
-            <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
-            <int name="maxEdits">2</int>
-            <!-- the minimum shared prefix when enumerating terms -->
-            <int name="minPrefix">1</int>
-            <!-- maximum number of inspections per result. -->
-            <int name="maxInspections">5</int>
-            <!-- minimum length of a query term to be considered for correction -->
-            <int name="minQueryLength">4</int>
-            <!-- maximum threshold of documents a query term can appear to be considered for correction -->
-            <float name="maxQueryFrequency">0.01</float>
-            <!-- uncomment this to require suggestions to occur in 1% of the documents
-                <float name="thresholdTokenFrequency">.01</float>
-            -->
-        </lst>
-
-        <!-- a spellchecker that can break or combine words.  See "/spell" handler below for usage -->
-        <lst name="spellchecker">
-            <str name="name">wordbreak</str>
-            <str name="classname">solr.WordBreakSolrSpellChecker</str>
-            <str name="field">name</str>
-            <str name="combineWords">true</str>
-            <str name="breakWords">true</str>
-            <int name="maxChanges">10</int>
-        </lst>
-
-        <!-- a spellchecker that uses a different distance measure -->
-        <!--
-           <lst name="spellchecker">
-             <str name="name">jarowinkler</str>
-             <str name="field">spell</str>
-             <str name="classname">solr.DirectSolrSpellChecker</str>
-             <str name="distanceMeasure">
-               org.apache.lucene.search.spell.JaroWinklerDistance
-             </str>
-           </lst>
-         -->
-
-        <!-- a spellchecker that use an alternate comparator
-
-             comparatorClass be one of:
-              1. score (default)
-              2. freq (Frequency first, then score)
-              3. A fully qualified class name
-          -->
-        <!--
-           <lst name="spellchecker">
-             <str name="name">freq</str>
-             <str name="field">lowerfilt</str>
-             <str name="classname">solr.DirectSolrSpellChecker</str>
-             <str name="comparatorClass">freq</str>
-          -->
-
-        <!-- A spellchecker that reads the list of words from a file -->
-        <!--
-           <lst name="spellchecker">
-             <str name="classname">solr.FileBasedSpellChecker</str>
-             <str name="name">file</str>
-             <str name="sourceLocation">spellings.txt</str>
-             <str name="characterEncoding">UTF-8</str>
-             <str name="spellcheckIndexDir">spellcheckerFile</str>
-           </lst>
-          -->
-    </searchComponent>
-
-    <!-- A request handler for demonstrating the spellcheck component.
-
-         NOTE: This is purely as an example.  The whole purpose of the
-         SpellCheckComponent is to hook it into the request handler that
-         handles your normal user queries so that a separate request is
-         not needed to get suggestions.
-
-         IN OTHER WORDS, THERE IS REALLY GOOD CHANCE THE SETUP BELOW IS
-         NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!
-
-         See http://wiki.apache.org/solr/SpellCheckComponent for details
-         on the request parameters.
-      -->
-    <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
-        <lst name="defaults">
-            <str name="df">text</str>
-            <!-- Solr will use suggestions from both the 'default' spellchecker
-                 and from the 'wordbreak' spellchecker and combine them.
-                 collations (re-written queries) can include a combination of
-                 corrections from both spellcheckers -->
-            <str name="spellcheck.dictionary">default</str>
-            <str name="spellcheck.dictionary">wordbreak</str>
-            <str name="spellcheck">on</str>
-            <str name="spellcheck.extendedResults">true</str>
-            <str name="spellcheck.count">10</str>
-            <str name="spellcheck.alternativeTermCount">5</str>
-            <str name="spellcheck.maxResultsForSuggest">5</str>
-            <str name="spellcheck.collate">true</str>
-            <str name="spellcheck.collateExtendedResults">true</str>
-            <str name="spellcheck.maxCollationTries">10</str>
-            <str name="spellcheck.maxCollations">5</str>
-        </lst>
-        <arr name="last-components">
-            <str>spellcheck</str>
-        </arr>
-    </requestHandler>
-
-    <searchComponent name="suggest" class="solr.SuggestComponent">
-        <lst name="suggester">
-            <str name="name">mySuggester</str>
-            <str name="lookupImpl">FuzzyLookupFactory</str>      <!-- org.apache.solr.spelling.suggest.fst -->
-            <str name="dictionaryImpl">DocumentDictionaryFactory</str>     <!-- org.apache.solr.spelling.suggest.HighFrequencyDictionaryFactory -->
-            <str name="field">cat</str>
-            <str name="weightField">price</str>
-            <str name="suggestAnalyzerFieldType">string</str>
-        </lst>
-    </searchComponent>
-
-    <requestHandler name="/suggest" class="solr.SearchHandler" startup="lazy">
-        <lst name="defaults">
-            <str name="suggest">true</str>
-            <str name="suggest.count">10</str>
-        </lst>
-        <arr name="components">
-            <str>suggest</str>
-        </arr>
-    </requestHandler>
-    <!-- Term Vector Component
-
-         http://wiki.apache.org/solr/TermVectorComponent
-      -->
-    <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
-
-    <!-- A request handler for demonstrating the term vector component
 
-         This is purely as an example.
-
-         In reality you will likely want to add the component to your
-         already specified request handlers.
-      -->
-    <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
-        <lst name="defaults">
-            <str name="df">text</str>
-            <bool name="tv">true</bool>
-        </lst>
-        <arr name="last-components">
-            <str>tvComponent</str>
-        </arr>
-    </requestHandler>
-
-    <!-- Clustering Component
-
-         You'll need to set the solr.clustering.enabled system property
-         when running solr to run with clustering enabled:
-
-              java -Dsolr.clustering.enabled=true -jar start.jar
-
-         http://wiki.apache.org/solr/ClusteringComponent
-         http://carrot2.github.io/solr-integration-strategies/
-      -->
-    <searchComponent name="clustering"
-                     enable="${solr.clustering.enabled:false}"
-                     class="solr.clustering.ClusteringComponent" >
-        <lst name="engine">
-            <str name="name">lingo</str>
-
-            <!-- Class name of a clustering algorithm compatible with the Carrot2 framework.
-
-                 Currently available open source algorithms are:
-                 * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
-                 * org.carrot2.clustering.stc.STCClusteringAlgorithm
-                 * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
-
-                 See http://project.carrot2.org/algorithms.html for more information.
-
-                 A commercial algorithm Lingo3G (needs to be installed separately) is defined as:
-                 * com.carrotsearch.lingo3g.Lingo3GClusteringAlgorithm
-              -->
-            <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
-
-            <!-- Override location of the clustering algorithm's resources
-                 (attribute definitions and lexical resources).
-
-                 A directory from which to load algorithm-specific stop words,
-                 stop labels and attribute definition XMLs.
-
-                 For an overview of Carrot2 lexical resources, see:
-                 http://download.carrot2.org/head/manual/#chapter.lexical-resources
-
-                 For an overview of Lingo3G lexical resources, see:
-                 http://download.carrotsearch.com/lingo3g/manual/#chapter.lexical-resources
-             -->
-            <str name="carrot.resourcesDir">clustering/carrot2</str>
-        </lst>
-
-        <!-- An example definition for the STC clustering algorithm. -->
-        <lst name="engine">
-            <str name="name">stc</str>
-            <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
-        </lst>
-
-        <!-- An example definition for the bisecting kmeans clustering algorithm. -->
-        <lst name="engine">
-            <str name="name">kmeans</str>
-            <str name="carrot.algorithm">org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm</str>
-        </lst>
-    </searchComponent>
-
-    <!-- A request handler for demonstrating the clustering component
-
-         This is purely as an example.
-
-         In reality you will likely want to add the component to your
-         already specified request handlers.
-      -->
-    <requestHandler name="/clustering"
-                    startup="lazy"
-                    enable="${solr.clustering.enabled:false}"
-                    class="solr.SearchHandler">
-        <lst name="defaults">
-            <bool name="clustering">true</bool>
-            <bool name="clustering.results">true</bool>
-            <!-- Field name with the logical "title" of a each document (optional) -->
-            <str name="carrot.title">name</str>
-            <!-- Field name with the logical "URL" of a each document (optional) -->
-            <str name="carrot.url">id</str>
-            <!-- Field name with the logical "content" of a each document (optional) -->
-            <str name="carrot.snippet">features</str>
-            <!-- Apply highlighter to the title/ content and use this for clustering. -->
-            <bool name="carrot.produceSummary">true</bool>
-            <!-- the maximum number of labels per cluster -->
-            <!--<int name="carrot.numDescriptions">5</int>-->
-            <!-- produce sub clusters -->
-            <bool name="carrot.outputSubClusters">false</bool>
-
-            <!-- Configure the remaining request handler parameters. -->
-            <str name="defType">edismax</str>
-            <str name="qf">
-                text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
-            </str>
-            <str name="q.alt">*:*</str>
-            <str name="rows">10</str>
-            <str name="fl">*,score</str>
-        </lst>
-        <arr name="last-components">
-            <str>clustering</str>
-        </arr>
-    </requestHandler>
-
-    <!-- Terms Component
-
-         http://wiki.apache.org/solr/TermsComponent
-
-         A component to return terms and document frequency of those
-         terms
-      -->
-    <searchComponent name="terms" class="solr.TermsComponent"/>
-
-    <!-- A request handler for demonstrating the terms component -->
-    <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
-        <lst name="defaults">
-            <bool name="terms">true</bool>
-            <bool name="distrib">false</bool>
-        </lst>
-        <arr name="components">
-            <str>terms</str>
-        </arr>
-    </requestHandler>
-
-
-    <!-- Highlighting Component
-
-         http://wiki.apache.org/solr/HighlightingParameters
-      -->
-    <searchComponent class="solr.HighlightComponent" name="highlight">
-        <highlighting>
-            <!-- Configure the standard fragmenter -->
-            <!-- This could most likely be commented out in the "default" case -->
-            <fragmenter name="gap"
-                        default="true"
-                        class="solr.highlight.GapFragmenter">
-                <lst name="defaults">
-                    <int name="hl.fragsize">100</int>
-                </lst>
-            </fragmenter>
-
-            <!-- A regular-expression-based fragmenter
-                 (for sentence extraction)
-              -->
-            <fragmenter name="regex"
-                        class="solr.highlight.RegexFragmenter">
-                <lst name="defaults">
-                    <!-- slightly smaller fragsizes work better because of slop -->
-                    <int name="hl.fragsize">70</int>
-                    <!-- allow 50% slop on fragment sizes -->
-                    <float name="hl.regex.slop">0.5</float>
-                    <!-- a basic sentence pattern -->
-                    <str name="hl.regex.pattern">[-\w ,/\n\&quot;&apos;]{20,200}</str>
-                </lst>
-            </fragmenter>
-
-            <!-- Configure the standard formatter -->
-            <formatter name="html"
-                       default="true"
-                       class="solr.highlight.HtmlFormatter">
-                <lst name="defaults">
-                    <str name="hl.simple.pre"><![CDATA[<em>]]></str>
-                    <str name="hl.simple.post"><![CDATA[</em>]]></str>
-                </lst>
-            </formatter>
-
-            <!-- Configure the standard encoder -->
-            <encoder name="html"
-                     class="solr.highlight.HtmlEncoder" />
-
-            <!-- Configure the standard fragListBuilder -->
-            <fragListBuilder name="simple"
-                             class="solr.highlight.SimpleFragListBuilder"/>
-
-            <!-- Configure the single fragListBuilder -->
-            <fragListBuilder name="single"
-                             class="solr.highlight.SingleFragListBuilder"/>
-
-            <!-- Configure the weighted fragListBuilder -->
-            <fragListBuilder name="weighted"
-                             default="true"
-                             class="solr.highlight.WeightedFragListBuilder"/>
-
-            <!-- default tag FragmentsBuilder -->
-            <fragmentsBuilder name="default"
-                              default="true"
-                              class="solr.highlight.ScoreOrderFragmentsBuilder">
-                <!--
-                <lst name="defaults">
-                  <str name="hl.multiValuedSeparatorChar">/</str>
-                </lst>
-                -->
-            </fragmentsBuilder>
-
-            <!-- multi-colored tag FragmentsBuilder -->
-            <fragmentsBuilder name="colored"
-                              class="solr.highlight.ScoreOrderFragmentsBuilder">
-                <lst name="defaults">
-                    <str name="hl.tag.pre"><![CDATA[
-               <b style="background:yellow">,<b style="background:lawgreen">,
-               <b style="background:aquamarine">,<b style="background:magenta">,
-               <b style="background:palegreen">,<b style="background:coral">,
-               <b style="background:wheat">,<b style="background:khaki">,
-               <b style="background:lime">,<b style="background:deepskyblue">]]></str>
-                    <str name="hl.tag.post"><![CDATA[</b>]]></str>
-                </lst>
-            </fragmentsBuilder>
-
-            <boundaryScanner name="default"
-                             default="true"
-                             class="solr.highlight.SimpleBoundaryScanner">
-                <lst name="defaults">
-                    <str name="hl.bs.maxScan">10</str>
-                    <str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
-                </lst>
-            </boundaryScanner>
-
-            <boundaryScanner name="breakIterator"
-                             class="solr.highlight.BreakIteratorBoundaryScanner">
-                <lst name="defaults">
-                    <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE -->
-                    <str name="hl.bs.type">WORD</str>
-                    <!-- language and country are used when constructing Locale object.  -->
-                    <!-- And the Locale object will be used when getting instance of BreakIterator -->
-                    <str name="hl.bs.language">en</str>
-                    <str name="hl.bs.country">US</str>
-                </lst>
-            </boundaryScanner>
-        </highlighting>
-    </searchComponent>
 
     <!-- Update Processors
 
@@ -1741,20 +1255,6 @@
         <str name="content-type">text/plain; charset=UTF-8</str>
     </queryResponseWriter>
 
-    <!--
-       Custom response writers can be declared as needed...
-      -->
-    <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/>
-
-
-    <!-- XSLT response writer transforms the XML output by any xslt file found
-         in Solr's conf/xslt directory.  Changes to xslt files are checked for
-         every xsltCacheLifetimeSeconds.
-      -->
-    <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
-        <int name="xsltCacheLifetimeSeconds">5</int>
-    </queryResponseWriter>
-
     <!-- Query Parsers
 
          http://wiki.apache.org/solr/SolrQuerySyntax