You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by to...@apache.org on 2014/04/28 16:45:32 UTC
svn commit: r1590660 - in /jackrabbit/oak/trunk/oak-solr-core/src/main:
java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java
resources/solr/oak/conf/solrconfig.xml
Author: tommaso
Date: Mon Apr 28 14:45:32 2014
New Revision: 1590660
URL: http://svn.apache.org/r1590660
Log:
OAK-1774 - escaped field names in full text query expansion
Modified:
jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java
jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml
Modified: jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java?rev=1590660&r1=1590659&r2=1590660&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java (original)
+++ jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java Mon Apr 28 14:45:32 2014
@@ -297,7 +297,7 @@ public class SolrQueryIndex implements F
if (p == null) {
p = configuration.getCatchAllField();
}
- fullTextString.append(p);
+ fullTextString.append(partialEscape(p));
fullTextString.append(':');
String termText = term.getText();
if (termText.indexOf(' ') > 0) {
Modified: jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml?rev=1590660&r1=1590659&r2=1590660&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml (original)
+++ jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml Mon Apr 28 14:45:32 2014
@@ -196,6 +196,7 @@
<!--
<mergeFactor>10</mergeFactor>
-->
+
<!-- Expert: Merge Scheduler
The Merge Scheduler in Lucene controls how merges are
performed. The ConcurrentMergeScheduler (Lucene 2.3 default)
@@ -794,8 +795,9 @@
-->
<lst name="defaults">
<str name="echoParams">explicit</str>
+ <str name="wt">json</str>
<int name="rows">10</int>
- <str name="df">text</str>
+ <str name="df">catch_all</str>
</lst>
<!-- In addition to defaults, "appends" params can be specified
to identify values which should be appended to the list of
@@ -882,102 +884,7 @@
</requestHandler>
- <!-- A Robust Example
- This example SearchHandler declaration shows off usage of the
- SearchHandler with many defaults declared
-
- Note that multiple instances of the same Request Handler
- (SearchHandler) can be registered multiple times with different
- names (and different init parameters)
- -->
- <requestHandler name="/browse" class="solr.SearchHandler">
- <lst name="defaults">
- <str name="echoParams">explicit</str>
-
- <!-- VelocityResponseWriter settings -->
- <str name="wt">velocity</str>
- <str name="v.template">browse</str>
- <str name="v.layout">layout</str>
- <str name="title">Solritas</str>
-
- <!-- Query settings -->
- <str name="defType">edismax</str>
- <str name="qf">
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
- title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
- </str>
- <str name="df">text</str>
- <str name="mm">100%</str>
- <str name="q.alt">*:*</str>
- <str name="rows">10</str>
- <str name="fl">*,score</str>
-
- <str name="mlt.qf">
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
- title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
- </str>
- <str name="mlt.fl">text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename</str>
- <int name="mlt.count">3</int>
-
- <!-- Faceting defaults -->
- <str name="facet">on</str>
- <str name="facet.field">cat</str>
- <str name="facet.field">manu_exact</str>
- <str name="facet.field">content_type</str>
- <str name="facet.field">author_s</str>
- <str name="facet.query">ipod</str>
- <str name="facet.query">GB</str>
- <str name="facet.mincount">1</str>
- <str name="facet.pivot">cat,inStock</str>
- <str name="facet.range.other">after</str>
- <str name="facet.range">price</str>
- <int name="f.price.facet.range.start">0</int>
- <int name="f.price.facet.range.end">600</int>
- <int name="f.price.facet.range.gap">50</int>
- <str name="facet.range">popularity</str>
- <int name="f.popularity.facet.range.start">0</int>
- <int name="f.popularity.facet.range.end">10</int>
- <int name="f.popularity.facet.range.gap">3</int>
- <str name="facet.range">manufacturedate_dt</str>
- <str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
- <str name="f.manufacturedate_dt.facet.range.end">NOW</str>
- <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
- <str name="f.manufacturedate_dt.facet.range.other">before</str>
- <str name="f.manufacturedate_dt.facet.range.other">after</str>
-
- <!-- Highlighting defaults -->
- <str name="hl">on</str>
- <str name="hl.fl">content features title name</str>
- <str name="hl.encoder">html</str>
- <str name="hl.simple.pre">&lt;b&gt;</str>
- <str name="hl.simple.post">&lt;/b&gt;</str>
- <str name="f.title.hl.fragsize">0</str>
- <str name="f.title.hl.alternateField">title</str>
- <str name="f.name.hl.fragsize">0</str>
- <str name="f.name.hl.alternateField">name</str>
- <str name="f.content.hl.snippets">3</str>
- <str name="f.content.hl.fragsize">200</str>
- <str name="f.content.hl.alternateField">content</str>
- <str name="f.content.hl.maxAlternateFieldLength">750</str>
-
- <!-- Spell checking defaults -->
- <str name="spellcheck">on</str>
- <str name="spellcheck.extendedResults">false</str>
- <str name="spellcheck.count">5</str>
- <str name="spellcheck.alternativeTermCount">2</str>
- <str name="spellcheck.maxResultsForSuggest">5</str>
- <str name="spellcheck.collate">true</str>
- <str name="spellcheck.collateExtendedResults">true</str>
- <str name="spellcheck.maxCollationTries">5</str>
- <str name="spellcheck.maxCollations">3</str>
- </lst>
-
- <!-- append spellchecking to our list of components -->
- <arr name="last-components">
- <str>spellcheck</str>
- </arr>
- </requestHandler>
<!-- Update Request Handler.
@@ -1154,14 +1061,6 @@
<!-- <str name="healthcheckFile">server-enabled.txt</str> -->
</requestHandler>
- <!-- Echo the request contents back to the client -->
- <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
- <lst name="defaults">
- <str name="echoParams">explicit</str>
- <str name="echoHandler">true</str>
- </lst>
- </requestHandler>
-
<!-- Solr Replication
The SolrReplicationHandler supports replicating indexes from a
@@ -1247,392 +1146,7 @@
http://wiki.apache.org/solr/SpellCheckComponent
-->
- <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
-
- <str name="queryAnalyzerFieldType">text_general</str>
-
- <!-- Multiple "Spell Checkers" can be declared and used by this
- component
- -->
-
- <!-- a spellchecker built from a field of the main index -->
- <lst name="spellchecker">
- <str name="name">default</str>
- <str name="field">text</str>
- <str name="classname">solr.DirectSolrSpellChecker</str>
- <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
- <str name="distanceMeasure">internal</str>
- <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
- <float name="accuracy">0.5</float>
- <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
- <int name="maxEdits">2</int>
- <!-- the minimum shared prefix when enumerating terms -->
- <int name="minPrefix">1</int>
- <!-- maximum number of inspections per result. -->
- <int name="maxInspections">5</int>
- <!-- minimum length of a query term to be considered for correction -->
- <int name="minQueryLength">4</int>
- <!-- maximum threshold of documents a query term can appear to be considered for correction -->
- <float name="maxQueryFrequency">0.01</float>
- <!-- uncomment this to require suggestions to occur in 1% of the documents
- <float name="thresholdTokenFrequency">.01</float>
- -->
- </lst>
-
- <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
- <lst name="spellchecker">
- <str name="name">wordbreak</str>
- <str name="classname">solr.WordBreakSolrSpellChecker</str>
- <str name="field">name</str>
- <str name="combineWords">true</str>
- <str name="breakWords">true</str>
- <int name="maxChanges">10</int>
- </lst>
-
- <!-- a spellchecker that uses a different distance measure -->
- <!--
- <lst name="spellchecker">
- <str name="name">jarowinkler</str>
- <str name="field">spell</str>
- <str name="classname">solr.DirectSolrSpellChecker</str>
- <str name="distanceMeasure">
- org.apache.lucene.search.spell.JaroWinklerDistance
- </str>
- </lst>
- -->
-
- <!-- a spellchecker that use an alternate comparator
-
- comparatorClass be one of:
- 1. score (default)
- 2. freq (Frequency first, then score)
- 3. A fully qualified class name
- -->
- <!--
- <lst name="spellchecker">
- <str name="name">freq</str>
- <str name="field">lowerfilt</str>
- <str name="classname">solr.DirectSolrSpellChecker</str>
- <str name="comparatorClass">freq</str>
- -->
-
- <!-- A spellchecker that reads the list of words from a file -->
- <!--
- <lst name="spellchecker">
- <str name="classname">solr.FileBasedSpellChecker</str>
- <str name="name">file</str>
- <str name="sourceLocation">spellings.txt</str>
- <str name="characterEncoding">UTF-8</str>
- <str name="spellcheckIndexDir">spellcheckerFile</str>
- </lst>
- -->
- </searchComponent>
-
- <!-- A request handler for demonstrating the spellcheck component.
-
- NOTE: This is purely as an example. The whole purpose of the
- SpellCheckComponent is to hook it into the request handler that
- handles your normal user queries so that a separate request is
- not needed to get suggestions.
-
- IN OTHER WORDS, THERE IS REALLY GOOD CHANCE THE SETUP BELOW IS
- NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!
-
- See http://wiki.apache.org/solr/SpellCheckComponent for details
- on the request parameters.
- -->
- <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
- <lst name="defaults">
- <str name="df">text</str>
- <!-- Solr will use suggestions from both the 'default' spellchecker
- and from the 'wordbreak' spellchecker and combine them.
- collations (re-written queries) can include a combination of
- corrections from both spellcheckers -->
- <str name="spellcheck.dictionary">default</str>
- <str name="spellcheck.dictionary">wordbreak</str>
- <str name="spellcheck">on</str>
- <str name="spellcheck.extendedResults">true</str>
- <str name="spellcheck.count">10</str>
- <str name="spellcheck.alternativeTermCount">5</str>
- <str name="spellcheck.maxResultsForSuggest">5</str>
- <str name="spellcheck.collate">true</str>
- <str name="spellcheck.collateExtendedResults">true</str>
- <str name="spellcheck.maxCollationTries">10</str>
- <str name="spellcheck.maxCollations">5</str>
- </lst>
- <arr name="last-components">
- <str>spellcheck</str>
- </arr>
- </requestHandler>
-
- <searchComponent name="suggest" class="solr.SuggestComponent">
- <lst name="suggester">
- <str name="name">mySuggester</str>
- <str name="lookupImpl">FuzzyLookupFactory</str> <!-- org.apache.solr.spelling.suggest.fst -->
- <str name="dictionaryImpl">DocumentDictionaryFactory</str> <!-- org.apache.solr.spelling.suggest.HighFrequencyDictionaryFactory -->
- <str name="field">cat</str>
- <str name="weightField">price</str>
- <str name="suggestAnalyzerFieldType">string</str>
- </lst>
- </searchComponent>
-
- <requestHandler name="/suggest" class="solr.SearchHandler" startup="lazy">
- <lst name="defaults">
- <str name="suggest">true</str>
- <str name="suggest.count">10</str>
- </lst>
- <arr name="components">
- <str>suggest</str>
- </arr>
- </requestHandler>
- <!-- Term Vector Component
-
- http://wiki.apache.org/solr/TermVectorComponent
- -->
- <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
-
- <!-- A request handler for demonstrating the term vector component
- This is purely as an example.
-
- In reality you will likely want to add the component to your
- already specified request handlers.
- -->
- <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
- <lst name="defaults">
- <str name="df">text</str>
- <bool name="tv">true</bool>
- </lst>
- <arr name="last-components">
- <str>tvComponent</str>
- </arr>
- </requestHandler>
-
- <!-- Clustering Component
-
- You'll need to set the solr.clustering.enabled system property
- when running solr to run with clustering enabled:
-
- java -Dsolr.clustering.enabled=true -jar start.jar
-
- http://wiki.apache.org/solr/ClusteringComponent
- http://carrot2.github.io/solr-integration-strategies/
- -->
- <searchComponent name="clustering"
- enable="${solr.clustering.enabled:false}"
- class="solr.clustering.ClusteringComponent" >
- <lst name="engine">
- <str name="name">lingo</str>
-
- <!-- Class name of a clustering algorithm compatible with the Carrot2 framework.
-
- Currently available open source algorithms are:
- * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
- * org.carrot2.clustering.stc.STCClusteringAlgorithm
- * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
-
- See http://project.carrot2.org/algorithms.html for more information.
-
- A commercial algorithm Lingo3G (needs to be installed separately) is defined as:
- * com.carrotsearch.lingo3g.Lingo3GClusteringAlgorithm
- -->
- <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
-
- <!-- Override location of the clustering algorithm's resources
- (attribute definitions and lexical resources).
-
- A directory from which to load algorithm-specific stop words,
- stop labels and attribute definition XMLs.
-
- For an overview of Carrot2 lexical resources, see:
- http://download.carrot2.org/head/manual/#chapter.lexical-resources
-
- For an overview of Lingo3G lexical resources, see:
- http://download.carrotsearch.com/lingo3g/manual/#chapter.lexical-resources
- -->
- <str name="carrot.resourcesDir">clustering/carrot2</str>
- </lst>
-
- <!-- An example definition for the STC clustering algorithm. -->
- <lst name="engine">
- <str name="name">stc</str>
- <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
- </lst>
-
- <!-- An example definition for the bisecting kmeans clustering algorithm. -->
- <lst name="engine">
- <str name="name">kmeans</str>
- <str name="carrot.algorithm">org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm</str>
- </lst>
- </searchComponent>
-
- <!-- A request handler for demonstrating the clustering component
-
- This is purely as an example.
-
- In reality you will likely want to add the component to your
- already specified request handlers.
- -->
- <requestHandler name="/clustering"
- startup="lazy"
- enable="${solr.clustering.enabled:false}"
- class="solr.SearchHandler">
- <lst name="defaults">
- <bool name="clustering">true</bool>
- <bool name="clustering.results">true</bool>
- <!-- Field name with the logical "title" of a each document (optional) -->
- <str name="carrot.title">name</str>
- <!-- Field name with the logical "URL" of a each document (optional) -->
- <str name="carrot.url">id</str>
- <!-- Field name with the logical "content" of a each document (optional) -->
- <str name="carrot.snippet">features</str>
- <!-- Apply highlighter to the title/ content and use this for clustering. -->
- <bool name="carrot.produceSummary">true</bool>
- <!-- the maximum number of labels per cluster -->
- <!--<int name="carrot.numDescriptions">5</int>-->
- <!-- produce sub clusters -->
- <bool name="carrot.outputSubClusters">false</bool>
-
- <!-- Configure the remaining request handler parameters. -->
- <str name="defType">edismax</str>
- <str name="qf">
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
- </str>
- <str name="q.alt">*:*</str>
- <str name="rows">10</str>
- <str name="fl">*,score</str>
- </lst>
- <arr name="last-components">
- <str>clustering</str>
- </arr>
- </requestHandler>
-
- <!-- Terms Component
-
- http://wiki.apache.org/solr/TermsComponent
-
- A component to return terms and document frequency of those
- terms
- -->
- <searchComponent name="terms" class="solr.TermsComponent"/>
-
- <!-- A request handler for demonstrating the terms component -->
- <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
- <lst name="defaults">
- <bool name="terms">true</bool>
- <bool name="distrib">false</bool>
- </lst>
- <arr name="components">
- <str>terms</str>
- </arr>
- </requestHandler>
-
-
- <!-- Highlighting Component
-
- http://wiki.apache.org/solr/HighlightingParameters
- -->
- <searchComponent class="solr.HighlightComponent" name="highlight">
- <highlighting>
- <!-- Configure the standard fragmenter -->
- <!-- This could most likely be commented out in the "default" case -->
- <fragmenter name="gap"
- default="true"
- class="solr.highlight.GapFragmenter">
- <lst name="defaults">
- <int name="hl.fragsize">100</int>
- </lst>
- </fragmenter>
-
- <!-- A regular-expression-based fragmenter
- (for sentence extraction)
- -->
- <fragmenter name="regex"
- class="solr.highlight.RegexFragmenter">
- <lst name="defaults">
- <!-- slightly smaller fragsizes work better because of slop -->
- <int name="hl.fragsize">70</int>
- <!-- allow 50% slop on fragment sizes -->
- <float name="hl.regex.slop">0.5</float>
- <!-- a basic sentence pattern -->
- <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
- </lst>
- </fragmenter>
-
- <!-- Configure the standard formatter -->
- <formatter name="html"
- default="true"
- class="solr.highlight.HtmlFormatter">
- <lst name="defaults">
- <str name="hl.simple.pre"><![CDATA[<em>]]></str>
- <str name="hl.simple.post"><![CDATA[</em>]]></str>
- </lst>
- </formatter>
-
- <!-- Configure the standard encoder -->
- <encoder name="html"
- class="solr.highlight.HtmlEncoder" />
-
- <!-- Configure the standard fragListBuilder -->
- <fragListBuilder name="simple"
- class="solr.highlight.SimpleFragListBuilder"/>
-
- <!-- Configure the single fragListBuilder -->
- <fragListBuilder name="single"
- class="solr.highlight.SingleFragListBuilder"/>
-
- <!-- Configure the weighted fragListBuilder -->
- <fragListBuilder name="weighted"
- default="true"
- class="solr.highlight.WeightedFragListBuilder"/>
-
- <!-- default tag FragmentsBuilder -->
- <fragmentsBuilder name="default"
- default="true"
- class="solr.highlight.ScoreOrderFragmentsBuilder">
- <!--
- <lst name="defaults">
- <str name="hl.multiValuedSeparatorChar">/</str>
- </lst>
- -->
- </fragmentsBuilder>
-
- <!-- multi-colored tag FragmentsBuilder -->
- <fragmentsBuilder name="colored"
- class="solr.highlight.ScoreOrderFragmentsBuilder">
- <lst name="defaults">
- <str name="hl.tag.pre"><![CDATA[
- <b style="background:yellow">,<b style="background:lawgreen">,
- <b style="background:aquamarine">,<b style="background:magenta">,
- <b style="background:palegreen">,<b style="background:coral">,
- <b style="background:wheat">,<b style="background:khaki">,
- <b style="background:lime">,<b style="background:deepskyblue">]]></str>
- <str name="hl.tag.post"><![CDATA[</b>]]></str>
- </lst>
- </fragmentsBuilder>
-
- <boundaryScanner name="default"
- default="true"
- class="solr.highlight.SimpleBoundaryScanner">
- <lst name="defaults">
- <str name="hl.bs.maxScan">10</str>
- <str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
- </lst>
- </boundaryScanner>
-
- <boundaryScanner name="breakIterator"
- class="solr.highlight.BreakIteratorBoundaryScanner">
- <lst name="defaults">
- <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE -->
- <str name="hl.bs.type">WORD</str>
- <!-- language and country are used when constructing Locale object. -->
- <!-- And the Locale object will be used when getting instance of BreakIterator -->
- <str name="hl.bs.language">en</str>
- <str name="hl.bs.country">US</str>
- </lst>
- </boundaryScanner>
- </highlighting>
- </searchComponent>
<!-- Update Processors
@@ -1741,20 +1255,6 @@
<str name="content-type">text/plain; charset=UTF-8</str>
</queryResponseWriter>
- <!--
- Custom response writers can be declared as needed...
- -->
- <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/>
-
-
- <!-- XSLT response writer transforms the XML output by any xslt file found
- in Solr's conf/xslt directory. Changes to xslt files are checked for
- every xsltCacheLifetimeSeconds.
- -->
- <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
- <int name="xsltCacheLifetimeSeconds">5</int>
- </queryResponseWriter>
-
<!-- Query Parsers
http://wiki.apache.org/solr/SolrQuerySyntax