You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by "Wasim (JIRA)" <ji...@apache.org> on 2015/03/16 15:19:38 UTC

[jira] [Updated] (SOLR-7250) In spellcheck.extendedResults=true freq value of suggestion differs from its actual origFreq

     [ https://issues.apache.org/jira/browse/SOLR-7250?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Wasim updated SOLR-7250:
------------------------
    Description: 
The original frequency does not match the suggestion frequency in Solr.

Output for "whs is": the frequency (73) reported for the suggestion "who is" differs from its actual original frequency (94).
For your reference, I am attaching two images of the output.

My schema.xml

<field name="gram" type="textSpell" indexed="true" stored="true" required="true" multiValued="false"/>
<field name="gram_ci" type="textSpellCi" indexed="true" stored="false" multiValued="false"/>

<copyField source="gram" dest="gram_ci"/>

<fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100">
        <analyzer type="index">
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.ShingleFilterFactory" maxShingleSize="5" minShingleSize="2" outputUnigrams="true"/>
        </analyzer>
        <analyzer type="query">
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.ShingleFilterFactory" maxShingleSize="5" minShingleSize="2" outputUnigrams="true"/>
        </analyzer>
</fieldType>
<fieldType name="textSpellCi" class="solr.TextField" positionIncrementGap="100">
        <analyzer type="index">
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.ShingleFilterFactory" maxShingleSize="5" minShingleSize="2" outputUnigrams="true"/>
        </analyzer>
        <analyzer type="query">
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.ShingleFilterFactory" maxShingleSize="5" minShingleSize="2" outputUnigrams="true"/>
        </analyzer>
</fieldType>

solrconfig.xml

<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
        <str name="queryAnalyzerFieldType">textSpellCi</str>
        <lst name="spellchecker">
                <str name="name">default</str>
                <str name="field">gram_ci</str>
                <str name="classname">solr.DirectSolrSpellChecker</str>
                <str name="distanceMeasure">internal</str>
                <float name="accuracy">0.5</float>
                <int name="maxEdits">2</int>
                <int name="minPrefix">0</int>
                <int name="maxInspections">5</int>
                <int name="minQueryLength">2</int>
                <float name="maxQueryFrequency">0.99</float>
                <str name="comparatorClass">freq</str>
                <float name="thresholdTokenFrequency">0.0</float>
        </lst>
</searchComponent>
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
        <lst name="defaults">
                <str name="df">gram_ci</str>
                <str name="spellcheck.dictionary">default</str>
                <str name="spellcheck">on</str>
                <str name="spellcheck.extendedResults">true</str>
                <str name="spellcheck.count">15</str>
                <str name="spellcheck.alternativeTermCount">10</str>
                <str name="spellcheck.onlyMorePopular">false</str>
        </lst>
        <arr name="last-components">
                <str>spellcheck</str>
        </arr>
</requestHandler>

  was:
Original frequency is not matching with suggestion frequency in SOLR

Output for "whs is" - (73) which is a suggestion of "who is" varies than its actual original frequency (94)
For your reference attaching two images of the output

My schema.xml

<field name="gram" type="textSpell" indexed="true" stored="true" required="true" multiValued="false"/>
<field name="gram_ci" type="textSpellCi" indexed="true" stored="false" multiValued="false"/>

<copyField source="gram" dest="gram_ci"/>

<fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.ShingleFilterFactory" maxShingleSize="5" minShingleSize="2" outputUnigrams="true"/>
    </analyzer>
    <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.ShingleFilterFactory" maxShingleSize="5" minShingleSize="2" outputUnigrams="true"/>
    </analyzer>
</fieldType>
<fieldType name="textSpellCi" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ShingleFilterFactory" maxShingleSize="5" minShingleSize="2" outputUnigrams="true"/>
    </analyzer>
    <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ShingleFilterFactory" maxShingleSize="5" minShingleSize="2" outputUnigrams="true"/>
    </analyzer>
</fieldType>

solrconfig.xml

<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
    <str name="queryAnalyzerFieldType">textSpellCi</str>
    <lst name="spellchecker">
        <str name="name">default</str>
        <str name="field">gram_ci</str>
        <str name="classname">solr.DirectSolrSpellChecker</str>
        <str name="distanceMeasure">internal</str>
        <float name="accuracy">0.5</float>
        <int name="maxEdits">2</int>
        <int name="minPrefix">0</int>
        <int name="maxInspections">5</int>
        <int name="minQueryLength">2</int>
        <float name="maxQueryFrequency">0.99</float>
        <str name="comparatorClass">freq</str>
        <float name="thresholdTokenFrequency">0.0</float>
    </lst>
</searchComponent>
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
    <lst name="defaults">
        <str name="df">gram_ci</str>
        <str name="spellcheck.dictionary">default</str>
        <str name="spellcheck">on</str>
        <str name="spellcheck.extendedResults">true</str>
        <str name="spellcheck.count">15</str>
        <str name="spellcheck.alternativeTermCount">10</str>
        <str name="spellcheck.onlyMorePopular">false</str>
    </lst>
    <arr name="last-components">
        <str>spellcheck</str>
    </arr>
</requestHandler>


> In spellcheck.extendedResults=true freq value of suggestion differs from its actual origFreq 
> ---------------------------------------------------------------------------------------------
>
>                 Key: SOLR-7250
>                 URL: https://issues.apache.org/jira/browse/SOLR-7250
>             Project: Solr
>          Issue Type: New Feature
>         Environment: solr 4.10.4
>            Reporter: Wasim
>   Original Estimate: 24h
>  Remaining Estimate: 24h
>
> The original frequency does not match the suggestion frequency in Solr.
> Output for "whs is": the frequency (73) reported for the suggestion "who is" differs from its actual original frequency (94).
> For your reference, I am attaching two images of the output.
> My schema.xml
> <field name="gram" type="textSpell" indexed="true" stored="true" required="true" multiValued="false"/>
> <field name="gram_ci" type="textSpellCi" indexed="true" stored="false" multiValued="false"/>
> <copyField source="gram" dest="gram_ci"/>
> <fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100">
>         <analyzer type="index">
>                 <tokenizer class="solr.StandardTokenizerFactory"/>
>                 <filter class="solr.ShingleFilterFactory" maxShingleSize="5" minShingleSize="2" outputUnigrams="true"/>
>         </analyzer>
>         <analyzer type="query">
>                 <tokenizer class="solr.StandardTokenizerFactory"/>
>                 <filter class="solr.ShingleFilterFactory" maxShingleSize="5" minShingleSize="2" outputUnigrams="true"/>
>         </analyzer>
> </fieldType>
> <fieldType name="textSpellCi" class="solr.TextField" positionIncrementGap="100">
>         <analyzer type="index">
>                 <tokenizer class="solr.StandardTokenizerFactory"/>
>                 <filter class="solr.LowerCaseFilterFactory"/>
>                 <filter class="solr.ShingleFilterFactory" maxShingleSize="5" minShingleSize="2" outputUnigrams="true"/>
>         </analyzer>
>         <analyzer type="query">
>                 <tokenizer class="solr.StandardTokenizerFactory"/>
>                 <filter class="solr.LowerCaseFilterFactory"/>
>                 <filter class="solr.ShingleFilterFactory" maxShingleSize="5" minShingleSize="2" outputUnigrams="true"/>
>         </analyzer>
> </fieldType>
> solrconfig.xml
> <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
>         <str name="queryAnalyzerFieldType">textSpellCi</str>
>         <lst name="spellchecker">
>                 <str name="name">default</str>
>                 <str name="field">gram_ci</str>
>                 <str name="classname">solr.DirectSolrSpellChecker</str>
>                 <str name="distanceMeasure">internal</str>
>                 <float name="accuracy">0.5</float>
>                 <int name="maxEdits">2</int>
>                 <int name="minPrefix">0</int>
>                 <int name="maxInspections">5</int>
>                 <int name="minQueryLength">2</int>
>                 <float name="maxQueryFrequency">0.99</float>
>                 <str name="comparatorClass">freq</str>
>                 <float name="thresholdTokenFrequency">0.0</float>
>         </lst>
> </searchComponent>
> <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
>         <lst name="defaults">
>                 <str name="df">gram_ci</str>
>                 <str name="spellcheck.dictionary">default</str>
>                 <str name="spellcheck">on</str>
>                 <str name="spellcheck.extendedResults">true</str>
>                 <str name="spellcheck.count">15</str>
>                 <str name="spellcheck.alternativeTermCount">10</str>
>                 <str name="spellcheck.onlyMorePopular">false</str>
>         </lst>
>         <arr name="last-components">
>                 <str>spellcheck</str>
>         </arr>
> </requestHandler>



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org