You are viewing a plain text version of this content. The canonical link for it is here.

Posted to solr-user@lucene.apache.org by Pigeyre Romain <ro...@sopra.com> on 2014/09/24 20:12:33 UTC

Scoring with wild cars

Hi,

I have two records with a name_FRA field:
One with name_FRA="un test CARREAU"
And another one with name_FRA="un test CARRE"

{
        "codeBarre": "1",
        "name_FRA": "un test CARREAU"
      }
{
        "codeBarre": "2",
        "name_FRA": "un test CARRE"
      }

The configuration of these fields is:

<field name="name_FRA" type="text_general" indexed="true" stored="true" required="false" multiValued="false" />
<field name="codeBarre" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<field name="text" type="text_general" indexed="true" stored="false" multiValued="true" />
<copyField source="name_FRA" dest="text"/>

<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
        <!-- in this example, we will only use synonyms at query time
        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
        -->
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ASCIIFoldingFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ASCIIFoldingFilterFactory"/>
      </analyzer>
    </fieldType>

When I'm using this query :
http://localhost:8983/solr/cdv_product/select?q=text%3Acarre*&fl=score%2C+*&wt=json&indent=true&debugQuery=true
The result is :
{
  "responseHeader":{
    "status":0,
    "QTime":2,
    "params":{
      "debugQuery":"true",
      "fl":"score, *",
      "indent":"true",
      "q":"text:carre*",
      "wt":"json"}},
  "response":{"numFound":2,"start":0,"maxScore":1.0,"docs":[
      {
       "codeBarre":"1",
        "name_FRA":"un test CARREAU",
        "_version_":1480150860842401792,
        "score":1.0},
      {
        "codeBarre":"2",
        "name_FRA":"un test CARRE",
        "_version_":1480150875738472448,
        "score":1.0}]
  },
  "debug":{
    "rawquerystring":"text:carre*",
    "querystring":"text:carre*",
    "parsedquery":"text:carre*",
    "parsedquery_toString":"text:carre*",
    "explain":{
      "1":"\n1.0 = (MATCH) ConstantScore(text:carre*), product of:\n  1.0 = boost\n  1.0 = queryNorm\n",
      "2":"\n1.0 = (MATCH) ConstantScore(text:carre*), product of:\n  1.0 = boost\n  1.0 = queryNorm\n"},
    "QParser":"LuceneQParser",
    "timing":{
      "time":2.0,
      "prepare":{
        "time":1.0,
        "query":{
          "time":1.0},
        "facet":{
          "time":0.0},
        "mlt":{
          "time":0.0},
        "highlight":{
          "time":0.0},
        "stats":{
          "time":0.0},
        "expand":{
          "time":0.0},
        "debug":{
          "time":0.0}},
      "process":{
        "time":1.0,
        "query":{
          "time":0.0},
        "facet":{
          "time":0.0},
        "mlt":{
          "time":0.0},
        "highlight":{
          "time":0.0},
        "stats":{
          "time":0.0},
        "expand":{
          "time":0.0},
        "debug":{
          "time":1.0}}}}}

The score is the same for both records. The CARREAU record comes first and the CARRE record comes next. I want the CARRE result to be placed before the CARREAU result because CARRE is an exact match. Is this possible?

NB: the scoring for this query only uses queryNorm and the boost.

In this test :
http://localhost:8983/solr/cdv_product/select?q=text%3Acarre&fl=score%2C*&wt=json&indent=true&debugQuery=true

Only one record is found, but the scoring is more complex. Why?

{

  "responseHeader":{

    "status":0,

    "QTime":2,

    "params":{

      "debugQuery":"true",

      "fl":"score,*",

      "indent":"true",

      "q":"text:carre",

      "wt":"json"}},

  "response":{"numFound":1,"start":0,"maxScore":0.53033006,"docs":[

      {

        "codeBarre":"2",

        "name_FRA":"un test CARRE",

        "_version_":1480150875738472448,

        "score":0.53033006}]

  },

  "debug":{

    "rawquerystring":"text:carre",

    "querystring":"text:carre",

    "parsedquery":"text:carre",

    "parsedquery_toString":"text:carre",

    "explain":{

      "2":"\n0.53033006 = (MATCH) weight(text:carre in 0) [DefaultSimilarity], result of:\n  0.53033006 = fieldWeight in 0, product of:\n    1.4142135 = tf(freq=2.0), with freq of:\n      2.0 = termFreq=2.0\n    1.0 = idf(docFreq=1, maxDocs=2)\n    0.375 = fieldNorm(doc=0)\n"},

    "QParser":"LuceneQParser",

    "timing":{

      "time":2.0,

      "prepare":{

        "time":1.0,

        "query":{

          "time":1.0},

        "facet":{

          "time":0.0},

        "mlt":{

          "time":0.0},

        "highlight":{

          "time":0.0},

        "stats":{

          "time":0.0},

        "expand":{

          "time":0.0},

        "debug":{

          "time":0.0}},

      "process":{

        "time":1.0,

        "query":{

          "time":0.0},

        "facet":{

          "time":0.0},

        "mlt":{

          "time":0.0},

        "highlight":{

          "time":0.0},

        "stats":{

          "time":0.0},

        "expand":{

          "time":0.0},

        "debug":{

          "time":1.0}}}}}





Romain PIGEYRE
Centre de service de Lyon

[Sopra]

Sopra
Parc du Puy d'Or
72 Allée des Noisetiers - CS 10137
69578 - LIMONEST
France
Phone : +33 (0)4 37 26 43 33
romain.pigeyre@sopra.com<ma...@sopra.com> - www.sopra.com<http://www.sopra.com>


[cid:image004.png@01CFD833.DFE6CB90]<http://www.linkedin.com/company/sopra> [cid:image006.png@01CFD833.DFE6CB90] <https://www.youtube.com/user/SopraChannel>  [cid:image008.png@01CFD833.DFE6CB90] <https://www.facebook.com/sopragroup>  [cid:image010.png@01CFD833.DFE6CB90] <https://twitter.com/soprarh>  [cid:image012.png@01CFD833.DFE6CB90] <http://fr.viadeo.com/fr/company/sopra>
Ce message peut contenir des informations confidentielles dont la divulgation est à ce titre rigoureusement interdite en l'absence d'autorisation explicite de l'émetteur. Dans l'hypothèse où vous auriez reçu par erreur ce message, merci de le renvoyer à l'émetteur et de détruire toute copie.

P Pensez à l'environnement avant d'imprimer.

Re: Scoring with wild cars

Posted by Jack Krupansky <ja...@basetechnology.com>.

The wildcard query is “constant score” to make it faster, so unfortunately that means there is no score differentiation between the wildcard matches.

You can simply add the wildcard prefix (without the trailing `*`) as a separate query term and boost it:

q=text:carre* text:carre^1.5

-- Jack Krupansky

From: Pigeyre Romain 
Sent: Wednesday, September 24, 2014 2:12 PM
To: solr-user@lucene.apache.org 
Cc: Pigeyre Romain 
Subject: Scoring with wild cars

Hi,

 

I hava two records with name_fra field

One with name_fra=”un test CARREAU”

And another one with name_fra=”un test CARRE”

 

{

        "codeBarre": "1",

        "name_FRA": "un test CARREAU"

      }

{

        "codeBarre": "2",

        "name_FRA": "un test CARRE"

      }

 

Configuration of these fields are :

 

<field name="name_FRA" type="text_general" indexed="true" stored="true" required="false" multiValued="false" />

<field name="codeBarre" type="string" indexed="true" stored="true" required="true" multiValued="false" />

<field name="text" type="text_general" indexed="true" stored="false" multiValued="true" />

<copyField source="name_FRA" dest="text"/>

 

<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">

      <analyzer type="index">

        <tokenizer class="solr.WhitespaceTokenizerFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />

        <!-- in this example, we will only use synonyms at query time

        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>

        -->

        <filter class="solr.LowerCaseFilterFactory"/>

        <filter class="solr.ASCIIFoldingFilterFactory"/>

      </analyzer>

      <analyzer type="query">

        <tokenizer class="solr.WhitespaceTokenizerFactory"/>

        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />

        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>

        <filter class="solr.LowerCaseFilterFactory"/>

        <filter class="solr.ASCIIFoldingFilterFactory"/>

      </analyzer>

    </fieldType>

 

When I’m using this query :

http://localhost:8983/solr/cdv_product/select?q=text%3Acarre*&fl=score%2C+*&wt=json&indent=true&debugQuery=true

The result is :

{

  "responseHeader":{

    "status":0,

    "QTime":2,

    "params":{

      "debugQuery":"true",

      "fl":"score, *",

      "indent":"true",

      "q":"text:carre*",

      "wt":"json"}},

  "response":{"numFound":2,"start":0,"maxScore":1.0,"docs":[

      {

       "codeBarre":"1",

        "name_FRA":"un test CARREAU",

        "_version_":1480150860842401792,

        "score":1.0},

      {

        "codeBarre":"2",

        "name_FRA":"un test CARRE",

        "_version_":1480150875738472448,

        "score":1.0}]

  },

  "debug":{

    "rawquerystring":"text:carre*",

    "querystring":"text:carre*",

    "parsedquery":"text:carre*",

    "parsedquery_toString":"text:carre*",

    "explain":{

      "1":"\n1.0 = (MATCH) ConstantScore(text:carre*), product of:\n  1.0 = boost\n  1.0 = queryNorm\n",

      "2":"\n1.0 = (MATCH) ConstantScore(text:carre*), product of:\n  1.0 = boost\n  1.0 = queryNorm\n"},

    "QParser":"LuceneQParser",

    "timing":{

      "time":2.0,

      "prepare":{

        "time":1.0,

        "query":{

          "time":1.0},

        "facet":{

          "time":0.0},

        "mlt":{

          "time":0.0},

        "highlight":{

          "time":0.0},

        "stats":{

          "time":0.0},

        "expand":{

          "time":0.0},

        "debug":{

          "time":0.0}},

      "process":{

        "time":1.0,

        "query":{

          "time":0.0},

        "facet":{

          "time":0.0},

        "mlt":{

          "time":0.0},

        "highlight":{

          "time":0.0},

        "stats":{

          "time":0.0},

        "expand":{

          "time":0.0},

        "debug":{

          "time":1.0}}}}}

 

The score is the same for both of record. CARREAU record is first and CARRE is next. I want to place CARRE before CARREAU result because CARRE is an exact match. Is it possible?

 

NB : scoring for this query only use querynorm and boosters

 

In this test :

http://localhost:8983/solr/cdv_product/select?q=text%3Acarre&fl=score%2C*&wt=json&indent=true&debugQuery=true

 

I have only one record found but the scoring is more complex. Why?

{  "responseHeader":{    "status":0,    "QTime":2,    "params":{      "debugQuery":"true",      "fl":"score,*",      "indent":"true",      "q":"text:carre",      "wt":"json"}},  "response":{"numFound":1,"start":0,"maxScore":0.53033006,"docs":[      {        "codeBarre":"2",        "name_FRA":"un test CARRE",        "_version_":1480150875738472448,        "score":0.53033006}]  },  "debug":{    "rawquerystring":"text:carre",    "querystring":"text:carre",    "parsedquery":"text:carre",    "parsedquery_toString":"text:carre",    "explain":{      "2":"\n0.53033006 = (MATCH) weight(text:carre in 0) [DefaultSimilarity], result of:\n  0.53033006 = fieldWeight in 0, product of:\n    1.4142135 = tf(freq=2.0), with freq of:\n      2.0 = termFreq=2.0\n    1.0 = idf(docFreq=1, maxDocs=2)\n    0.375 = fieldNorm(doc=0)\n"},    "QParser":"LuceneQParser",    "timing":{      "time":2.0,      "prepare":{        "time":1.0,        "query":{          "time":1.0},        "facet":{          "time":0.0},        "mlt":{          "time":0.0},        "highlight":{          "time":0.0},        "stats":{          "time":0.0},        "expand":{          "time":0.0},        "debug":{          "time":0.0}},      "process":{        "time":1.0,        "query":{          "time":0.0},        "facet":{          "time":0.0},        "mlt":{          "time":0.0},        "highlight":{          "time":0.0},        "stats":{          "time":0.0},        "expand":{          "time":0.0},        "debug":{          "time":1.0}}}}} 

 

 

 


      Romain PIGEYRE

      Centre de service de Lyon
     

     
      Sopra
      Parc du Puy d'Or
      72 Allée des Noisetiers - CS 10137
      69578 - LIMONEST
      France
      Phone : +33 (0)4 37 26 43 33
      romain.pigeyre@sopra.com - www.sopra.com
     


     

Ce message peut contenir des informations confidentielles dont la divulgation est à ce titre rigoureusement interdite en l'absence d'autorisation explicite de l'émetteur. Dans l'hypothèse où vous auriez reçu par erreur ce message, merci de le renvoyer à l'émetteur et de détruire toute copie.

 

P Pensez à l'environnement avant d'imprimer.