You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-user@lucene.apache.org by Pigeyre Romain <ro...@sopra.com> on 2014/09/24 20:12:33 UTC
Scoring with wildcards
Hi,
I have two records with a name_FRA field:
One with name_FRA="un test CARREAU"
And another one with name_FRA="un test CARRE"
{
"codeBarre": "1",
"name_FRA": "un test CARREAU"
}
{
"codeBarre": "2",
"name_FRA": "un test CARRE"
}
The configuration of these fields is:
<field name="name_FRA" type="text_general" indexed="true" stored="true" required="false" multiValued="false" />
<field name="codeBarre" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<field name="text" type="text_general" indexed="true" stored="false" multiValued="true" />
<copyField source="name_FRA" dest="text"/>
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
</fieldType>
When I use this query:
http://localhost:8983/solr/cdv_product/select?q=text%3Acarre*&fl=score%2C+*&wt=json&indent=true&debugQuery=true
The result is :
{
"responseHeader":{
"status":0,
"QTime":2,
"params":{
"debugQuery":"true",
"fl":"score, *",
"indent":"true",
"q":"text:carre*",
"wt":"json"}},
"response":{"numFound":2,"start":0,"maxScore":1.0,"docs":[
{
"codeBarre":"1",
"name_FRA":"un test CARREAU",
"_version_":1480150860842401792,
"score":1.0},
{
"codeBarre":"2",
"name_FRA":"un test CARRE",
"_version_":1480150875738472448,
"score":1.0}]
},
"debug":{
"rawquerystring":"text:carre*",
"querystring":"text:carre*",
"parsedquery":"text:carre*",
"parsedquery_toString":"text:carre*",
"explain":{
"1":"\n1.0 = (MATCH) ConstantScore(text:carre*), product of:\n 1.0 = boost\n 1.0 = queryNorm\n",
"2":"\n1.0 = (MATCH) ConstantScore(text:carre*), product of:\n 1.0 = boost\n 1.0 = queryNorm\n"},
"QParser":"LuceneQParser",
"timing":{
"time":2.0,
"prepare":{
"time":1.0,
"query":{
"time":1.0},
"facet":{
"time":0.0},
"mlt":{
"time":0.0},
"highlight":{
"time":0.0},
"stats":{
"time":0.0},
"expand":{
"time":0.0},
"debug":{
"time":0.0}},
"process":{
"time":1.0,
"query":{
"time":0.0},
"facet":{
"time":0.0},
"mlt":{
"time":0.0},
"highlight":{
"time":0.0},
"stats":{
"time":0.0},
"expand":{
"time":0.0},
"debug":{
"time":1.0}}}}}
The score is the same for both records. The CARREAU record comes first and the CARRE record comes next. I want the CARRE result to be placed before the CARREAU result because CARRE is an exact match. Is that possible?
NB: the scoring for this query only uses queryNorm and boost.
In this test :
http://localhost:8983/solr/cdv_product/select?q=text%3Acarre&fl=score%2C*&wt=json&indent=true&debugQuery=true
Only one record is found, but the scoring is more complex. Why?
{
"responseHeader":{
"status":0,
"QTime":2,
"params":{
"debugQuery":"true",
"fl":"score,*",
"indent":"true",
"q":"text:carre",
"wt":"json"}},
"response":{"numFound":1,"start":0,"maxScore":0.53033006,"docs":[
{
"codeBarre":"2",
"name_FRA":"un test CARRE",
"_version_":1480150875738472448,
"score":0.53033006}]
},
"debug":{
"rawquerystring":"text:carre",
"querystring":"text:carre",
"parsedquery":"text:carre",
"parsedquery_toString":"text:carre",
"explain":{
"2":"\n0.53033006 = (MATCH) weight(text:carre in 0) [DefaultSimilarity], result of:\n 0.53033006 = fieldWeight in 0, product of:\n 1.4142135 = tf(freq=2.0), with freq of:\n 2.0 = termFreq=2.0\n 1.0 = idf(docFreq=1, maxDocs=2)\n 0.375 = fieldNorm(doc=0)\n"},
"QParser":"LuceneQParser",
"timing":{
"time":2.0,
"prepare":{
"time":1.0,
"query":{
"time":1.0},
"facet":{
"time":0.0},
"mlt":{
"time":0.0},
"highlight":{
"time":0.0},
"stats":{
"time":0.0},
"expand":{
"time":0.0},
"debug":{
"time":0.0}},
"process":{
"time":1.0,
"query":{
"time":0.0},
"facet":{
"time":0.0},
"mlt":{
"time":0.0},
"highlight":{
"time":0.0},
"stats":{
"time":0.0},
"expand":{
"time":0.0},
"debug":{
"time":1.0}}}}}
Romain PIGEYRE
Centre de service de Lyon
[Sopra]
Sopra
Parc du Puy d'Or
72 Allée des Noisetiers - CS 10137
69578 - LIMONEST
France
Phone : +33 (0)4 37 26 43 33
romain.pigeyre@sopra.com<ma...@sopra.com> - www.sopra.com<http://www.sopra.com>
[cid:image004.png@01CFD833.DFE6CB90]<http://www.linkedin.com/company/sopra> [cid:image006.png@01CFD833.DFE6CB90] <https://www.youtube.com/user/SopraChannel> [cid:image008.png@01CFD833.DFE6CB90] <https://www.facebook.com/sopragroup> [cid:image010.png@01CFD833.DFE6CB90] <https://twitter.com/soprarh> [cid:image012.png@01CFD833.DFE6CB90] <http://fr.viadeo.com/fr/company/sopra>
Ce message peut contenir des informations confidentielles dont la divulgation est à ce titre rigoureusement interdite en l'absence d'autorisation explicite de l'émetteur. Dans l'hypothèse où vous auriez reçu par erreur ce message, merci de le renvoyer à l'émetteur et de détruire toute copie.
P Pensez à l'environnement avant d'imprimer.
Re: Scoring with wildcards
Posted by Jack Krupansky <ja...@basetechnology.com>.
The wildcard query is “constant score” to make it faster, so unfortunately that means there is no score differentiation between the wildcard matches.
You can simply add the wildcard prefix as a separate query term and boost it:
q=text:carre* text:carre^1.5
-- Jack Krupansky
From: Pigeyre Romain
Sent: Wednesday, September 24, 2014 2:12 PM
To: solr-user@lucene.apache.org
Cc: Pigeyre Romain
Subject: Scoring with wildcards
Hi,
I have two records with a name_FRA field:
One with name_FRA=”un test CARREAU”
And another one with name_FRA=”un test CARRE”
{
"codeBarre": "1",
"name_FRA": "un test CARREAU"
}
{
"codeBarre": "2",
"name_FRA": "un test CARRE"
}
The configuration of these fields is:
<field name="name_FRA" type="text_general" indexed="true" stored="true" required="false" multiValued="false" />
<field name="codeBarre" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<field name="text" type="text_general" indexed="true" stored="false" multiValued="true" />
<copyField source="name_FRA" dest="text"/>
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
</fieldType>
When I’m using this query :
http://localhost:8983/solr/cdv_product/select?q=text%3Acarre*&fl=score%2C+*&wt=json&indent=true&debugQuery=true
The result is :
{
"responseHeader":{
"status":0,
"QTime":2,
"params":{
"debugQuery":"true",
"fl":"score, *",
"indent":"true",
"q":"text:carre*",
"wt":"json"}},
"response":{"numFound":2,"start":0,"maxScore":1.0,"docs":[
{
"codeBarre":"1",
"name_FRA":"un test CARREAU",
"_version_":1480150860842401792,
"score":1.0},
{
"codeBarre":"2",
"name_FRA":"un test CARRE",
"_version_":1480150875738472448,
"score":1.0}]
},
"debug":{
"rawquerystring":"text:carre*",
"querystring":"text:carre*",
"parsedquery":"text:carre*",
"parsedquery_toString":"text:carre*",
"explain":{
"1":"\n1.0 = (MATCH) ConstantScore(text:carre*), product of:\n 1.0 = boost\n 1.0 = queryNorm\n",
"2":"\n1.0 = (MATCH) ConstantScore(text:carre*), product of:\n 1.0 = boost\n 1.0 = queryNorm\n"},
"QParser":"LuceneQParser",
"timing":{
"time":2.0,
"prepare":{
"time":1.0,
"query":{
"time":1.0},
"facet":{
"time":0.0},
"mlt":{
"time":0.0},
"highlight":{
"time":0.0},
"stats":{
"time":0.0},
"expand":{
"time":0.0},
"debug":{
"time":0.0}},
"process":{
"time":1.0,
"query":{
"time":0.0},
"facet":{
"time":0.0},
"mlt":{
"time":0.0},
"highlight":{
"time":0.0},
"stats":{
"time":0.0},
"expand":{
"time":0.0},
"debug":{
"time":1.0}}}}}
The score is the same for both records. The CARREAU record comes first and the CARRE record comes next. I want the CARRE result to be placed before the CARREAU result because CARRE is an exact match. Is that possible?
NB: the scoring for this query only uses queryNorm and boost.
In this test :
http://localhost:8983/solr/cdv_product/select?q=text%3Acarre&fl=score%2C*&wt=json&indent=true&debugQuery=true
I have only one record found but the scoring is more complex. Why?
{ "responseHeader":{ "status":0, "QTime":2, "params":{ "debugQuery":"true", "fl":"score,*", "indent":"true", "q":"text:carre", "wt":"json"}}, "response":{"numFound":1,"start":0,"maxScore":0.53033006,"docs":[ { "codeBarre":"2", "name_FRA":"un test CARRE", "_version_":1480150875738472448, "score":0.53033006}] }, "debug":{ "rawquerystring":"text:carre", "querystring":"text:carre", "parsedquery":"text:carre", "parsedquery_toString":"text:carre", "explain":{ "2":"\n0.53033006 = (MATCH) weight(text:carre in 0) [DefaultSimilarity], result of:\n 0.53033006 = fieldWeight in 0, product of:\n 1.4142135 = tf(freq=2.0), with freq of:\n 2.0 = termFreq=2.0\n 1.0 = idf(docFreq=1, maxDocs=2)\n 0.375 = fieldNorm(doc=0)\n"}, "QParser":"LuceneQParser", "timing":{ "time":2.0, "prepare":{ "time":1.0, "query":{ "time":1.0}, "facet":{ "time":0.0}, "mlt":{ "time":0.0}, "highlight":{ "time":0.0}, "stats":{ "time":0.0}, "expand":{ "time":0.0}, "debug":{ "time":0.0}}, "process":{ "time":1.0, "query":{ "time":0.0}, "facet":{ "time":0.0}, "mlt":{ "time":0.0}, "highlight":{ "time":0.0}, "stats":{ "time":0.0}, "expand":{ "time":0.0}, "debug":{ "time":1.0}}}}}
Romain PIGEYRE
Centre de service de Lyon
Sopra
Parc du Puy d'Or
72 Allée des Noisetiers - CS 10137
69578 - LIMONEST
France
Phone : +33 (0)4 37 26 43 33
romain.pigeyre@sopra.com - www.sopra.com
Ce message peut contenir des informations confidentielles dont la divulgation est à ce titre rigoureusement interdite en l'absence d'autorisation explicite de l'émetteur. Dans l'hypothèse où vous auriez reçu par erreur ce message, merci de le renvoyer à l'émetteur et de détruire toute copie.
P Pensez à l'environnement avant d'imprimer.