You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-user@lucene.apache.org by Webster Homer <we...@milliporesigma.com> on 2018/10/30 20:34:44 UTC

Odd Scoring behavior

I noticed that query matches sometimes seem to be counted twice when they are scored. This happens when the field type applies stemming and the query matches a synonym.
The score for the field then appears to be twice what it should be. We see this only when the matching synonym contains a term that is changed by stemming.


We have this synonym defined:
bsa, bovine serum albumin

We have this fieldtype:
    <!-- text_general: tokenized, stop-word-filtered, lower-cased and stemmed text.
         The index and query chains below are the same except that the query chain
         also expands synonyms from synonyms.txt; the index-time synonym filter is
         commented out. -->
    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" />
        <!-- in this example, we will only use synonyms at query time
        <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
        -->
        <filter class="solr.LowerCaseFilterFactory"/>
        <!-- KeywordRepeat emits every token twice, one copy flagged as a keyword so the
             stemmer leaves it untouched; RemoveDuplicates then drops the second copy
             when stemming did not change the token. Net effect: both the original and
             the stemmed form (e.g. bovine and bovin) are indexed at the same position. -->
        <filter class="solr.KeywordRepeatFilterFactory"/>
        <filter class="solr.SnowballPorterFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
     </analyzer>
    <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" />
        <!-- Query-time synonym expansion runs before lower-casing; ignoreCase="true"
             makes the synonym lookup itself case-insensitive. expand="true" keeps all
             entries of a matching synonym line in the query. -->
        <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <!-- Same original-plus-stem trio as the index chain.
             NOTE(review): this trio runs on the token graph produced by the synonym
             filter above. That combination is presumably what yields one scored clause
             for the unstemmed terms and another for the stemmed terms when a multi-word
             synonym matches. TODO confirm with the Analysis screen / debugQuery. -->
        <filter class="solr.KeywordRepeatFilterFactory"/>
        <filter class="solr.SnowballPorterFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>

Which is used as:
    <field name="search_en_root_name" type="text_general" indexed="true" stored="true" required="false" multiValued="false" />

When we query this field using the eDisMax query parser, the field search_en_root_name seems to contribute twice to the score for this query:
bovine serum albumin

once for the base query, and once for the stemmed form of the query:
bovin serum albumin

If we remove the synonym, the field is only counted once. We only see this behavior if part of the synonym can be stemmed. This seems odd, and it has the effect of overpowering the boosts on other fields.

The explain plan without the synonym:
{
  "responseHeader":{
    "zkConnected":true,
    "status":0,
    "QTime":44,
    "params":{
      "mm":"2<-25%",
      "fl":"searchmv_pno, search_en_p_pri_name [explain style=nl]",
      "group.limit":"1",
      "q.op":"OR",
      "sort":"score desc,sort_en_name asc ,sort_ds asc,  search_pid asc",
      "group.ngroups":"true",
      "q":"bovine serum albumin",
      "tie":".45",
      "defType":"edismax",
      "group.sort":"sort_ds asc, score desc",
      "qf":"search_en_p_pri_name_min^7500
search_en_root_name_min^12000 search_en_p_pri_name^3000
search_pid^2500 searchmv_pno^2500 searchmv_cas_number^2500
searchmv_p_skus^2500 search_lform_lc^2500  search_en_root_name^2500
searchmv_en_s_pri_name^2500 searchmv_en_keywords^2500
searchmv_lookahead_terms^2000 searchmv_user_term^2000
searchmv_en_acronym^1500 searchmv_en_synonyms^1500
searchmv_concat_sku^1000 search_concat_pno^1000
searchmv_en_name_suf^1000 searchmv_component_cas^1000
search_lform^1000 searchmv_pno_genr^500 search_concat_pno_genr^500
searchmv_p_skus_genr^500 search_eform search_mol_form searchmv_component_molform searchmv_en_descriptions searchmv_en_chem_comp searchmv_en_attributes searchmv_en_page_title search_mdl_number searchmv_xref_comparable_pno searchmv_xref_comparable_sku searchmv_xref_equivalent_pno searchmv_xref_exact_pno searchmv_xref_exact_sku searchmv_vendor_sku searchmv_material_number search_en_sortkey searchmv_rtecs search_color_idx search_beilstein search_ecnumber search_egecnumber search_femanumber searchmv_isbn",
      "group.field":"id_s",
      "_":"1540331449276",
      "group":"true"}},
  "grouped":{
    "id_s":{
      "matches":4701,
      "ngroups":4393,
      "groups":[{
          "groupValue":"bovineserumalbumin123459048468",
          "doclist":{"numFound":57,"start":0,"docs":[
              {
                "search_en_p_pri_name":"Bovine Serum Albumin",
                "searchmv_pno":["A2153"],
                "[explain]":{
                  "match":true,
                  "value":38145.117,
                  "description":"max plus 0.45 times others of:",
                  "details":[{
                      "match":true,
                      "value":10434.111,
                      "description":"sum of:",
                      "details":[{
                          "match":true,
                          "value":4042.5876,

"description":"weight(Synonym(search_en_root_name:bovin
search_en_root_name:bovine) in 20407) [SialBM25Similarity], result of:",
                          "details":[{
                              "match":true,
                              "value":4042.5876,
                              "description":"score(doc=20407,freq=2.0
= termFreq=2.0\n), product of:",
                              "details":[{
                                  "match":true,
                                  "value":2500.0,
                                  "description":"boost"},
                                {
                                  "match":true,
                                  "value":1.0,
                                  "description":"idf, computed as
log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                                  "details":[{
                                      "match":true,
                                      "value":204.0,
                                      "description":"docFreq"},
                                    {
                                      "match":true,
                                      "value":365301.0,
                                      "description":"docCount"}]},
                                {
                                  "match":true,
                                  "value":1.617035,
                                  "description":"tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength /
avgFieldLength)) from:",
                                  "details":[{
                                      "match":true,
                                      "value":2.0,
                                      "description":"termFreq=2.0"},
                                    {
                                      "match":true,
                                      "value":1.2,
                                      "description":"parameter k1"},
                                    {
                                      "match":true,
                                      "value":0.75,
                                      "description":"parameter b"},
                                    {
                                      "match":true,
                                      "value":6.4128513,
                                      "description":"avgFieldLength"},
                                    {
                                      "match":true,
                                      "value":3.0,
                                      "description":"fieldLength"}]}]}]},
                        {
                          "match":true,
                          "value":3195.7617,

"description":"weight(search_en_root_name:serum in 20407) [SialBM25Similarity], result of:",
                          "details":[{
                              "match":true,
                              "value":3195.7617,
                              "description":"score(doc=20407,freq=1.0
= termFreq=1.0\n), product of:",
                              "details":[{
                                  "match":true,
                                  "value":2500.0,
                                  "description":"boost"},
                                {
                                  "match":true,
                                  "value":1.0,
                                  "description":"idf, computed as
log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                                  "details":[{
                                      "match":true,
                                      "value":245.0,
                                      "description":"docFreq"},
                                    {
                                      "match":true,
                                      "value":365301.0,
                                      "description":"docCount"}]},
                                {
                                  "match":true,
                                  "value":1.2783047,
                                  "description":"tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength /
avgFieldLength)) from:",
                                  "details":[{
                                      "match":true,
                                      "value":1.0,
                                      "description":"termFreq=1.0"},
                                    {
                                      "match":true,
                                      "value":1.2,
                                      "description":"parameter k1"},
                                    {
                                      "match":true,
                                      "value":0.75,
                                      "description":"parameter b"},
                                    {
                                      "match":true,
                                      "value":6.4128513,
                                      "description":"avgFieldLength"},
                                    {
                                      "match":true,
                                      "value":3.0,
                                      "description":"fieldLength"}]}]}]},
                        {
                          "match":true,
                          "value":3195.7617,

"description":"weight(Synonym(search_en_root_name:albumin
search_en_root_name:albumina) in 20407) [SialBM25Similarity], result of:",
                          "details":[{
                              "match":true,
                              "value":3195.7617,
                              "description":"score(doc=20407,freq=1.0
= termFreq=1.0\n), product of:",
                              "details":[{
                                  "match":true,
                                  "value":2500.0,
                                  "description":"boost"},
                                {
                                  "match":true,
                                  "value":1.0,
                                  "description":"idf, computed as
log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                                  "details":[{
                                      "match":true,
                                      "value":167.0,
                                      "description":"docFreq"},
                                    {
                                      "match":true,
                                      "value":365301.0,
                                      "description":"docCount"}]},
                                {
                                  "match":true,
                                  "value":1.2783047,
                                  "description":"tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength /
avgFieldLength)) from:",
                                  "details":[{
                                      "match":true,
                                      "value":1.0,
                                      "description":"termFreq=1.0"},
                                    {
                                      "match":true,
                                      "value":1.2,
                                      "description":"parameter k1"},
                                    {
                                      "match":true,
                                      "value":0.75,
                                      "description":"parameter b"},
                                    {
                                      "match":true,
                                      "value":6.4128513,
                                      "description":"avgFieldLength"},
                                    {
                                      "match":true,
                                      "value":3.0,
                                      "description":"fieldLength"}]}]}]}]},

The explain with the synonym
Scoring for Bovine Serum Albumin in search_en_root_name - With Synonym
{
  "responseHeader":{
    "zkConnected":true,
    "status":0,
    "QTime":1391,
    "params":{
      "mm":"2<-25%",
      "fl":"searchmv_pno, search_en_p_pri_name [explain style=nl]",
      "group.limit":"1",
      "q.op":"OR",
      "sort":"score desc,sort_en_name asc ,sort_ds asc,  search_pid asc",
      "group.ngroups":"true",
      "q":"bovine serum albumin",
      "tie":".45",
      "defType":"edismax",
      "group.sort":"sort_ds asc, score desc",
      "qf":"search_en_p_pri_name_min^7500
search_en_root_name_min^12000 search_en_p_pri_name^3000
search_pid^2500 searchmv_pno^2500 searchmv_cas_number^2500
searchmv_p_skus^2500 search_lform_lc^2500  search_en_root_name^2500
searchmv_en_s_pri_name^2500 searchmv_en_keywords^2500
searchmv_lookahead_terms^2000 searchmv_user_term^2000
searchmv_en_acronym^1500 searchmv_en_synonyms^1500
searchmv_concat_sku^1000 search_concat_pno^1000
searchmv_en_name_suf^1000 searchmv_component_cas^1000
search_lform^1000 searchmv_pno_genr^500 search_concat_pno_genr^500
searchmv_p_skus_genr^500 search_eform search_mol_form searchmv_component_molform searchmv_en_descriptions searchmv_en_chem_comp searchmv_en_attributes searchmv_en_page_title search_mdl_number searchmv_xref_comparable_pno searchmv_xref_comparable_sku searchmv_xref_equivalent_pno searchmv_xref_exact_pno searchmv_xref_exact_sku searchmv_vendor_sku searchmv_material_number search_en_sortkey searchmv_rtecs search_color_idx search_beilstein search_ecnumber search_egecnumber search_femanumber searchmv_isbn",
      "group.field":"id_s",
      "_":"1540331449276",
      "group":"true"}},
  "grouped":{
    "id_s":{
      "matches":9368,
      "ngroups":8552,
      "groups":[{
          "groupValue":"bovineserumalbumin123459048468",
          "doclist":{"numFound":57,"start":0,"docs":[
              {
                "search_en_p_pri_name":"Bovine Serum Albumin",
                "searchmv_pno":["A2153"],
                "[explain]":{
                  "match":true,
                  "value":64754.367,
                  "description":"max plus 0.45 times others of:",
                  "details":[{
                      "match":true,
                      "value":19174.57,
                      "description":"sum of:",
                      "details":[{
                          "match":true,
                          "value":9587.285,
                          "description":"sum of:",
                          "details":[{
                              "match":true,
                              "value":3195.7617,

"description":"weight(search_en_root_name:bovine in 20407) [SialBM25Similarity], result of:",
                              "details":[{
                                  "match":true,
                                  "value":3195.7617,

"description":"score(doc=20407,freq=1.0 = termFreq=1.0\n), product of:",
                                  "details":[{
                                      "match":true,
                                      "value":2500.0,
                                      "description":"boost"},
                                    {
                                      "match":true,
                                      "value":1.0,
                                      "description":"idf, computed as
log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                                      "details":[{
                                          "match":true,
                                          "value":204.0,
                                          "description":"docFreq"},
                                        {
                                          "match":true,
                                          "value":365301.0,
                                          "description":"docCount"}]},
                                    {
                                      "match":true,
                                      "value":1.2783047,
                                      "description":"tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength /
avgFieldLength)) from:",
                                      "details":[{
                                          "match":true,
                                          "value":1.0,
                                          "description":"termFreq=1.0"},
                                        {
                                          "match":true,
                                          "value":1.2,
                                          "description":"parameter k1"},
                                        {
                                          "match":true,
                                          "value":0.75,
                                          "description":"parameter b"},
                                        {
                                          "match":true,
                                          "value":6.4128513,
                                          "description":"avgFieldLength"},
                                        {
                                          "match":true,
                                          "value":3.0,
                                          "description":"fieldLength"}]}]}]},
                            {
                              "match":true,
                              "value":3195.7617,

"description":"weight(search_en_root_name:serum in 20407) [SialBM25Similarity], result of:",
                              "details":[{
                                  "match":true,
                                  "value":3195.7617,

"description":"score(doc=20407,freq=1.0 = termFreq=1.0\n), product of:",
                                  "details":[{
                                      "match":true,
                                      "value":2500.0,
                                      "description":"boost"},
                                    {
                                      "match":true,
                                      "value":1.0,
                                      "description":"idf, computed as
log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                                      "details":[{
                                          "match":true,
                                          "value":245.0,
                                          "description":"docFreq"},
                                        {
                                          "match":true,
                                          "value":365301.0,
                                          "description":"docCount"}]},
                                    {
                                      "match":true,
                                      "value":1.2783047,
                                      "description":"tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength /
avgFieldLength)) from:",
                                      "details":[{
                                          "match":true,
                                          "value":1.0,
                                          "description":"termFreq=1.0"},
                                        {
                                          "match":true,
                                          "value":1.2,
                                          "description":"parameter k1"},
                                        {
                                          "match":true,
                                          "value":0.75,
                                          "description":"parameter b"},
                                        {
                                          "match":true,
                                          "value":6.4128513,
                                          "description":"avgFieldLength"},
                                        {
                                          "match":true,
                                          "value":3.0,
                                          "description":"fieldLength"}]}]}]},
                            {
                              "match":true,
                              "value":3195.7617,

"description":"weight(search_en_root_name:albumin in 20407) [SialBM25Similarity], result of:",
                              "details":[{
                                  "match":true,
                                  "value":3195.7617,

"description":"score(doc=20407,freq=1.0 = termFreq=1.0\n), product of:",
                                  "details":[{
                                      "match":true,
                                      "value":2500.0,
                                      "description":"boost"},
                                    {
                                      "match":true,
                                      "value":1.0,
                                      "description":"idf, computed as
log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                                      "details":[{
                                          "match":true,
                                          "value":167.0,
                                          "description":"docFreq"},
                                        {
                                          "match":true,
                                          "value":365301.0,
                                          "description":"docCount"}]},
                                    {
                                      "match":true,
                                      "value":1.2783047,
                                      "description":"tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength /
avgFieldLength)) from:",
                                      "details":[{
                                          "match":true,
                                          "value":1.0,
                                          "description":"termFreq=1.0"},
                                        {
                                          "match":true,
                                          "value":1.2,
                                          "description":"parameter k1"},
                                        {
                                          "match":true,
                                          "value":0.75,
                                          "description":"parameter b"},
                                        {
                                          "match":true,
                                          "value":6.4128513,
                                          "description":"avgFieldLength"},
                                        {
                                          "match":true,
                                          "value":3.0,
                                          "description":"fieldLength"}]}]}]}]},
                        {
                          "match":true,
                          "value":9587.285,
                          "description":"sum of:",
                          "details":[{
                              "match":true,
                              "value":3195.7617,

"description":"weight(search_en_root_name:bovin in 20407) [SialBM25Similarity], result of:",
                              "details":[{
                                  "match":true,
                                  "value":3195.7617,

"description":"score(doc=20407,freq=1.0 = termFreq=1.0\n), product of:",
                                  "details":[{
                                      "match":true,
                                      "value":2500.0,
                                      "description":"boost"},
                                    {
                                      "match":true,
                                      "value":1.0,
                                      "description":"idf, computed as
log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                                      "details":[{
                                          "match":true,
                                          "value":204.0,
                                          "description":"docFreq"},
                                        {
                                          "match":true,
                                          "value":365301.0,
                                          "description":"docCount"}]},
                                    {
                                      "match":true,
                                      "value":1.2783047,
                                      "description":"tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength /
avgFieldLength)) from:",
                                      "details":[{
                                          "match":true,
                                          "value":1.0,
                                          "description":"termFreq=1.0"},
                                        {
                                          "match":true,
                                          "value":1.2,
                                          "description":"parameter k1"},
                                        {
                                          "match":true,
                                          "value":0.75,
                                          "description":"parameter b"},
                                        {
                                          "match":true,
                                          "value":6.4128513,
                                          "description":"avgFieldLength"},
                                        {
                                          "match":true,
                                          "value":3.0,
                                          "description":"fieldLength"}]}]}]},
                            {
                              "match":true,
                              "value":3195.7617,

"description":"weight(search_en_root_name:serum in 20407) [SialBM25Similarity], result of:",
                              "details":[{
                                  "match":true,
                                  "value":3195.7617,

"description":"score(doc=20407,freq=1.0 = termFreq=1.0\n), product of:",
                                  "details":[{
                                      "match":true,
                                      "value":2500.0,
                                      "description":"boost"},
                                    {
                                      "match":true,
                                      "value":1.0,
                                      "description":"idf, computed as
log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                                      "details":[{
                                          "match":true,
                                          "value":245.0,
                                          "description":"docFreq"},
                                        {
                                          "match":true,
                                          "value":365301.0,
                                          "description":"docCount"}]},
                                    {
                                      "match":true,
                                      "value":1.2783047,
                                      "description":"tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength /
avgFieldLength)) from:",
                                      "details":[{
                                          "match":true,
                                          "value":1.0,
                                          "description":"termFreq=1.0"},
                                        {
                                          "match":true,
                                          "value":1.2,
                                          "description":"parameter k1"},
                                        {
                                          "match":true,
                                          "value":0.75,
                                          "description":"parameter b"},
                                        {
                                          "match":true,
                                          "value":6.4128513,
                                          "description":"avgFieldLength"},
                                        {
                                          "match":true,
                                          "value":3.0,
                                          "description":"fieldLength"}]}]}]},
                            {
                              "match":true,
                              "value":3195.7617,

"description":"weight(search_en_root_name:albumin in 20407) [SialBM25Similarity], result of:",
                              "details":[{
                                  "match":true,
                                  "value":3195.7617,

"description":"score(doc=20407,freq=1.0 = termFreq=1.0\n), product of:",
                                  "details":[{
                                      "match":true,
                                      "value":2500.0,
                                      "description":"boost"},
                                    {
                                      "match":true,
                                     "value":1.0,
                                      "description":"idf, computed as
log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                                      "details":[{
                                          "match":true,
                                          "value":167.0,
                                          "description":"docFreq"},
                                        {
                                          "match":true,
                                          "value":365301.0,
                                          "description":"docCount"}]},
                                    {
                                      "match":true,
                                      "value":1.2783047,
                                      "description":"tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength /
avgFieldLength)) from:",
                                      "details":[{
                                          "match":true,
                                          "value":1.0,
                                          "description":"termFreq=1.0"},
                                        {
                                          "match":true,
                                          "value":1.2,
                                          "description":"parameter k1"},
                                        {
                                          "match":true,
                                          "value":0.75,
                                          "description":"parameter b"},
                                        {
                                          "match":true,
                                          "value":6.4128513,
                                          "description":"avgFieldLength"},
                                        {
                                          "match":true,
                                          "value":3.0,

"description":"fieldLength"}]}]}]}]}]},
                    {