You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@solr.apache.org by Sebastian Riemer <s....@littera.eu> on 2023/03/07 14:20:44 UTC

Using facet.limit in combination with grouping

Hello,

I have a question regarding the parameter "facet.limit" in combination with grouping.

My query looks like this:

http://localhost:8982/solr/#/wemi/query?q=*:*
&q.op=OR
&indent=true
&start=0
&facet=true
&facet.missing=true
&facet.sort=count
&facet.mincount=1
&fl=m_classificationList_lang_2_ts_ns_fac
&rows=0
&facet.field=%7B!ex%3Dm_cl_0%7Dm_classificationList_lang_2_ts_ns_fac
&facet.contains=Ka
&facet.contains.ignoreCase=true
&facet.limit=400
&group.field=m_id_cp_s
&group=true
&group.facet=true
&group.ngroups=true
&fq=tenant_id:6
&fq=cat_db_id:2
&fq=%7B!tag%3Dm_cl_0%7D(((m_classificationList_lang_2_ts_ns:(Ka))))
&fq=m_markedAsDeleted_b:false

I use the parameter facet.limit in order to limit the result for the facet.field="m_classificationList_lang_2_ts_ns_fac" to 400 hits. However, instead of up to 400 results, I merely get 10 results.

...
"grouped":{
    "m_id_cp_s":{
      "matches":6,
      "ngroups":6,
      "groups":[]}},
  "facet_counts":{
    "facet_queries":{},
    "facet_fields":{
      "m_classificationList_lang_2_ts_ns_fac":[
        "Ckk Nordamerika",66,
        "Ckm Südamerika",43,
        "Xbp2 Zucht und Haltung von Hunden und Katzen",43,
        "1D-I DVD / Kabarett",35,
        "Ak Kommunikation, Medien, Publizistik, Journalismus",25,
        "Ckl Mittelamerika",20,
        "Ckl2 Kuba. Jamaika. Haiti. Dominikanische Rep. Puerto",20,
        "Chm Südliches Afrika",18,
        "Chl Westafrika. Zentralafrika. Ostafrika",17,
        "Ep Geschichte Amerikas",17,
        null,35234]},
    "facet_ranges":{},
    "facet_intervals":{},
    "facet_heatmaps":{}}}

When I raise the value for the limit to 4000, I suddenly get some more results.

http://localhost:8982/solr/#/wemi/query?q=*:*
&q.op=OR
&indent=true
&start=0
&facet=true
&facet.missing=true
&facet.sort=count
&facet.mincount=1
&fl=m_classificationList_lang_2_ts_ns_fac
&rows=0
&facet.field=%7B!ex%3Dm_cl_0%7Dm_classificationList_lang_2_ts_ns_fac
&facet.contains=Ka
&facet.contains.ignoreCase=true
&facet.limit=4000
&group.field=m_id_cp_s
&group=true
&group.facet=true
&group.ngroups=true
&fq=tenant_id:6
&fq=cat_db_id:2
&fq=%7B!tag%3Dm_cl_0%7D(((m_classificationList_lang_2_ts_ns:(Ka))))
&fq=m_markedAsDeleted_b:false

...
"grouped":{
    "m_id_cp_s":{
      "matches":6,
      "ngroups":6,
      "groups":[]}},
  "facet_counts":{
    "facet_queries":{},
    "facet_fields":{
      "m_classificationList_lang_2_ts_ns_fac":[
        "Ckk Nordamerika",66,
        "Ckm Südamerika",43,
        "Xbp2 Zucht und Haltung von Hunden und Katzen",43,
        "1D-I DVD / Kabarett",35,
        "Ak Kommunikation, Medien, Publizistik, Journalismus",25,
        "Ckl Mittelamerika",20,
        "Ckl2 Kuba. Jamaika. Haiti. Dominikanische Rep. Puerto",20,
        "Chm Südliches Afrika",18,
        "Chl Westafrika. Zentralafrika. Ostafrika",17,
        "Ep Geschichte Amerikas",17,
        "Chn Afrikanische Inseln und Inselgruppen",14,
        "Chk Nordafrika",13,
        "Ybo Kampfsport, Kraftsport, Schwerathletik",13,
        "Cen Nordeuropa allgemein (Skandinavien)",10,
        "Xeo235 Kalte Küche, Brunch",10,
        "Mdp Weissagung (Tarot, Kartenlegen, Nummerologie)",8,
        "Eo Geschichte Afrikas",7,
        "Ka Religion (Allgemeines)",6,
        "Ch Afrika",1,
        null,35234]},
    "facet_ranges":{},
    "facet_intervals":{},
    "facet_heatmaps":{}}}

My best guess is that the facet.limit parameter and grouping are simply not compatible with each other. When the grouping-related parameters are left out, the limiting works as expected:

http://localhost:8982/solr/#/wemi/query?q=*:*
&q.op=OR
&indent=true
&start=0
&facet=true
&facet.missing=true
&facet.sort=count
&facet.mincount=1
&fl=m_classificationList_lang_2_ts_ns_fac
&rows=0
&facet.field=%7B!ex%3Dm_cl_0%7Dm_classificationList_lang_2_ts_ns_fac
&facet.contains=Ka
&facet.contains.ignoreCase=true
&facet.limit=400
&fq=tenant_id:6
&fq=cat_db_id:2
&fq=%7B!tag%3Dm_cl_0%7D(((m_classificationList_lang_2_ts_ns:(Ka))))
&fq=m_markedAsDeleted_b:false

...
"facet_counts":{
    "facet_queries":{},
    "facet_fields":{
      "m_classificationList_lang_2_ts_ns_fac":[
        "Ckk Nordamerika",66,
        "Ckm Südamerika",44,
        "Xbp2 Zucht und Haltung von Hunden und Katzen",43,
        "1D-I DVD / Kabarett",35,
        "Ak Kommunikation, Medien, Publizistik, Journalismus",25,
        "Ckl2 Kuba. Jamaika. Haiti. Dominikanische Rep. Puerto",21,
        "Ckl Mittelamerika",20,
        "Chm Südliches Afrika",19,
        "Chl Westafrika. Zentralafrika. Ostafrika",17,
        "Ep Geschichte Amerikas",17,
        "Chn Afrikanische Inseln und Inselgruppen",14,
        "Chk Nordafrika",13,
        "Ybo Kampfsport, Kraftsport, Schwerathletik",13,
        "Cen Nordeuropa allgemein (Skandinavien)",10,
        "Xeo235 Kalte Küche, Brunch",10,
        "Mdp Weissagung (Tarot, Kartenlegen, Nummerologie)",8,
        "Eo Geschichte Afrikas",7,
        "Ka Religion (Allgemeines)",6,
        "Ch Afrika",1,
        null,35234]},
    "facet_ranges":{},
    "facet_intervals":{},
    "facet_heatmaps":{}}}

This shows even more clearly when, lastly, I reduce the "facet.limit" to 5:

http://localhost:8982/solr/#/wemi/query?q=*:*
&q.op=OR
&indent=true
&start=0
&facet=true
&facet.missing=true
&facet.sort=count
&facet.mincount=1
&fl=m_classificationList_lang_2_ts_ns_fac
&rows=0
&facet.field=%7B!ex%3Dm_cl_0%7Dm_classificationList_lang_2_ts_ns_fac
&facet.contains=Ka
&facet.contains.ignoreCase=true
&facet.limit=5
&fq=tenant_id:6
&fq=cat_db_id:2
&fq=%7B!tag%3Dm_cl_0%7D(((m_classificationList_lang_2_ts_ns:(Ka))))
&fq=m_markedAsDeleted_b:false

...
"facet_counts":{
    "facet_queries":{},
    "facet_fields":{
      "m_classificationList_lang_2_ts_ns_fac":[
        "Ckk Nordamerika",66,
        "Ckm Südamerika",44,
        "Xbp2 Zucht und Haltung von Hunden und Katzen",43,
        "1D-I DVD / Kabarett",35,
        "Ak Kommunikation, Medien, Publizistik, Journalismus",25,
        null,35234]},
    "facet_ranges":{},
    "facet_intervals":{},
    "facet_heatmaps":{}}}

This result seems correct!

My current workaround is to just set facet.limit=-1 and do the limiting outside of Solr.

Do you spot any obvious wrong-doing, or can you explain the nuances about faceting while grouping?

Best regards,

Sebastian Riemer

Re: Using facet.limit in combination with grouping

Posted by Mikhail Khludnev <mk...@apache.org>.
Hello, Sebastian.
I briefly looked through the code and haven't found any obvious problem. It
should work.
I don't know what's wrong.
https://github.com/apache/solr/blob/3baf1804df7b2b6f50ff151eba518b3e7f1e70cf/solr/core/src/java/org/apache/solr/request/SimpleFacets.java#L817

On Tue, Mar 7, 2023 at 5:21 PM Sebastian Riemer <s....@littera.eu> wrote:

> Hello,
>
> I have a question regarding the parameter "facet.limit" in combination
> with grouping.
>
> My query looks like this:
>
> http://localhost:8982/solr/#/wemi/query?q=*:*
> &q.op=OR
> &indent=true
> &start=0
> &facet=true
> &facet.missing=true
> &facet.sort=count
> &facet.mincount=1
> &fl=m_classificationList_lang_2_ts_ns_fac
> &rows=0
> &facet.field=%7B!ex%3Dm_cl_0%7Dm_classificationList_lang_2_ts_ns_fac
> &facet.contains=Ka
> &facet.contains.ignoreCase=true
> &facet.limit=400
> &group.field=m_id_cp_s
> &group=true
> &group.facet=true
> &group.ngroups=true
> &fq=tenant_id:6
> &fq=cat_db_id:2
> &fq=%7B!tag%3Dm_cl_0%7D(((m_classificationList_lang_2_ts_ns:(Ka))))
> &fq=m_markedAsDeleted_b:false
>
> I use the parameter facet.limit in order to limit the result for the
> facet.field="m_classificationList_lang_2_ts_ns_fac" to 400 hits. However,
> instead of up to 400 results, I merely get 10 results.
>
> ...
> "grouped":{
>     "m_id_cp_s":{
>       "matches":6,
>       "ngroups":6,
>       "groups":[]}},
>   "facet_counts":{
>     "facet_queries":{},
>     "facet_fields":{
>       "m_classificationList_lang_2_ts_ns_fac":[
>         "Ckk Nordamerika",66,
>         "Ckm Südamerika",43,
>         "Xbp2 Zucht und Haltung von Hunden und Katzen",43,
>         "1D-I DVD / Kabarett",35,
>         "Ak Kommunikation, Medien, Publizistik, Journalismus",25,
>         "Ckl Mittelamerika",20,
>         "Ckl2 Kuba. Jamaika. Haiti. Dominikanische Rep. Puerto",20,
>         "Chm Südliches Afrika",18,
>         "Chl Westafrika. Zentralafrika. Ostafrika",17,
>         "Ep Geschichte Amerikas",17,
>         null,35234]},
>     "facet_ranges":{},
>     "facet_intervals":{},
>     "facet_heatmaps":{}}}
>
> When I raise the value for the limit to 4000, I suddenly get some more
> results.
>
> http://localhost:8982/solr/#/wemi/query?q=*:*
> &q.op=OR
> &indent=true
> &start=0
> &facet=true
> &facet.missing=true
> &facet.sort=count
> &facet.mincount=1
> &fl=m_classificationList_lang_2_ts_ns_fac
> &rows=0
> &facet.field=%7B!ex%3Dm_cl_0%7Dm_classificationList_lang_2_ts_ns_fac
> &facet.contains=Ka
> &facet.contains.ignoreCase=true
> &facet.limit=4000
> &group.field=m_id_cp_s
> &group=true
> &group.facet=true
> &group.ngroups=true
> &fq=tenant_id:6
> &fq=cat_db_id:2
> &fq=%7B!tag%3Dm_cl_0%7D(((m_classificationList_lang_2_ts_ns:(Ka))))
> &fq=m_markedAsDeleted_b:false
>
> ...
> "grouped":{
>     "m_id_cp_s":{
>       "matches":6,
>       "ngroups":6,
>       "groups":[]}},
>   "facet_counts":{
>     "facet_queries":{},
>     "facet_fields":{
>       "m_classificationList_lang_2_ts_ns_fac":[
>         "Ckk Nordamerika",66,
>         "Ckm Südamerika",43,
>         "Xbp2 Zucht und Haltung von Hunden und Katzen",43,
>         "1D-I DVD / Kabarett",35,
>         "Ak Kommunikation, Medien, Publizistik, Journalismus",25,
>         "Ckl Mittelamerika",20,
>         "Ckl2 Kuba. Jamaika. Haiti. Dominikanische Rep. Puerto",20,
>         "Chm Südliches Afrika",18,
>         "Chl Westafrika. Zentralafrika. Ostafrika",17,
>         "Ep Geschichte Amerikas",17,
>         "Chn Afrikanische Inseln und Inselgruppen",14,
>         "Chk Nordafrika",13,
>         "Ybo Kampfsport, Kraftsport, Schwerathletik",13,
>         "Cen Nordeuropa allgemein (Skandinavien)",10,
>         "Xeo235 Kalte Küche, Brunch",10,
>         "Mdp Weissagung (Tarot, Kartenlegen, Nummerologie)",8,
>         "Eo Geschichte Afrikas",7,
>         "Ka Religion (Allgemeines)",6,
>         "Ch Afrika",1,
>         null,35234]},
>     "facet_ranges":{},
>     "facet_intervals":{},
>     "facet_heatmaps":{}}}
>
> My best guess is that the facet.limit parameter and grouping are simply
> not compatible with each other. When the grouping-related parameters are
> left out, the limiting works as expected:
>
> http://localhost:8982/solr/#/wemi/query?q=*:*
> &q.op=OR
> &indent=true
> &start=0
> &facet=true
> &facet.missing=true
> &facet.sort=count
> &facet.mincount=1
> &fl=m_classificationList_lang_2_ts_ns_fac
> &rows=0
> &facet.field=%7B!ex%3Dm_cl_0%7Dm_classificationList_lang_2_ts_ns_fac
> &facet.contains=Ka
> &facet.contains.ignoreCase=true
> &facet.limit=400
> &fq=tenant_id:6
> &fq=cat_db_id:2
> &fq=%7B!tag%3Dm_cl_0%7D(((m_classificationList_lang_2_ts_ns:(Ka))))
> &fq=m_markedAsDeleted_b:false
>
> ...
> "facet_counts":{
>     "facet_queries":{},
>     "facet_fields":{
>       "m_classificationList_lang_2_ts_ns_fac":[
>         "Ckk Nordamerika",66,
>         "Ckm Südamerika",44,
>         "Xbp2 Zucht und Haltung von Hunden und Katzen",43,
>         "1D-I DVD / Kabarett",35,
>         "Ak Kommunikation, Medien, Publizistik, Journalismus",25,
>         "Ckl2 Kuba. Jamaika. Haiti. Dominikanische Rep. Puerto",21,
>         "Ckl Mittelamerika",20,
>         "Chm Südliches Afrika",19,
>         "Chl Westafrika. Zentralafrika. Ostafrika",17,
>         "Ep Geschichte Amerikas",17,
>         "Chn Afrikanische Inseln und Inselgruppen",14,
>         "Chk Nordafrika",13,
>         "Ybo Kampfsport, Kraftsport, Schwerathletik",13,
>         "Cen Nordeuropa allgemein (Skandinavien)",10,
>         "Xeo235 Kalte Küche, Brunch",10,
>         "Mdp Weissagung (Tarot, Kartenlegen, Nummerologie)",8,
>         "Eo Geschichte Afrikas",7,
>         "Ka Religion (Allgemeines)",6,
>         "Ch Afrika",1,
>         null,35234]},
>     "facet_ranges":{},
>     "facet_intervals":{},
>     "facet_heatmaps":{}}}
>
> This shows even better, when lastly I reduce the "facet.limit" to 5:
>
> http://localhost:8982/solr/#/wemi/query?q=*:*
> &q.op=OR
> &indent=true
> &start=0
> &facet=true
> &facet.missing=true
> &facet.sort=count
> &facet.mincount=1
> &fl=m_classificationList_lang_2_ts_ns_fac
> &rows=0
> &facet.field=%7B!ex%3Dm_cl_0%7Dm_classificationList_lang_2_ts_ns_fac
> &facet.contains=Ka
> &facet.contains.ignoreCase=true
> &facet.limit=5
> &fq=tenant_id:6
> &fq=cat_db_id:2
> &fq=%7B!tag%3Dm_cl_0%7D(((m_classificationList_lang_2_ts_ns:(Ka))))
> &fq=m_markedAsDeleted_b:false
>
> ...
> "facet_counts":{
>     "facet_queries":{},
>     "facet_fields":{
>       "m_classificationList_lang_2_ts_ns_fac":[
>         "Ckk Nordamerika",66,
>         "Ckm Südamerika",44,
>         "Xbp2 Zucht und Haltung von Hunden und Katzen",43,
>         "1D-I DVD / Kabarett",35,
>         "Ak Kommunikation, Medien, Publizistik, Journalismus",25,
>         null,35234]},
>     "facet_ranges":{},
>     "facet_intervals":{},
>     "facet_heatmaps":{}}}
>
> This result seems correct!
>
> My current workaround is, to just set facet.limit=-1, and do the limiting
> outside of solr.
>
> Do you spot any obvious wrong-doing, or can you explain the nuances about
> faceting while grouping?
>
> Best regards,
>
> Sebastian Riemer
>


-- 
Sincerely yours
Mikhail Khludnev
https://t.me/MUST_SEARCH
A caveat: Cyrillic!