You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-user@lucene.apache.org by Nicolas Peeters <ni...@gmail.com> on 2010/11/24 14:27:41 UTC

SOLR-792 (hierarchical faceting) issue when only 1 document should be present in the pivot

Hi Solr Community,

I've been experimenting with Solr 4.0 (trunk) in order to test the SOLR-792
feature. I have written a test that shows what I'm trying to ask. Basically,
I'm creating a hierarchy of area/city/neighbourhood. The problem that I
see is that a document that is the only item in a particular hierarchy
(e.g. Greater London/Greenwich/Centre, which I've called
"Value_that_cant_be_matched" in this example) is not found by
the pivot facet. If I add a second one, then it works. I'm puzzled why this
is the case.

This is the output of the System.out call that prints the pivot facet field
hierarchy (see line 86 of the test below)

PIVOT: level1_loc_s,level2_loc_s,level3_loc_s
level1_loc_s=Greater London (8)
  level2_loc_s=London (5)
    level3_loc_s=Mayfair (3)
    level3_loc_s=Hammersmith (2)
  level2_loc_s=Greenwich (3)
    level3_loc_s=Greenwich Centre (2)
                                     //--> why isn't there a
"level3_loc_s=Value_that_cant_be_matched (1)" here?
level1_loc_s=Groot Amsterdam (5)
  level2_loc_s=Amsterdam (3)
    level3_loc_s=Jordaan (2)
  level2_loc_s=Amstelveen (2)
    level3_loc_s=Centrum (2)


How can I make sure that Solr would find in the tree the single document
when I facet on this "location" hierarchy?

Thank you very much for your help.

Nicolas

import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.response.PivotField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.util.NamedList;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

/**
 * This is a test for hiearchical faceting based on SOLR-792 (I basically
just checkout the trunk of Solr-4.0).
 *
 * Unit test that shows the particular behaviour that I'm experiencing.
 * I would have expected that the doc (see line 95) with as level3_loc_s
"Value_that_cant_be_matched" would appear in the pivot. It seems that you
actually need at least 2!
 *
 * @author npeeters
 */
public class HierarchicalPivotTest {

    CommonsHttpSolrServer server;

    @Before
    public void setup() throws MalformedURLException {
        // the instance can be reused
        this.server = new CommonsHttpSolrServer("http://localhost:8983/solr
");
        this.server.setSoTimeout(500); // socket read timeout
        this.server.setConnectionTimeout(100);
        this.server.setDefaultMaxConnectionsPerHost(100);
        this.server.setMaxTotalConnections(100);
        this.server.setFollowRedirects(false); // defaults to false
        // allowCompression defaults to false.
    }

    protected List<SolrInputDocument> createHierarchicalOrgData() {
        int id = 1;
        List<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amsterdam",
"level3_loc_s", "Centrum"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amsterdam",
"level3_loc_s", "Jordaan"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amsterdam",
"level3_loc_s", "Jordaan"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amstelveen",
"level3_loc_s", "Centrum"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amstelveen",
"level3_loc_s", "Centrum"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Greater London", "level2_loc_s", "London", "level3_loc_s",
"Hammersmith"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Greater London", "level2_loc_s", "London", "level3_loc_s",
"Hammersmith"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Greater London", "level2_loc_s", "London", "level3_loc_s",
"Mayfair"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Greater London", "level2_loc_s", "London", "level3_loc_s",
"Mayfair"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Greater London", "level2_loc_s", "London", "level3_loc_s",
"Mayfair"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Greater London", "level2_loc_s", "Greenwich",
"level3_loc_s", "Value_that_cant_be_matched"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Greater London", "level2_loc_s", "Greenwich",
"level3_loc_s", "Greenwich Centre"));
        docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
"level1_loc_s", "Greater London", "level2_loc_s", "Greenwich",
"level3_loc_s", "Greenwich Centre"));
        return docs;
    }

    @Test
    public void hierQueryWithOrgData() throws SolrServerException,
IOException {

        server.deleteByQuery("*:*");// delete everything!
        server.commit();
        assertNumFound("*:*", 0); // make sure it got in

        List<SolrInputDocument> docs = createHierarchicalOrgData();
        server.add(docs);
        server.commit();

        SolrQuery query = new SolrQuery("*:*");
        query.addFacetPivotField("level1_loc_s,level2_loc_s,level3_loc_s");
        query.setFacetMinCount(0);
        query.setRows(20);

        QueryResponse rsp = server.query(query);
        Assert.assertEquals(docs.size(), rsp.getResults().getNumFound());

        NamedList<List<PivotField>> pivots = rsp.getFacetPivot();
        Assert.assertEquals("Only one pivot is defined: '" + "level1_loc_s"
+ "," + "level2_loc_s" + "," + "level3_loc_s" + "'", 1, pivots.size());

        // debug the output
        for (Map.Entry<String, List<PivotField>> entry : pivots) {
            System.out.println("PIVOT: " + entry.getKey());
            for (PivotField p : entry.getValue()) {
                p.write(System.out, 0);
            }
            System.out.println();
            //here you can see already that there's not level3_loc_s for
Greenwich, which is not what I'd expect!
        }

        List<PivotField> pivot = pivots.getVal(0);
        Assert.assertEquals("level1_loc_s" + "," + "level2_loc_s" + "," +
"level3_loc_s", pivots.getName(0));
        Assert.assertEquals("The first level, there should be 2 pivots (one
for 'Greater London' and one for 'Groot Amsterdam'", 2,
                pivot.size());

        // level 1
        PivotField greaterLondon = pivot.get(0);
        Assert.assertEquals("level1_loc_s", greaterLondon.getField());
        Assert.assertEquals("Greater London", greaterLondon.getValue());
        Assert.assertEquals("8 locations under Great London", 8,
greaterLondon.getCount());

        // level 2
        List<PivotField> greaterLondonLocations = greaterLondon.getPivot();
        Assert.assertEquals("The next level in the Greater London hierarchy
has 2 elements: 'Greenwich' and 'London'", 2,
                greaterLondonLocations.size());
        Assert.assertEquals("level2_loc_s",
greaterLondonLocations.get(0).getField());
        Assert.assertEquals("London",
greaterLondonLocations.get(0).getValue());
        Assert.assertEquals("5 neighbourhoods under " +
greaterLondonLocations.get(0).getValue(), 5, greaterLondonLocations.get(0)
                .getCount());

        Assert.assertEquals("level2_loc_s",
greaterLondonLocations.get(0).getField());
        Assert.assertEquals("Greenwich",
greaterLondonLocations.get(1).getValue());
        Assert.assertEquals("3 neighbourhoods under " +
greaterLondonLocations.get(1).getValue(), 3, greaterLondonLocations.get(1)
                .getCount());

        // level 3 (London)
        List<PivotField> londonNeighbourhoods =
greaterLondonLocations.get(0).getPivot();
        List<PivotField> greenwichNeighbourhoods =
greaterLondonLocations.get(1).getPivot();

        Assert.assertEquals("The next level in the Greater London hierarchy
has 2 elements: 'Hammersmith' and 'Mayfair'", 2,
                greaterLondonLocations.size());
        Assert.assertEquals("level3_loc_s",
londonNeighbourhoods.get(1).getField());
        Assert.assertEquals("Mayfair",
londonNeighbourhoods.get(0).getValue());
        Assert.assertEquals("3 orgs in " +
londonNeighbourhoods.get(0).getValue(), 3,
londonNeighbourhoods.get(0).getCount());
        Assert.assertEquals("Hammersmith",
londonNeighbourhoods.get(1).getValue());
        Assert.assertEquals("2 orgs in " +
londonNeighbourhoods.get(1).getValue(), 2,
londonNeighbourhoods.get(1).getCount());

        Assert.assertEquals("Greenwich Centre",
greenwichNeighbourhoods.get(0).getValue());
        Assert.assertEquals("2 orgs in " +
greenwichNeighbourhoods.get(0).getValue(), 2,
greenwichNeighbourhoods.get(0).getCount());

        // NOT WORKING! - why?
        Assert.assertEquals("2 neighbourhoods in Greenwich: WHY IS THAT?",
2, greenwichNeighbourhoods.size());
        Assert.assertEquals("Value_that_cant_be_matched",
greenwichNeighbourhoods.get(1).getValue());
        Assert.assertEquals("Value_that_cant_be_matched" +
greenwichNeighbourhoods.get(1).getValue(), 1, greenwichNeighbourhoods.get(1)
                .getCount());
    }

    public static SolrInputDocument makeTestDoc(Object... kvp) {
        SolrInputDocument doc = new SolrInputDocument();
        for (int i = 0; i < kvp.length;) {
            String k = (String) kvp[i++];
            Object v = kvp[i++];
            doc.addField(k, v);
        }
        return doc;
    }

    protected void assertNumFound(String query, int num) throws
SolrServerException, IOException {
        QueryResponse rsp = server.query(new SolrQuery(query));
        if (num != rsp.getResults().getNumFound()) {
            Assert.fail("expected: " + num + " but had: " +
rsp.getResults().getNumFound() + " :: " + rsp.getResults());
        }
    }

}

Re: SOLR-792 (hierarchical faceting) issue when only 1 document should be present in the pivot

Posted by Adeel Qureshi <ad...@gmail.com>.
I have had the same problem: my facet pivots were returning results
something like

Cat-A (3)
 Item X
 Item Y

only 2 items instead of 3

or even
Cat-B (2)
 no items

zero items instead of 2

so the parent-level count didn't match the returned child pivots.
But once I set facet.pivot.mincount = 0, it works fine. Is this
a bug or the desired behavior?


On Wed, Nov 24, 2010 at 7:49 AM, Nicolas Peeters <ni...@gmail.com>wrote:

> Hi Solr-Users,
>
> I realized that I can get the behaviour that I expect if I put
> facet.pivot.mincount to 0. However, I'm still puzzled why this needs to be
> 0
> and not 1. There's one occurence for this document, isn't it?
> With this value to 1, the print out of the pivot looks like this (where you
> clearly see (1) for "Value_that_can't_be_matched"):
>
> PIVOT: level1_loc_s,level2_loc_s,level3_loc_s
> level1_loc_s=Greater London (8)
>  level2_loc_s=London (5)
>    level3_loc_s=Mayfair (3)
>    level3_loc_s=Hammersmith (2)
>  level2_loc_s=Greenwich (3)
>    level3_loc_s=Greenwich Centre (2)
>     level3_loc_s=Value_that_cant_be_matched (1)
> level1_loc_s=Groot Amsterdam (5)
>  level2_loc_s=Amsterdam (3)
>    level3_loc_s=Jordaan (2)
>     level3_loc_s=Centrum (1)
>   level2_loc_s=Amstelveen (2)
>    level3_loc_s=Centrum (2)
>
> Any expert advice on why this is the case is more than welcome!
>
> Best regards,
>
> Nicolas
>
> On Wed, Nov 24, 2010 at 2:27 PM, Nicolas Peeters <nicolists@gmail.com
> >wrote:
>
> > Hi Solr Community,
> >
> > I've been experimenting with Solr 4.0 (trunk) in order to test the
> SOLR-792
> > feature. I have written a test that shows what I'm trying to ask.
> Basically,
> > I'm creating a hierarchy of the area/city/neighbourhood. The problem that
> I
> > see is that for documents that have only 1 item in a particular hierarchy
> > (e.g. Greater London/Greenwich/Centre (which I've called
> > "Value_that_cant_be_matched in this example"...)), these are not found by
> > the pivot facet. If I add a second one, then it works. I'm puzzled why
> this
> > is the case.
> >
> > This is the result of the Sytem.out that prints out the pivot facet
> fields
> > hierarchy (see line 86)
> >
> > PIVOT: level1_loc_s,level2_loc_s,level3_loc_s
> > level1_loc_s=Greater London (8)
> >   level2_loc_s=London (5)
> >     level3_loc_s=Mayfair (3)
> >     level3_loc_s=Hammersmith (2)
> >   level2_loc_s=Greenwich (3)
> >     level3_loc_s=Greenwich Centre (2)
> >                                      //--> why isn't there a
> > "level3_loc_s=Value_that_cant_be_matched (1)" here?
> > level1_loc_s=Groot Amsterdam (5)
> >   level2_loc_s=Amsterdam (3)
> >     level3_loc_s=Jordaan (2)
> >   level2_loc_s=Amstelveen (2)
> >     level3_loc_s=Centrum (2)
> >
> >
> > How can I make sure that Solr would find in the tree the single document
> > when I facet on this "location" hierarchy?
> >
> > Thank you very much for your help.
> >
> > Nicolas
> >
> > import java.io.IOException;
> > import java.net.MalformedURLException;
> > import java.util.ArrayList;
> > import java.util.List;
> > import java.util.Map;
> >
> > import org.apache.solr.client.solrj.SolrQuery;
> > import org.apache.solr.client.solrj.SolrServerException;
> > import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
> > import org.apache.solr.client.solrj.response.PivotField;
> > import org.apache.solr.client.solrj.response.QueryResponse;
> > import org.apache.solr.common.SolrInputDocument;
> > import org.apache.solr.common.util.NamedList;
> > import org.junit.Assert;
> > import org.junit.Before;
> > import org.junit.Test;
> >
> > /**
> >  * This is a test for hiearchical faceting based on SOLR-792 (I basically
> > just checkout the trunk of Solr-4.0).
> >  *
> >  * Unit test that shows the particular behaviour that I'm experiencing.
> >  * I would have expected that the doc (see line 95) with as level3_loc_s
> > "Value_that_cant_be_matched" would appear in the pivot. It seems that you
> > actually need at least 2!
> >  *
> >  * @author npeeters
> >  */
> > public class HierarchicalPivotTest {
> >
> >     CommonsHttpSolrServer server;
> >
> >     @Before
> >     public void setup() throws MalformedURLException {
> >         // the instance can be reused
> >         this.server = new CommonsHttpSolrServer("
> > http://localhost:8983/solr");
> >         this.server.setSoTimeout(500); // socket read timeout
> >         this.server.setConnectionTimeout(100);
> >         this.server.setDefaultMaxConnectionsPerHost(100);
> >         this.server.setMaxTotalConnections(100);
> >         this.server.setFollowRedirects(false); // defaults to false
> >         // allowCompression defaults to false.
> >     }
> >
> >     protected List<SolrInputDocument> createHierarchicalOrgData() {
> >         int id = 1;
> >         List<SolrInputDocument> docs = new
> ArrayList<SolrInputDocument>();
> >         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> > "level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amsterdam",
> > "level3_loc_s", "Centrum"));
> >         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> > "level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amsterdam",
> > "level3_loc_s", "Jordaan"));
> >         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> > "level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amsterdam",
> > "level3_loc_s", "Jordaan"));
> >         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> > "level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amstelveen",
> > "level3_loc_s", "Centrum"));
> >         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> > "level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amstelveen",
> > "level3_loc_s", "Centrum"));
> >         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> > "level1_loc_s", "Greater London", "level2_loc_s", "London",
> "level3_loc_s",
> > "Hammersmith"));
> >         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> > "level1_loc_s", "Greater London", "level2_loc_s", "London",
> "level3_loc_s",
> > "Hammersmith"));
> >         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> > "level1_loc_s", "Greater London", "level2_loc_s", "London",
> "level3_loc_s",
> > "Mayfair"));
> >         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> > "level1_loc_s", "Greater London", "level2_loc_s", "London",
> "level3_loc_s",
> > "Mayfair"));
> >         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> > "level1_loc_s", "Greater London", "level2_loc_s", "London",
> "level3_loc_s",
> > "Mayfair"));
> >         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> > "level1_loc_s", "Greater London", "level2_loc_s", "Greenwich",
> > "level3_loc_s", "Value_that_cant_be_matched"));
> >         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> > "level1_loc_s", "Greater London", "level2_loc_s", "Greenwich",
> > "level3_loc_s", "Greenwich Centre"));
> >         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> > "level1_loc_s", "Greater London", "level2_loc_s", "Greenwich",
> > "level3_loc_s", "Greenwich Centre"));
> >         return docs;
> >     }
> >
> >     @Test
> >     public void hierQueryWithOrgData() throws SolrServerException,
> > IOException {
> >
> >         server.deleteByQuery("*:*");// delete everything!
> >         server.commit();
> >         assertNumFound("*:*", 0); // make sure it got in
> >
> >         List<SolrInputDocument> docs = createHierarchicalOrgData();
> >         server.add(docs);
> >         server.commit();
> >
> >         SolrQuery query = new SolrQuery("*:*");
> >
> query.addFacetPivotField("level1_loc_s,level2_loc_s,level3_loc_s");
> >         query.setFacetMinCount(0);
> >         query.setRows(20);
> >
> >         QueryResponse rsp = server.query(query);
> >         Assert.assertEquals(docs.size(), rsp.getResults().getNumFound());
> >
> >         NamedList<List<PivotField>> pivots = rsp.getFacetPivot();
> >         Assert.assertEquals("Only one pivot is defined: '" +
> "level1_loc_s"
> > + "," + "level2_loc_s" + "," + "level3_loc_s" + "'", 1, pivots.size());
> >
> >         // debug the output
> >         for (Map.Entry<String, List<PivotField>> entry : pivots) {
> >             System.out.println("PIVOT: " + entry.getKey());
> >             for (PivotField p : entry.getValue()) {
> >                 p.write(System.out, 0);
> >             }
> >             System.out.println();
> >             //here you can see already that there's not level3_loc_s for
> > Greenwich, which is not what I'd expect!
> >         }
> >
> >         List<PivotField> pivot = pivots.getVal(0);
> >         Assert.assertEquals("level1_loc_s" + "," + "level2_loc_s" + "," +
> > "level3_loc_s", pivots.getName(0));
> >         Assert.assertEquals("The first level, there should be 2 pivots
> (one
> > for 'Greater London' and one for 'Groot Amsterdam'", 2,
> >                 pivot.size());
> >
> >         // level 1
> >         PivotField greaterLondon = pivot.get(0);
> >         Assert.assertEquals("level1_loc_s", greaterLondon.getField());
> >         Assert.assertEquals("Greater London", greaterLondon.getValue());
> >         Assert.assertEquals("8 locations under Great London", 8,
> > greaterLondon.getCount());
> >
> >         // level 2
> >         List<PivotField> greaterLondonLocations =
> greaterLondon.getPivot();
> >         Assert.assertEquals("The next level in the Greater London
> hierarchy
> > has 2 elements: 'Greenwich' and 'London'", 2,
> >                 greaterLondonLocations.size());
> >         Assert.assertEquals("level2_loc_s",
> > greaterLondonLocations.get(0).getField());
> >         Assert.assertEquals("London",
> > greaterLondonLocations.get(0).getValue());
> >         Assert.assertEquals("5 neighbourhoods under " +
> > greaterLondonLocations.get(0).getValue(), 5,
> greaterLondonLocations.get(0)
> >                 .getCount());
> >
> >         Assert.assertEquals("level2_loc_s",
> > greaterLondonLocations.get(0).getField());
> >         Assert.assertEquals("Greenwich",
> > greaterLondonLocations.get(1).getValue());
> >         Assert.assertEquals("3 neighbourhoods under " +
> > greaterLondonLocations.get(1).getValue(), 3,
> greaterLondonLocations.get(1)
> >                 .getCount());
> >
> >         // level 3 (London)
> >         List<PivotField> londonNeighbourhoods =
> > greaterLondonLocations.get(0).getPivot();
> >         List<PivotField> greenwichNeighbourhoods =
> > greaterLondonLocations.get(1).getPivot();
> >
> >         Assert.assertEquals("The next level in the Greater London
> hierarchy
> > has 2 elements: 'Hammersmith' and 'Mayfair'", 2,
> >                 greaterLondonLocations.size());
> >         Assert.assertEquals("level3_loc_s",
> > londonNeighbourhoods.get(1).getField());
> >         Assert.assertEquals("Mayfair",
> > londonNeighbourhoods.get(0).getValue());
> >         Assert.assertEquals("3 orgs in " +
> > londonNeighbourhoods.get(0).getValue(), 3,
> > londonNeighbourhoods.get(0).getCount());
> >         Assert.assertEquals("Hammersmith",
> > londonNeighbourhoods.get(1).getValue());
> >         Assert.assertEquals("2 orgs in " +
> > londonNeighbourhoods.get(1).getValue(), 2,
> > londonNeighbourhoods.get(1).getCount());
> >
> >         Assert.assertEquals("Greenwich Centre",
> > greenwichNeighbourhoods.get(0).getValue());
> >         Assert.assertEquals("2 orgs in " +
> > greenwichNeighbourhoods.get(0).getValue(), 2,
> > greenwichNeighbourhoods.get(0).getCount());
> >
> >         // NOT WORKING! - why?
> >         Assert.assertEquals("2 neighbourhoods in Greenwich: WHY IS
> THAT?",
> > 2, greenwichNeighbourhoods.size());
> >         Assert.assertEquals("Value_that_cant_be_matched",
> > greenwichNeighbourhoods.get(1).getValue());
> >         Assert.assertEquals("Value_that_cant_be_matched" +
> > greenwichNeighbourhoods.get(1).getValue(), 1,
> greenwichNeighbourhoods.get(1)
> >                 .getCount());
> >     }
> >
> >     public static SolrInputDocument makeTestDoc(Object... kvp) {
> >         SolrInputDocument doc = new SolrInputDocument();
> >         for (int i = 0; i < kvp.length;) {
> >             String k = (String) kvp[i++];
> >             Object v = kvp[i++];
> >             doc.addField(k, v);
> >         }
> >         return doc;
> >     }
> >
> >     protected void assertNumFound(String query, int num) throws
> > SolrServerException, IOException {
> >         QueryResponse rsp = server.query(new SolrQuery(query));
> >         if (num != rsp.getResults().getNumFound()) {
> >             Assert.fail("expected: " + num + " but had: " +
> > rsp.getResults().getNumFound() + " :: " + rsp.getResults());
> >         }
> >     }
> >
> > }
> >
> >
>

Re: SOLR-792 (hierarchical faceting) issue when only 1 document should be present in the pivot

Posted by Nicolas Peeters <ni...@gmail.com>.
Hi Solr-Users,

I realized that I can get the behaviour that I expect if I set
facet.pivot.mincount to 0. However, I'm still puzzled why this needs to be 0
and not 1. There is one occurrence of this document, isn't there?
With this value to 1, the printout of the pivot looks like this (where you
clearly see (1) for "Value_that_cant_be_matched"):

PIVOT: level1_loc_s,level2_loc_s,level3_loc_s
level1_loc_s=Greater London (8)
  level2_loc_s=London (5)
    level3_loc_s=Mayfair (3)
    level3_loc_s=Hammersmith (2)
  level2_loc_s=Greenwich (3)
    level3_loc_s=Greenwich Centre (2)
    level3_loc_s=Value_that_cant_be_matched (1)
level1_loc_s=Groot Amsterdam (5)
  level2_loc_s=Amsterdam (3)
    level3_loc_s=Jordaan (2)
    level3_loc_s=Centrum (1)
  level2_loc_s=Amstelveen (2)
    level3_loc_s=Centrum (2)

Any expert advice on why this is the case is more than welcome!

Best regards,

Nicolas

On Wed, Nov 24, 2010 at 2:27 PM, Nicolas Peeters <ni...@gmail.com>wrote:

> Hi Solr Community,
>
> I've been experimenting with Solr 4.0 (trunk) in order to test the SOLR-792
> feature. I have written a test that shows what I'm trying to ask. Basically,
> I'm creating a hierarchy of the area/city/neighbourhood. The problem that I
> see is that for documents that have only 1 item in a particular hierarchy
> (e.g. Greater London/Greenwich/Centre (which I've called
> "Value_that_cant_be_matched in this example"...)), these are not found by
> the pivot facet. If I add a second one, then it works. I'm puzzled why this
> is the case.
>
> This is the result of the Sytem.out that prints out the pivot facet fields
> hierarchy (see line 86)
>
> PIVOT: level1_loc_s,level2_loc_s,level3_loc_s
> level1_loc_s=Greater London (8)
>   level2_loc_s=London (5)
>     level3_loc_s=Mayfair (3)
>     level3_loc_s=Hammersmith (2)
>   level2_loc_s=Greenwich (3)
>     level3_loc_s=Greenwich Centre (2)
>                                      //--> why isn't there a
> "level3_loc_s=Value_that_cant_be_matched (1)" here?
> level1_loc_s=Groot Amsterdam (5)
>   level2_loc_s=Amsterdam (3)
>     level3_loc_s=Jordaan (2)
>   level2_loc_s=Amstelveen (2)
>     level3_loc_s=Centrum (2)
>
>
> How can I make sure that Solr would find in the tree the single document
> when I facet on this "location" hierarchy?
>
> Thank you very much for your help.
>
> Nicolas
>
> import java.io.IOException;
> import java.net.MalformedURLException;
> import java.util.ArrayList;
> import java.util.List;
> import java.util.Map;
>
> import org.apache.solr.client.solrj.SolrQuery;
> import org.apache.solr.client.solrj.SolrServerException;
> import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
> import org.apache.solr.client.solrj.response.PivotField;
> import org.apache.solr.client.solrj.response.QueryResponse;
> import org.apache.solr.common.SolrInputDocument;
> import org.apache.solr.common.util.NamedList;
> import org.junit.Assert;
> import org.junit.Before;
> import org.junit.Test;
>
> /**
>  * This is a test for hiearchical faceting based on SOLR-792 (I basically
> just checkout the trunk of Solr-4.0).
>  *
>  * Unit test that shows the particular behaviour that I'm experiencing.
>  * I would have expected that the doc (see line 95) with as level3_loc_s
> "Value_that_cant_be_matched" would appear in the pivot. It seems that you
> actually need at least 2!
>  *
>  * @author npeeters
>  */
> public class HierarchicalPivotTest {
>
>     CommonsHttpSolrServer server;
>
>     @Before
>     public void setup() throws MalformedURLException {
>         // the instance can be reused
>         this.server = new CommonsHttpSolrServer("
> http://localhost:8983/solr");
>         this.server.setSoTimeout(500); // socket read timeout
>         this.server.setConnectionTimeout(100);
>         this.server.setDefaultMaxConnectionsPerHost(100);
>         this.server.setMaxTotalConnections(100);
>         this.server.setFollowRedirects(false); // defaults to false
>         // allowCompression defaults to false.
>     }
>
>     protected List<SolrInputDocument> createHierarchicalOrgData() {
>         int id = 1;
>         List<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
>         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> "level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amsterdam",
> "level3_loc_s", "Centrum"));
>         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> "level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amsterdam",
> "level3_loc_s", "Jordaan"));
>         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> "level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amsterdam",
> "level3_loc_s", "Jordaan"));
>         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> "level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amstelveen",
> "level3_loc_s", "Centrum"));
>         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> "level1_loc_s", "Groot Amsterdam", "level2_loc_s", "Amstelveen",
> "level3_loc_s", "Centrum"));
>         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> "level1_loc_s", "Greater London", "level2_loc_s", "London", "level3_loc_s",
> "Hammersmith"));
>         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> "level1_loc_s", "Greater London", "level2_loc_s", "London", "level3_loc_s",
> "Hammersmith"));
>         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> "level1_loc_s", "Greater London", "level2_loc_s", "London", "level3_loc_s",
> "Mayfair"));
>         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> "level1_loc_s", "Greater London", "level2_loc_s", "London", "level3_loc_s",
> "Mayfair"));
>         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> "level1_loc_s", "Greater London", "level2_loc_s", "London", "level3_loc_s",
> "Mayfair"));
>         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> "level1_loc_s", "Greater London", "level2_loc_s", "Greenwich",
> "level3_loc_s", "Value_that_cant_be_matched"));
>         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> "level1_loc_s", "Greater London", "level2_loc_s", "Greenwich",
> "level3_loc_s", "Greenwich Centre"));
>         docs.add(makeTestDoc("id", id++, "name", "Organization " + id,
> "level1_loc_s", "Greater London", "level2_loc_s", "Greenwich",
> "level3_loc_s", "Greenwich Centre"));
>         return docs;
>     }
>
>     @Test
>     public void hierQueryWithOrgData() throws SolrServerException,
> IOException {
>
>         server.deleteByQuery("*:*");// delete everything!
>         server.commit();
>         assertNumFound("*:*", 0); // make sure it got in
>
>         List<SolrInputDocument> docs = createHierarchicalOrgData();
>         server.add(docs);
>         server.commit();
>
>         SolrQuery query = new SolrQuery("*:*");
>         query.addFacetPivotField("level1_loc_s,level2_loc_s,level3_loc_s");
>         query.setFacetMinCount(0);
>         query.setRows(20);
>
>         QueryResponse rsp = server.query(query);
>         Assert.assertEquals(docs.size(), rsp.getResults().getNumFound());
>
>         NamedList<List<PivotField>> pivots = rsp.getFacetPivot();
>         Assert.assertEquals("Only one pivot is defined: '" + "level1_loc_s"
> + "," + "level2_loc_s" + "," + "level3_loc_s" + "'", 1, pivots.size());
>
>         // debug the output
>         for (Map.Entry<String, List<PivotField>> entry : pivots) {
>             System.out.println("PIVOT: " + entry.getKey());
>             for (PivotField p : entry.getValue()) {
>                 p.write(System.out, 0);
>             }
>             System.out.println();
>             //here you can see already that there's not level3_loc_s for
> Greenwich, which is not what I'd expect!
>         }
>
>         List<PivotField> pivot = pivots.getVal(0);
>         Assert.assertEquals("level1_loc_s" + "," + "level2_loc_s" + "," +
> "level3_loc_s", pivots.getName(0));
>         Assert.assertEquals("The first level, there should be 2 pivots (one
> for 'Greater London' and one for 'Groot Amsterdam'", 2,
>                 pivot.size());
>
>         // level 1
>         PivotField greaterLondon = pivot.get(0);
>         Assert.assertEquals("level1_loc_s", greaterLondon.getField());
>         Assert.assertEquals("Greater London", greaterLondon.getValue());
>         Assert.assertEquals("8 locations under Great London", 8,
> greaterLondon.getCount());
>
>         // level 2
>         List<PivotField> greaterLondonLocations = greaterLondon.getPivot();
>         Assert.assertEquals("The next level in the Greater London hierarchy
> has 2 elements: 'Greenwich' and 'London'", 2,
>                 greaterLondonLocations.size());
>         Assert.assertEquals("level2_loc_s",
> greaterLondonLocations.get(0).getField());
>         Assert.assertEquals("London",
> greaterLondonLocations.get(0).getValue());
>         Assert.assertEquals("5 neighbourhoods under " +
> greaterLondonLocations.get(0).getValue(), 5, greaterLondonLocations.get(0)
>                 .getCount());
>
>         Assert.assertEquals("level2_loc_s",
> greaterLondonLocations.get(0).getField());
>         Assert.assertEquals("Greenwich",
> greaterLondonLocations.get(1).getValue());
>         Assert.assertEquals("3 neighbourhoods under " +
> greaterLondonLocations.get(1).getValue(), 3, greaterLondonLocations.get(1)
>                 .getCount());
>
>         // level 3 (London)
>         List<PivotField> londonNeighbourhoods =
> greaterLondonLocations.get(0).getPivot();
>         List<PivotField> greenwichNeighbourhoods =
> greaterLondonLocations.get(1).getPivot();
>
>         Assert.assertEquals("The next level in the Greater London hierarchy
> has 2 elements: 'Hammersmith' and 'Mayfair'", 2,
>                 greaterLondonLocations.size());
>         Assert.assertEquals("level3_loc_s",
> londonNeighbourhoods.get(1).getField());
>         Assert.assertEquals("Mayfair",
> londonNeighbourhoods.get(0).getValue());
>         Assert.assertEquals("3 orgs in " +
> londonNeighbourhoods.get(0).getValue(), 3,
> londonNeighbourhoods.get(0).getCount());
>         Assert.assertEquals("Hammersmith",
> londonNeighbourhoods.get(1).getValue());
>         Assert.assertEquals("2 orgs in " +
> londonNeighbourhoods.get(1).getValue(), 2,
> londonNeighbourhoods.get(1).getCount());
>
>         Assert.assertEquals("Greenwich Centre",
> greenwichNeighbourhoods.get(0).getValue());
>         Assert.assertEquals("2 orgs in " +
> greenwichNeighbourhoods.get(0).getValue(), 2,
> greenwichNeighbourhoods.get(0).getCount());
>
>         // NOT WORKING! - why?
>         Assert.assertEquals("2 neighbourhoods in Greenwich: WHY IS THAT?",
> 2, greenwichNeighbourhoods.size());
>         Assert.assertEquals("Value_that_cant_be_matched",
> greenwichNeighbourhoods.get(1).getValue());
>         Assert.assertEquals("Value_that_cant_be_matched" +
> greenwichNeighbourhoods.get(1).getValue(), 1, greenwichNeighbourhoods.get(1)
>                 .getCount());
>     }
>
>     public static SolrInputDocument makeTestDoc(Object... kvp) {
>         SolrInputDocument doc = new SolrInputDocument();
>         for (int i = 0; i < kvp.length;) {
>             String k = (String) kvp[i++];
>             Object v = kvp[i++];
>             doc.addField(k, v);
>         }
>         return doc;
>     }
>
>     protected void assertNumFound(String query, int num) throws
> SolrServerException, IOException {
>         QueryResponse rsp = server.query(new SolrQuery(query));
>         if (num != rsp.getResults().getNumFound()) {
>             Assert.fail("expected: " + num + " but had: " +
> rsp.getResults().getNumFound() + " :: " + rsp.getResults());
>         }
>     }
>
> }
>
>