You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2014/08/13 20:23:54 UTC
svn commit: r1617789 [3/3] - in /lucene/dev/trunk/solr: ./
core/src/java/org/apache/solr/handler/component/
core/src/java/org/apache/solr/request/ core/src/java/org/apache/solr/util/
core/src/test/org/apache/solr/ core/src/test/org/apache/solr/cloud/ c...
Added: lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java?rev=1617789&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java (added)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java Wed Aug 13 18:23:53 2014
@@ -0,0 +1,762 @@
+package org.apache.solr.handler.component;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Date;
+import java.util.List;
+import java.io.IOException;
+
+import org.apache.solr.BaseDistributedSearchTestCase;
+import org.apache.solr.client.solrj.SolrServer;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.response.PivotField;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.FacetParams;
+import org.apache.solr.common.params.SolrParams;
+
+import junit.framework.AssertionFailedError;
+
+public class DistributedFacetPivotLargeTest extends BaseDistributedSearchTestCase {
+
+ /**
+ * The empty string. The setup code indexes it as a real (non-missing) value of
+ * special_s, and the refinement checks expect it back as a pivot value.
+ */
+ public static final String SPECIAL = "";
+
+ /**
+ * Configures the test for four shards. The setup code only ever indexes into
+ * the first three, so the fourth stays empty as an edge case.
+ */
+ public DistributedFacetPivotLargeTest() {
+ this.shardCount = 4;
+ this.fixShardCount = true;
+ }
+
+ /**
+ * Indexes the pivot test documents and then issues a series of facet.pivot
+ * queries (limits, offsets, sort orders, mincount, facet.missing, tagged/excluded
+ * filters, and overrequest settings that force refinement), asserting on the
+ * pivot values and counts that come back.
+ *
+ * Several sections are commented out; each names the SOLR issue that blocks it.
+ */
+ @Override
+ public void doTest() throws Exception {
+ this.stress = 0 ;
+ handle.clear();
+ handle.put("QTime", SKIPVAL);
+ handle.put("timestamp", SKIPVAL);
+ handle.put("maxScore", SKIPVAL);
+
+ setupDistributedPivotFacetDocuments();
+
+ QueryResponse rsp = null;
+
+ List<PivotField> pivots = null;
+ PivotField firstInt = null;
+ PivotField firstBool = null;
+ PivotField firstDate = null;
+ PivotField firstPlace = null;
+ PivotField firstCompany = null;
+
+ // basic check w/ limit & default sort (count)
+ rsp = query( "q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "facet.pivot","place_s,company_t",
+ FacetParams.FACET_LIMIT, "12");
+ pivots = rsp.getFacetPivot().get("place_s,company_t");
+ assertEquals(12, pivots.size());
+ firstPlace = pivots.get(0);
+ assertPivot("place_s", "cardiff", 257, firstPlace);
+ assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
+ // Microsoft will come back wrong if refinement was not done correctly
+ assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(1));
+
+ // trivial mincount=0 check
+ rsp = query( "q", "does_not_exist_s:foo",
+ "rows", "0",
+ "facet","true",
+ "facet.pivot","company_t",
+ FacetParams.FACET_LIMIT, "10",
+ FacetParams.FACET_PIVOT_MINCOUNT,"0");
+ pivots = rsp.getFacetPivot().get("company_t");
+ assertEquals(10, pivots.size());
+ for (PivotField p : pivots) {
+ assertEquals(0, p.getCount());
+ }
+
+ // sanity check limit=0 w/ mincount=0 & missing=true
+ //
+ // SOLR-6328: doesn't work for single node, so can't work for distrib either (yet)
+ //
+ // PivotFacetField's init of needRefinementAtThisLevel as needing potential change
+ //
+ // rsp = query( "q", "*:*",
+ // "rows", "0",
+ // "facet","true",
+ // "f.company_t.facet.limit", "10",
+ // "facet.pivot","special_s,bogus_s,company_t",
+ // "facet.missing", "true",
+ // FacetParams.FACET_LIMIT, "0",
+ // FacetParams.FACET_PIVOT_MINCOUNT,"0");
+ // pivots = rsp.getFacetPivot().get("special_s,bogus_s,company_t");
+ // assertEquals(1, pivots.size()); // only the missing
+ // assertPivot("special_s", null, docNumber - 5, pivots.get(0)); // 5 docs w/special_s
+ // assertEquals(pivots.toString(), 1, pivots.get(0).getPivot().size());
+ // assertPivot("bogus_s", null, docNumber, pivots.get(0).getPivot().get(0));
+ // // TODO: some asserts on company results
+
+ // basic check w/ default sort, limit, & mincount==0
+ rsp = query( "q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "facet.pivot","place_s,company_t",
+ FacetParams.FACET_LIMIT, "50",
+ FacetParams.FACET_PIVOT_MINCOUNT,"0");
+ pivots = rsp.getFacetPivot().get("place_s,company_t");
+ assertEquals(50, pivots.size());
+ firstPlace = pivots.get(0);
+ assertPivot("place_s", "cardiff", 257, firstPlace);
+ assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
+ // Microsoft will come back wrong if refinement was not done correctly
+ assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(1));
+
+ // sort=index + offset + limit w/ some variables
+ for (SolrParams variableParams :
+ new SolrParams[] { // both variations should work just as well
+ // defaults
+ params(),
+ // force refinement
+ params(FacetParams.FACET_OVERREQUEST_RATIO, "1",
+ FacetParams.FACET_OVERREQUEST_COUNT, "0") }) {
+
+ SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "facet.sort","index",
+ "f.place_s.facet.limit", "20",
+ "f.place_s.facet.offset", "40",
+ "facet.pivot", "place_s,company_t"),
+ variableParams );
+
+ try {
+ rsp = query( p );
+ pivots = rsp.getFacetPivot().get("place_s,company_t");
+ assertEquals(20, pivots.size()); // limit
+ for (int i = 0; i < 10; i++) {
+ PivotField place = pivots.get(i);
+ assertTrue(place.toString(), place.getValue().toString().endsWith("placeholder"));
+ assertEquals(3, place.getPivot().size());
+ assertPivot("company_t", "bbc", 6, place.getPivot().get(0));
+ assertPivot("company_t", "microsoft", 6, place.getPivot().get(1));
+ assertPivot("company_t", "polecat", 6, place.getPivot().get(2));
+ }
+ assertPivot("place_s", "cardiff", 257, pivots.get(10));
+ assertPivot("place_s", "krakaw", 1, pivots.get(11));
+ assertPivot("place_s", "medical staffing network holdings, inc.", 51, pivots.get(12));
+ for (int i = 13; i < 20; i++) {
+ PivotField place = pivots.get(i);
+ assertTrue(place.toString(), place.getValue().toString().startsWith("placeholder"));
+ assertEquals(1, place.getPivot().size());
+ PivotField company = place.getPivot().get(0);
+ assertTrue(company.toString(), company.getValue().toString().startsWith("compholder"));
+ assertEquals(company.toString(), 1, company.getCount());
+ }
+ } catch (AssertionError ae) {
+ // AssertionError is the supertype of junit.framework.AssertionFailedError, so
+ // failures from either JUnit 3 or JUnit 4 style asserts get the params appended
+ throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
+ }
+ }
+
+ // sort=index + mincount=0
+ //
+ // SOLR-6329: facet.pivot.mincount=0 doesn't work well with distrib
+ //
+ // broken honda
+ //
+ // This is tricky, here's what i think is happening....
+ // - "company:honda" only exists on twoShard, and only w/ "place:cardiff"
+ // - twoShard has no other places in its docs
+ // - twoShard can't return any other places w/ honda as a count=0 sub-value
+ // - if we refined all other companies places, would twoShard return honda==0 ?
+ // ... but there's no refinement since mincount==0
+ // - would it even matter
+ //
+ // should we remove the refinement short circuit?
+ //
+ // rsp = query( params( "q", "*:*",
+ // "rows", "0",
+ // "facet","true",
+ // "facet.sort","index",
+ // "f.place_s.facet.limit", "20",
+ // "f.place_s.facet.offset", "40",
+ // FacetParams.FACET_PIVOT_MINCOUNT,"0",
+ // "facet.pivot", "place_s,company_t") );
+ // // TODO: more asserts
+ //
+ //
+ // really trivial demonstration of the above problem
+ //
+ // rsp = query( params( "q", "*:*",
+ // "rows", "0",
+ // "facet","true",
+ // FacetParams.FACET_PIVOT_MINCOUNT,"0",
+ // "facet.pivot", "top_s,sub_s") );
+
+ // basic check w/ limit & index sort
+ for (SolrParams facetParams :
+ // results should be the same regardless of whether local params are used
+ new SolrParams[] {
+ // Broken: SOLR-6193
+ // params("facet.pivot","{!facet.limit=4 facet.sort=index}place_s,company_t"),
+ // params("facet.pivot","{!facet.sort=index}place_s,company_t",
+ // FacetParams.FACET_LIMIT, "4"),
+ params("facet.pivot","place_s,company_t",
+ FacetParams.FACET_LIMIT, "4",
+ "facet.sort", "index") }) {
+ SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
+ "rows", "0",
+ "facet","true"),
+ facetParams );
+ try {
+ rsp = query( p );
+ pivots = rsp.getFacetPivot().get("place_s,company_t");
+ assertEquals(4, pivots.size());
+ firstPlace = pivots.get(0);
+ assertPivot("place_s", "0placeholder", 6, firstPlace);
+ firstCompany = firstPlace.getPivot().get(0);
+ assertPivot("company_t", "bbc", 6, firstCompany);
+ } catch (AssertionError ae) {
+ // catch the supertype so any assertion failure is reported with its params
+ throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
+ }
+ }
+
+ // Pivot Faceting (combined with Field Faceting)
+ for (SolrParams facetParams :
+ // with and w/o an excluded fq
+ // (either way, facet results should be the same)
+ new SolrParams[] {
+ params("facet.pivot","place_s,company_t",
+ "facet.field","place_s"),
+ params("facet.pivot","{!ex=ok}place_s,company_t",
+ "facet.field","{!ex=ok}place_s",
+ "fq","{!tag=ok}place_s:cardiff"),
+ params("facet.pivot","{!ex=pl,co}place_s,company_t",
+ "fq","{!tag=pl}place_s:cardiff",
+ "fq","{!tag=co}company_t:bbc") }) {
+
+ // default order (count)
+ rsp = query( SolrParams.wrapDefaults(params("q", "*:*",
+ "rows", "0",
+ "facet","true",
+ FacetParams.FACET_LIMIT, "4"),
+ facetParams) );
+ pivots = rsp.getFacetPivot().get("place_s,company_t");
+ assertEquals(4, pivots.size());
+ firstPlace = pivots.get(0);
+ assertPivot("place_s", "cardiff", 257, firstPlace);
+ assertEquals(4, firstPlace.getPivot().size());
+ firstCompany = firstPlace.getPivot().get(0);
+ assertPivot("company_t", "bbc", 101, firstCompany);
+
+ // Index Order
+ rsp = query( SolrParams.wrapDefaults(params("q", "*:*",
+ "rows", "0",
+ "facet","true",
+ FacetParams.FACET_LIMIT, "4",
+ "facet.sort", "index"),
+ facetParams) );
+ pivots = rsp.getFacetPivot().get("place_s,company_t");
+ assertEquals(4, pivots.size());
+ firstPlace = pivots.get(0);
+ assertPivot("place_s", "0placeholder", 6, firstPlace);
+ assertEquals(3, firstPlace.getPivot().size()); // only 3 vals in data < limit==4
+ firstCompany = firstPlace.getPivot().get(0);
+ assertPivot("company_t", "bbc", 6, firstCompany);
+
+ // Field level limits
+ rsp = query( SolrParams.wrapDefaults(params("q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "f.place_s.facet.limit","2",
+ "f.company_t.facet.limit","4"),
+ facetParams) );
+ pivots = rsp.getFacetPivot().get("place_s,company_t");
+ assertEquals(2, pivots.size());
+ firstPlace = pivots.get(0);
+ assertPivot("place_s", "cardiff", 257, firstPlace);
+ assertEquals(4, firstPlace.getPivot().size());
+ firstCompany = firstPlace.getPivot().get(0);
+ assertPivot("company_t", "bbc", 101, firstCompany);
+ }
+
+ // Pivot Faceting Count w/fq (not excluded)
+ rsp = query( "q", "*:*",
+ "rows", "0",
+ "fq","place_s:cardiff",
+ "facet","true",
+ "facet.pivot","place_s,company_t",
+ FacetParams.FACET_LIMIT, "4");
+ pivots = rsp.getFacetPivot().get("place_s,company_t");
+ assertEquals(1, pivots.size());
+ firstPlace = pivots.get(0);
+ assertPivot("place_s", "cardiff", 257, firstPlace);
+ assertEquals(4, firstPlace.getPivot().size());
+ firstCompany = firstPlace.getPivot().get(0);
+ assertPivot("company_t", "bbc", 101, firstCompany);
+
+
+ // Same Pivot - one with exclusion and one w/o
+ rsp = query( "q", "*:*",
+ "rows", "0",
+ "fq","{!tag=ff}pay_i:[2000 TO *]",
+ "facet","true",
+ "facet.pivot","{!key=filt}place_s,company_t",
+ "facet.pivot","{!key=nofilt ex=ff}place_s,company_t",
+ FacetParams.FACET_LIMIT, "4");
+ pivots = rsp.getFacetPivot().get("filt");
+ assertEquals(4, pivots.size());
+ firstPlace = pivots.get(0);
+ assertPivot("place_s", "cardiff", 105, firstPlace);
+ assertEquals(4, firstPlace.getPivot().size());
+ assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
+ assertPivot("company_t", "microsoft", 54, firstPlace.getPivot().get(1));
+ //
+ pivots = rsp.getFacetPivot().get("nofilt");
+ assertEquals(4, pivots.size());
+ firstPlace = pivots.get(0);
+ assertPivot("place_s", "cardiff", 257, firstPlace);
+ assertEquals(4, firstPlace.getPivot().size());
+ assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
+ assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(1));
+
+ // Same Pivot - one in default (count) order and one in index order
+ //
+ // Broken: SOLR-6193 - the facet.sort localparam isn't being picked up correctly
+ //
+ // rsp = query( "q", "*:*",
+ // "rows", "0",
+ // "facet","true",
+ // "fq","pay_i:[2000 TO *]",
+ // "facet.pivot","{!key=sc}place_s,company_t",
+ // "facet.pivot","{!key=si facet.sort=index}place_s,company_t",
+ // FacetParams.FACET_LIMIT, "4");
+ // pivots = rsp.getFacetPivot().get("sc");
+ // assertEquals(4, pivots.size());
+ // firstPlace = pivots.get(0);
+ // assertPivot("place_s", "cardiff", 105, firstPlace);
+ // assertEquals(4, firstPlace.getPivot().size());
+ // assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
+ // assertPivot("company_t", "microsoft", 54, firstPlace.getPivot().get(1));
+ // //
+ // pivots = rsp.getFacetPivot().get("si");
+ // assertEquals(4, pivots.size());
+ // firstPlace = pivots.get(0);
+ // assertPivot("place_s", "0placeholder", 6, firstPlace);
+ // assertEquals(3, firstPlace.getPivot().size()); // only 3 in the data < facet.limit
+ // assertPivot("company_t", "bbc", 6, firstPlace.getPivot().get(0));
+ // assertPivot("company_t", "microsoft", 6, firstPlace.getPivot().get(1));
+
+
+ // Field level limits and small offset
+ rsp = query( "q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "facet.pivot","place_s,company_t",
+ "f.place_s.facet.limit","2",
+ "f.company_t.facet.limit","4",
+ "facet.offset","1");
+ pivots = rsp.getFacetPivot().get("place_s,company_t");
+ assertEquals(2, pivots.size());
+ firstPlace = pivots.get(0);
+ assertPivot("place_s", "medical staffing network holdings, inc.", 51, firstPlace);
+ assertEquals(2, firstPlace.getPivot().size()); // num vals in data < limit==4
+ firstCompany = firstPlace.getPivot().get(0);
+ assertPivot("company_t", "bbc", 50, firstCompany);
+
+
+ // Field level offsets and limit
+ rsp = query( "q", "*:*",
+ "rows", "0",
+ "fq","{!tag=pl}place_s:cardiff",
+ "facet","true",
+ "facet.pivot","{!ex=pl}place_s,company_t",
+ "f.place_s.facet.offset","1",
+ "f.company_t.facet.offset","2",
+ FacetParams.FACET_LIMIT, "4");
+ pivots = rsp.getFacetPivot().get("place_s,company_t");
+ assertEquals(4, pivots.size());
+ firstPlace = pivots.get(0);
+ assertPivot("place_s", "medical staffing network holdings, inc.", 51, firstPlace);
+ assertEquals(1, firstPlace.getPivot().size()); // num vals in data < limit==4
+ firstCompany = firstPlace.getPivot().get(0);
+ assertPivot("company_t", "polecat", 50, firstCompany);
+
+
+ // datetime
+ rsp = query( "q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "facet.pivot","hiredate_dt,place_s,company_t",
+ "f.hiredate_dt.facet.limit","2",
+ "f.hiredate_dt.facet.offset","1",
+ FacetParams.FACET_LIMIT, "4");
+ pivots = rsp.getFacetPivot().get("hiredate_dt,place_s,company_t");
+ assertEquals(2, pivots.size());
+ firstDate = pivots.get(0); // 2012-09-01T12:30:00Z
+ assertPivot("hiredate_dt", new Date(1346502600000L), 200, firstDate);
+ assertEquals(1, firstDate.getPivot().size()); // num vals in data < limit==4
+ firstPlace = firstDate.getPivot().get(0);
+ assertPivot("place_s", "cardiff", 200, firstPlace);
+ assertEquals(4, firstPlace.getPivot().size());
+ firstCompany = firstPlace.getPivot().get(0);
+ assertPivot("company_t", "bbc", 50, firstCompany);
+
+ // int
+ rsp = query( "q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "facet.pivot","pay_i,place_s,company_t",
+ "f.pay_i.facet.limit","2",
+ "f.pay_i.facet.offset","1",
+ FacetParams.FACET_LIMIT, "4");
+ pivots = rsp.getFacetPivot().get("pay_i,place_s,company_t");
+ assertEquals(2, pivots.size());
+ firstInt = pivots.get(0);
+ assertPivot("pay_i", 2000, 50, firstInt);
+ assertEquals(4, firstInt.getPivot().size());
+ firstPlace = firstInt.getPivot().get(0);
+ assertPivot("place_s", "0placeholder", 1, firstPlace);
+ assertEquals(3, firstPlace.getPivot().size());
+ firstCompany = firstPlace.getPivot().get(0);
+ assertPivot("company_t", "bbc", 1, firstCompany);
+
+ // boolean
+ rsp = query( "q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "facet.pivot","real_b,place_s,company_t",
+ "f.real_b.facet.missing","true",
+ "f.real_b.facet.limit","2",
+ FacetParams.FACET_LIMIT, "4");
+ pivots = rsp.getFacetPivot().get("real_b,place_s,company_t");
+ assertEquals(3, pivots.size());
+ firstBool = pivots.get(0);
+ assertPivot("real_b", false, 300, firstBool);
+ assertEquals(4, firstBool.getPivot().size());
+ firstPlace = firstBool.getPivot().get(0);
+ assertPivot("place_s", "0placeholder", 6, firstPlace);
+ assertEquals(3, firstPlace.getPivot().size());
+ firstCompany = firstPlace.getPivot().get(0);
+ assertPivot("company_t", "bbc", 6, firstCompany);
+
+ // bogus fields
+ rsp = query( "q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "facet.pivot","doesntexist_t,neitherdoi_i",
+ FacetParams.FACET_LIMIT, "4");
+ pivots = rsp.getFacetPivot().get("doesntexist_t,neitherdoi_i");
+ assertEquals(0, pivots.size());
+
+ // bogus fields with facet.missing
+ rsp = query( "q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "facet.pivot","doesntexist_t,neitherdoi_i",
+ "facet.missing", "true",
+ FacetParams.FACET_LIMIT, "4");
+ pivots = rsp.getFacetPivot().get("doesntexist_t,neitherdoi_i");
+ assertEquals(1, pivots.size());
+ assertPivot("doesntexist_t", null, docNumber, pivots.get(0));
+ assertEquals(1, pivots.get(0).getPivot().size());
+ assertPivot("neitherdoi_i", null, docNumber, pivots.get(0).getPivot().get(0));
+
+ // Negative facet limit
+ for (SolrParams facetParams :
+ // results should be the same regardless of whether facet.limit is global,
+ // a local param, or specified as a per-field override for both fields
+ new SolrParams[] {
+ params(FacetParams.FACET_LIMIT, "-1",
+ "facet.pivot","place_s,company_t"),
+ // Broken: SOLR-6193
+ // params("facet.pivot","{!facet.limit=-1}place_s,company_t"),
+ params("f.place_s.facet.limit", "-1",
+ "f.company_t.facet.limit", "-1",
+ "facet.pivot","place_s,company_t") }) {
+
+ SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "facet.sort", "count" ),
+ facetParams);
+ try {
+ rsp = query( p );
+ pivots = rsp.getFacetPivot().get("place_s,company_t");
+ assertEquals(103, pivots.size());
+ firstPlace = pivots.get(0);
+ assertPivot("place_s", "cardiff", 257, firstPlace);
+ assertEquals(54, firstPlace.getPivot().size());
+ firstCompany = firstPlace.getPivot().get(0);
+ assertPivot("company_t","bbc", 101, firstCompany);
+ } catch (AssertionError ae) {
+ // catch the supertype so any assertion failure is reported with its params
+ throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
+ }
+ }
+
+ // Negative per-field facet limit (outer)
+ for (SolrParams facetParams :
+ // results should be the same regardless of whether per-field facet.limit is
+ // a global or a local param
+ new SolrParams[] {
+ // Broken: SOLR-6193
+ // params( "facet.pivot","{!f.id.facet.limit=-1}place_s,id" ),
+ params( "facet.pivot","place_s,id",
+ "f.id.facet.limit", "-1") }) {
+
+ SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "facet.sort", "count" ),
+ facetParams);
+ try {
+ rsp = query( p );
+ pivots = rsp.getFacetPivot().get("place_s,id");
+ assertEquals(100, pivots.size()); // default
+ firstPlace = pivots.get(0);
+ assertPivot("place_s", "cardiff", 257, firstPlace);
+ assertEquals(257, firstPlace.getPivot().size());
+ } catch (AssertionError ae) {
+ // catch the supertype so any assertion failure is reported with its params
+ throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
+ }
+ }
+
+ // Negative per-field facet limit (inner)
+ for (SolrParams facetParams :
+ // results should be the same regardless of whether per-field facet.limit is
+ // a global or a local param
+ new SolrParams[] {
+ // Broken: SOLR-6193
+ // params( "facet.pivot","{!f.place_s.facet.limit=-1}place_s,id" ),
+ params( "facet.pivot","place_s,id",
+ "f.place_s.facet.limit", "-1") }) {
+
+ SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "facet.sort", "count" ),
+ facetParams);
+ try {
+ rsp = query( p );
+ pivots = rsp.getFacetPivot().get("place_s,id");
+ assertEquals(103, pivots.size());
+ firstPlace = pivots.get(0);
+ assertPivot("place_s", "cardiff", 257, firstPlace);
+ assertEquals(100, firstPlace.getPivot().size()); // default
+ } catch (AssertionError ae) {
+ // catch the supertype so any assertion failure is reported with its params
+ throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
+ }
+ }
+
+ // Mincount + facet.pivot 2 different ways (swap field order)
+ rsp = query( "q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "facet.pivot","place_s,company_t",
+ "facet.pivot","company_t,place_s",
+ FacetParams.FACET_PIVOT_MINCOUNT,"6");
+ pivots = rsp.getFacetPivot().get("place_s,company_t");
+ assertEquals(52, pivots.size());
+ firstPlace = pivots.get(0);
+ assertPivot("place_s", "cardiff", 257, firstPlace);
+ assertEquals(4, firstPlace.getPivot().size());
+ firstCompany = firstPlace.getPivot().get(0);
+ assertPivot("company_t", "bbc", 101, firstCompany);
+ //
+ pivots = rsp.getFacetPivot().get("company_t,place_s");
+ assertEquals(4, pivots.size());
+ firstCompany = pivots.get(0);
+ assertPivot("company_t", "bbc", 451, firstCompany);
+ assertEquals(52, firstCompany.getPivot().size());
+ firstPlace = firstCompany.getPivot().get(0);
+ assertPivot("place_s", "cardiff", 101, firstPlace);
+
+ // refine on SPECIAL empty string
+ rsp = query( "q", "*:*",
+ "fq", "-place_s:0placeholder",
+ "rows", "0",
+ "facet","true",
+ "facet.limit","1",
+ FacetParams.FACET_OVERREQUEST_RATIO, "0", // force refinement
+ FacetParams.FACET_OVERREQUEST_COUNT, "1", // force refinement
+ "facet.pivot","special_s,company_t");
+ assertEquals(docNumber - 6, rsp.getResults().getNumFound()); // all docs but 0place
+ pivots = rsp.getFacetPivot().get("special_s,company_t");
+ assertEquals(1, pivots.size());
+ firstPlace = pivots.get(0);
+ assertPivot("special_s", SPECIAL, 3, firstPlace);
+ assertEquals(1, firstPlace.getPivot().size());
+ firstCompany = firstPlace.getPivot().get(0);
+ assertPivot("company_t", "microsoft", 2, firstCompany);
+
+ // TODO test "company_t,special_s" as well
+
+
+ // refine on SPECIAL empty string & facet.missing
+ // Also proves refinement on non-top elements occurs and allows them to get into the top
+ rsp = query( "q", "*:*",
+ "fq", "-place_s:0placeholder",
+ "rows", "0",
+ "facet","true",
+ "facet.limit","1",
+ "facet.missing","true",
+ FacetParams.FACET_OVERREQUEST_RATIO, "0", // force refinement
+ FacetParams.FACET_OVERREQUEST_COUNT, "2", // force refinement
+ "facet.pivot","special_s,company_t");
+ assertEquals(docNumber - 6, rsp.getResults().getNumFound()); // all docs but 0place
+ pivots = rsp.getFacetPivot().get("special_s,company_t");
+ assertEquals(2, pivots.size());
+ firstPlace = pivots.get(0);
+ assertPivot("special_s", SPECIAL, 3, firstPlace);
+ assertEquals(1, firstPlace.getPivot().size());
+ firstCompany = firstPlace.getPivot().get(0);
+ assertPivot("company_t", "microsoft", 2, firstCompany);
+ // last is "missing" val
+ assertPivot("special_s", null, docNumber -6 -3 -2, pivots.get(1)); // -0place -SPECIAL -xxx
+
+ // forced refinement on facet.missing
+ rsp = query( "q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "f.bogus_x_s.facet.missing","true",
+ "f.bogus_y_s.facet.missing","true",
+ "facet.pivot","bogus_x_s,place_s,bogus_y_s,company_t",
+ FacetParams.FACET_LIMIT, "12");
+ pivots = rsp.getFacetPivot().get("bogus_x_s,place_s,bogus_y_s,company_t");
+ assertEquals(1, pivots.size()); // just the missing value for bogus_x_s
+ assertPivot("bogus_x_s", null, docNumber, pivots.get(0));
+ pivots = pivots.get(0).getPivot();
+ assertEquals(12, pivots.size()); // places
+ firstPlace = pivots.get(0);
+ assertPivot("place_s", "cardiff", 257, firstPlace);
+ assertEquals(1, firstPlace.getPivot().size()); // just the missing value for bogus_y_s
+ assertPivot("bogus_y_s", null, 257, firstPlace.getPivot().get(0));
+ assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0).getPivot().get(0));
+ // Microsoft will come back wrong if refinement was not done correctly
+ assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(0).getPivot().get(1));
+
+
+
+
+
+ // Overrequesting a lot
+ this.query( "q", "*:*",
+ "rows", "0",
+ "facet", "true",
+ "facet.pivot","place_s,company_t",
+ FacetParams.FACET_OVERREQUEST_RATIO, "10",
+ FacetParams.FACET_OVERREQUEST_COUNT, "100");
+
+ // Overrequesting off
+ this.query( "q", "*:*",
+ "rows", "0",
+ "facet", "true",
+ "facet.pivot","place_s,company_t",
+ FacetParams.FACET_OVERREQUEST_RATIO, "0",
+ FacetParams.FACET_OVERREQUEST_COUNT, "0");
+
+ }
+
+ /**
+ * Checks one pivot entry against the expected field name, value and count.
+ * Each assertion is labelled with the aspect being compared followed by the
+ * string form of the actual PivotField.
+ *
+ * @param field expected field name
+ * @param value expected value (may be null)
+ * @param count expected count
+ * @param actual the pivot entry under test
+ */
+ private void assertPivot(String field, Object value, int count, // int numKids,
+ PivotField actual) {
+ final String described = actual.toString();
+ assertEquals("FIELD: " + described, field, actual.getField());
+ assertEquals("VALUE: " + described, value, actual.getValue());
+ assertEquals("COUNT: " + described, count, actual.getCount());
+ // TODO: add arg && assert on number of kids
+ //assertEquals("#KIDS: " + actual.toString(), numKids, actual.getPivot().size());
+ }
+
+
+
+ /**
+ * Deletes everything, then indexes the fixed data set that doTest asserts on.
+ * Shards 0, 1 and 2 receive deliberately uneven place_s / company_t data;
+ * shard 3 receives nothing, which is verified after the commit.
+ */
+ private void setupDistributedPivotFacetDocuments() throws Exception{
+
+ //Clear docs
+ del("*:*");
+ commit();
+
+ final int maxDocs = 50;
+ final SolrServer zeroShard = clients.get(0);
+ final SolrServer oneShard = clients.get(1);
+ final SolrServer twoShard = clients.get(2);
+ final SolrServer threeShard = clients.get(3); // edge case: never gets any matching docs
+
+ // primitive counter: a boxed Integer would box/unbox on every iteration
+ for(int i=0;i<maxDocs;i++){//50 entries
+ addPivotDoc(zeroShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft polecat bbc","pay_i",2400,"hiredate_dt", "2012-07-01T12:30:00Z","real_b","true");
+ addPivotDoc(zeroShard, "id", getDocNum(), "place_s", "medical staffing network holdings, inc.", "company_t", "microsoft polecat bbc","pay_i",2400,"hiredate_dt", "2012-07-01T12:30:00Z");
+
+ addPivotDoc(oneShard, "id", getDocNum(), "place_s", "placeholder"+i, "company_t", "compHolder"+i,"pay_i",24*i,"hiredate_dt", "2012-08-01T12:30:00Z");
+
+ addPivotDoc(twoShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "bbc honda","pay_i",2400,"hiredate_dt", "2012-09-01T12:30:00Z","real_b","true");
+ addPivotDoc(twoShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "compHolder"+i,"pay_i",22*i,"hiredate_dt", "2012-09-01T12:30:00Z","real_b","true");
+ addPivotDoc(twoShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "compHolder"+i,"pay_i",21*i,"hiredate_dt", "2012-09-01T12:30:00Z","real_b","true");
+ addPivotDoc(twoShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "compHolder"+i,"pay_i",20*i,"hiredate_dt", "2012-09-01T12:30:00Z","real_b","true");
+
+ //For the filler content
+ //Fifty places with 6 results each
+ addPivotDoc(oneShard, "id", getDocNum(), "place_s", i+"placeholder", "company_t", "microsoft polecat bbc","pay_i",2400,"hiredate_dt", "2012-10-01T12:30:00Z","real_b","false");
+ addPivotDoc(oneShard, "id", getDocNum(), "place_s", i+"placeholder", "company_t", "microsoft polecat bbc","pay_i",3100,"hiredate_dt", "2012-10-01T12:30:00Z","real_b","false");
+ addPivotDoc(oneShard, "id", getDocNum(), "place_s", i+"placeholder", "company_t", "microsoft polecat bbc","pay_i",3400,"hiredate_dt", "2012-10-01T12:30:00Z","real_b","false");
+ addPivotDoc(oneShard, "id", getDocNum(), "place_s", i+"placeholder", "company_t", "microsoft polecat bbc","pay_i",5400,"hiredate_dt", "2012-10-01T12:30:00Z","real_b","false");
+ addPivotDoc(oneShard, "id", getDocNum(), "place_s", i+"placeholder", "company_t", "microsoft polecat bbc","pay_i",6400,"hiredate_dt", "2012-10-01T12:30:00Z","real_b","false");
+ addPivotDoc(oneShard, "id", getDocNum(), "place_s", i+"placeholder", "company_t", "microsoft polecat bbc","pay_i",2000,"hiredate_dt", "2012-10-01T12:30:00Z","real_b","false");
+
+ }
+
+ addPivotDoc(oneShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft","pay_i",4367,"hiredate_dt", "2012-11-01T12:30:00Z");
+ addPivotDoc(oneShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft bbc","pay_i",8742,"hiredate_dt", "2012-11-01T12:30:00Z");
+ addPivotDoc(oneShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft polecat","pay_i",5824,"hiredate_dt", "2012-11-01T12:30:00Z");
+ addPivotDoc(oneShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft ","pay_i",6539,"hiredate_dt", "2012-11-01T12:30:00Z");
+ addPivotDoc(oneShard, "id", getDocNum(), "place_s", "medical staffing network holdings, inc.", "company_t", "microsoft ","pay_i",6539,"hiredate_dt", "2012-11-01T12:30:00Z", "special_s", "xxx");
+ // month written zero-padded ("01") so the value is a canonical ISO-8601 date
+ addPivotDoc(oneShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "polecat","pay_i",4352,"hiredate_dt", "2012-01-01T12:30:00Z", "special_s", "xxx");
+ addPivotDoc(oneShard, "id", getDocNum(), "place_s", "krakaw", "company_t", "polecat","pay_i",4352,"hiredate_dt", "2012-11-01T12:30:00Z", "special_s", SPECIAL);
+
+ addPivotDoc(twoShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft","pay_i",12,"hiredate_dt", "2012-11-01T12:30:00Z", "special_s", SPECIAL);
+ addPivotDoc(twoShard, "id", getDocNum(), "place_s", "cardiff", "company_t", "microsoft","pay_i",543,"hiredate_dt", "2012-11-01T12:30:00Z", "special_s", SPECIAL);
+
+
+ // two really trivial documents, unrelated to the rest of the tests,
+ // for the purpose of demoing the problem with mincount=0
+ addPivotDoc(oneShard, "id", getDocNum(), "top_s", "aaa", "sub_s", "bbb" );
+ addPivotDoc(twoShard, "id", getDocNum(), "top_s", "xxx", "sub_s", "yyy" );
+
+
+ commit();
+
+ assertEquals("shard #3 should never have any docs",
+ 0, threeShard.query(params("q", "*:*")).getResults().getNumFound());
+ }
+
+ /**
+ * Turns the given field name/value arguments into a SolrInputDocument via sdoc and
+ * passes it, with empty params, to indexDoc for the given client. Per the original
+ * note on this helper, that adds the document to the control client as well.
+ *
+ * @param client the shard client that should receive the document
+ * @param fields alternating field names and values, handed to sdoc unchanged
+ * @see #indexDoc(SolrServer,SolrParams,SolrInputDocument...)
+ * @see #sdoc
+ */
+ private void addPivotDoc(SolrServer client, Object... fields)
+ throws IOException, SolrServerException {
+
+ final SolrInputDocument doc = sdoc(fields);
+ indexDoc(client, params(), doc);
+ }
+
+ /** Last id handed out; the assertions also read it as the total number of docs indexed. */
+ private int docNumber = 0;
+
+ /**
+ * Advances the document counter and returns the new value, so the first id is 1.
+ */
+ public int getDocNum(){
+ return ++docNumber;
+ }
+
+}
Added: lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLongTailTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLongTailTest.java?rev=1617789&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLongTailTest.java (added)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLongTailTest.java Wed Aug 13 18:23:53 2014
@@ -0,0 +1,289 @@
+package org.apache.solr.handler.component;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Date;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.io.IOException;
+
+import org.apache.solr.BaseDistributedSearchTestCase;
+import org.apache.solr.client.solrj.SolrServer;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.response.PivotField;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.FacetParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+
+/**
+ * test demonstrating how overrequesting helps find top-terms in the "long tail"
+ * of shards that don't have even distributions of terms (something that can be common
+ * in cases of custom sharding -- even if you don't know that there is a correlation
+ * between the property you are sharding on and the property you are faceting on).
+ *
+ * NOTE: This test ignores the control collection (in single node mode, there is no
+ * need for the overrequesting, all the data is local -- so comparisons with it wouldn't
+ * be valid in the cases we are testing here)
+ */
+public class DistributedFacetPivotLongTailTest extends BaseDistributedSearchTestCase {
+
+ public DistributedFacetPivotLongTailTest(){
+ this.fixShardCount = true;
+ this.shardCount = 3;
+ }
+
+ private int docNumber = 0;
+
+ public int getDocNum() {
+ docNumber++;
+ return docNumber;
+ }
+
+ @Override
+ public void doTest() throws Exception {
+
+ final SolrServer shard0 = clients.get(0);
+ final SolrServer shard1 = clients.get(1);
+ final SolrServer shard2 = clients.get(2);
+
+ // the 5 top foo_s terms have 100 docs each on every shard
+ for (int i = 0; i < 100; i++) {
+ for (int j = 0; j < 5; j++) {
+ shard0.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j));
+ shard1.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j));
+ shard2.add(sdoc("id", getDocNum(), "foo_s", "aaa"+j));
+ }
+ }
+
+ // 20 foo_s terms that come in "second" with 50 docs each
+ // on both shard0 & shard1 ("bbb_")
+ for (int i = 0; i < 50; i++) {
+ for (int j = 0; j < 20; j++) {
+ shard0.add(sdoc("id", getDocNum(), "foo_s", "bbb"+j));
+ shard1.add(sdoc("id", getDocNum(), "foo_s", "bbb"+j));
+ }
+ // distracting term appears only on shard2, 50 times
+ shard2.add(sdoc("id", getDocNum(), "foo_s", "junkA"));
+ }
+ // put "bbb0" on shard2 exactly once to sanity check refinement
+ shard2.add(sdoc("id", getDocNum(), "foo_s", "bbb0"));
+
+ // long 'tail' foo_s term appears in 45 docs on every shard
+ // foo_s:tail is the only term with bar_s sub-pivot terms
+ for (int i = 0; i < 45; i++) {
+
+ // for sub-pivot, shard0 & shard1 have 6 docs each for "tailB"
+ // but the top 5 terms are ccc(0-4) -- 7 on each shard
+ // (4 docs each have junk terms)
+ String sub_term = (i < 35) ? "ccc"+(i % 5) : ((i < 41) ? "tailB" : "junkA");
+ shard0.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term));
+ shard1.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term));
+
+ // shard2's top 5 sub-pivot terms are junk terms that only it has, with 8 docs each,
+ // plus 5 docs that use "tailB"
+ sub_term = (i < 40) ? "junkB"+(i % 5) : "tailB";
+ shard2.add(sdoc("id", getDocNum(), "foo_s", "tail", "bar_s", sub_term));
+ }
+
+ // really long tail uncommon foo_s terms on shard2
+ for (int i = 0; i < 30; i++) {
+ shard2.add(sdoc("id", getDocNum(), "foo_s", "zzz"+i));
+ }
+
+ commit();
+
+ SolrParams req = params( "q", "*:*",
+ "distrib", "false",
+ "facet", "true",
+ "facet.limit", "10",
+ "facet.pivot", "foo_s,bar_s");
+
+ // sanity check that our expectations about each shard (non-distrib) are correct
+
+ PivotField pivot = null;
+ List<PivotField> pivots = null;
+ List<PivotField>[] shardPivots = new List[3];
+ shardPivots[0] = shard0.query( req ).getFacetPivot().get("foo_s,bar_s");
+ shardPivots[1] = shard1.query( req ).getFacetPivot().get("foo_s,bar_s");
+ shardPivots[2] = shard2.query( req ).getFacetPivot().get("foo_s,bar_s");
+
+ // top 5 same on all shards
+ for (int i = 0; i < 3; i++) {
+ assertEquals(10, shardPivots[i].size());
+ for (int j = 0; j < 5; j++) {
+ pivot = shardPivots[i].get(j);
+ assertEquals(pivot.toString(), "aaa"+j, pivot.getValue());
+ assertEquals(pivot.toString(), 100, pivot.getCount());
+ }
+ }
+ // top 6-10 same on shard0 & shard1
+ for (int i = 0; i < 2; i++) {
+ for (int j = 5; j < 10; j++) {
+ pivot = shardPivots[i].get(j);
+ assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("bbb"));
+ assertEquals(pivot.toString(), 50, pivot.getCount());
+ }
+ }
+ // 6-10 on shard2
+ assertEquals("junkA", shardPivots[2].get(5).getValue());
+ assertEquals(50, shardPivots[2].get(5).getCount());
+ assertEquals("tail", shardPivots[2].get(6).getValue());
+ assertEquals(45, shardPivots[2].get(6).getCount());
+ assertEquals("bbb0", shardPivots[2].get(7).getValue());
+ assertEquals(1, shardPivots[2].get(7).getCount());
+ for (int j = 8; j < 10; j++) {
+ pivot = shardPivots[2].get(j);
+ assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("zzz"));
+ assertEquals(pivot.toString(), 1, pivot.getCount());
+ }
+ // check sub-pivots on "tail" from shard2
+ pivots = shardPivots[2].get(6).getPivot();
+ assertEquals(6, pivots.size());
+ for (int j = 0; j < 5; j++) {
+ pivot = pivots.get(j);
+ assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("junkB"));
+ assertEquals(pivot.toString(), 8, pivot.getCount());
+ }
+ pivot = pivots.get(5);
+ assertEquals("tailB", pivot.getValue());
+ assertEquals(5, pivot.getCount());
+
+ // if we disable overrequesting, we don't find the long tail
+
+ pivots = queryServer( params( "q", "*:*",
+ "shards", getShardsString(),
+ FacetParams.FACET_OVERREQUEST_COUNT, "0",
+ FacetParams.FACET_OVERREQUEST_RATIO, "0",
+ "facet", "true",
+ "facet.limit", "6",
+ "facet.pivot", "foo_s,bar_s" )
+ ).getFacetPivot().get("foo_s,bar_s");
+ assertEquals(6, pivots.size());
+ for (int i = 0; i < 5; i++) {
+ pivot = pivots.get(i);
+ assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
+ assertEquals(pivot.toString(), 300, pivot.getCount());
+ }
+ // even w/o the long tail, we should have still asked shard2 to refine bbb0
+ assertTrue(pivots.get(5).toString(), pivots.get(5).getValue().equals("bbb0"));
+ assertEquals(pivots.get(5).toString(), 101, pivots.get(5).getCount());
+
+ // with default overrequesting, we should find the correct top 6 including
+ // long tail and top sub-pivots
+ // (even if we disable overrequesting on the sub-pivot)
+ for (ModifiableSolrParams q : new ModifiableSolrParams[] {
+ params(),
+ params("f.bar_s.facet.overrequest.ratio","0",
+ "f.bar_s.facet.overrequest.count","0") }) {
+
+ q.add( params( "q", "*:*",
+ "shards", getShardsString(),
+ "facet", "true",
+ "facet.limit", "6",
+ "facet.pivot", "foo_s,bar_s" ));
+ pivots = queryServer( q ).getFacetPivot().get("foo_s,bar_s");
+
+ assertEquals(6, pivots.size());
+ for (int i = 0; i < 5; i++) {
+ pivot = pivots.get(i);
+ assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
+ assertEquals(pivot.toString(), 300, pivot.getCount());
+ }
+ pivot = pivots.get(5);
+ assertEquals(pivot.toString(), "tail", pivot.getValue());
+ assertEquals(pivot.toString(), 135, pivot.getCount());
+ // check the sub pivots
+ pivots = pivot.getPivot();
+ assertEquals(6, pivots.size());
+ pivot = pivots.get(0);
+ assertEquals(pivot.toString(), "tailB", pivot.getValue());
+ assertEquals(pivot.toString(), 17, pivot.getCount());
+ for (int i = 1; i < 6; i++) { // ccc(0-4)
+ pivot = pivots.get(i);
+ assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
+ assertEquals(pivot.toString(), 14, pivot.getCount());
+ }
+ }
+
+ // if we lower the facet.limit on the sub-pivot, overrequesting should still ensure
+ // that we get the correct top5 including "tailB"
+
+ pivots = queryServer( params( "q", "*:*",
+ "shards", getShardsString(),
+ "facet", "true",
+ "facet.limit", "6",
+ "f.bar_s.facet.limit", "5",
+ "facet.pivot", "foo_s,bar_s" )
+ ).getFacetPivot().get("foo_s,bar_s");
+ assertEquals(6, pivots.size());
+ for (int i = 0; i < 5; i++) {
+ pivot = pivots.get(i);
+ assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
+ assertEquals(pivot.toString(), 300, pivot.getCount());
+ }
+ pivot = pivots.get(5);
+ assertEquals(pivot.toString(), "tail", pivot.getValue());
+ assertEquals(pivot.toString(), 135, pivot.getCount());
+ // check the sub pivots
+ pivots = pivot.getPivot();
+ assertEquals(5, pivots.size());
+ pivot = pivots.get(0);
+ assertEquals(pivot.toString(), "tailB", pivot.getValue());
+ assertEquals(pivot.toString(), 17, pivot.getCount());
+ for (int i = 1; i < 5; i++) { // ccc(0-3)
+ pivot = pivots.get(i);
+ assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
+ assertEquals(pivot.toString(), 14, pivot.getCount());
+ }
+
+ // however with a lower limit and overrequesting disabled,
+ // we're going to miss out on tailB
+
+ pivots = queryServer( params( "q", "*:*",
+ "shards", getShardsString(),
+ "facet", "true",
+ "facet.limit", "6",
+ "f.bar_s.facet.overrequest.ratio", "0",
+ "f.bar_s.facet.overrequest.count", "0",
+ "f.bar_s.facet.limit", "5",
+ "facet.pivot", "foo_s,bar_s" )
+ ).getFacetPivot().get("foo_s,bar_s");
+ assertEquals(6, pivots.size());
+ for (int i = 0; i < 5; i++) {
+ pivot = pivots.get(i);
+ assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("aaa"));
+ assertEquals(pivot.toString(), 300, pivot.getCount());
+ }
+ pivot = pivots.get(5);
+ assertEquals(pivot.toString(), "tail", pivot.getValue());
+ assertEquals(pivot.toString(), 135, pivot.getCount());
+ // check the sub pivots
+ pivots = pivot.getPivot();
+ assertEquals(5, pivots.size());
+ for (int i = 0; i < 5; i++) { // ccc(0-4)
+ pivot = pivots.get(i);
+ assertTrue(pivot.toString(), pivot.getValue().toString().startsWith("ccc"));
+ assertEquals(pivot.toString(), 14, pivot.getCount());
+ }
+ }
+
+}
Added: lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotSmallTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotSmallTest.java?rev=1617789&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotSmallTest.java (added)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotSmallTest.java Wed Aug 13 18:23:53 2014
@@ -0,0 +1,439 @@
+package org.apache.solr.handler.component;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.solr.BaseDistributedSearchTestCase;
+import org.apache.solr.client.solrj.response.PivotField;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.params.FacetParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+
+import junit.framework.AssertionFailedError;
+
+public class DistributedFacetPivotSmallTest extends BaseDistributedSearchTestCase {
+
+ public DistributedFacetPivotSmallTest() {
+ this.fixShardCount = true;
+ this.shardCount = 4;
+ }
+
+ @Override
+ public void doTest() throws Exception {
+
+ del("*:*");
+
+ // NOTE: we use the literal (4 character) string "null" as a company name
+ // to help ensure there aren't any bugs where the literal string is treated as if it
+ // were a true NULL value.
+ index(id, 19, "place_t", "cardiff dublin", "company_t", "microsoft polecat");
+ index(id, 20, "place_t", "dublin", "company_t", "polecat microsoft null");
+ index(id, 21, "place_t", "london la dublin", "company_t",
+ "microsoft fujitsu null polecat");
+ index(id, 22, "place_t", "krakow london cardiff", "company_t",
+ "polecat null bbc");
+ index(id, 23, "place_t", "london", "company_t", "");
+ index(id, 24, "place_t", "la", "company_t", "");
+ index(id, 25, "company_t", "microsoft polecat null fujitsu null bbc");
+ index(id, 26, "place_t", "krakow", "company_t", "null");
+ index(id, 27, "place_t", "krakow cardiff dublin london la", "company_t",
+ "null microsoft polecat bbc fujitsu");
+ index(id, 28, "place_t", "cork", "company_t",
+ "fujitsu rte");
+ commit();
+
+ handle.clear();
+ handle.put("QTime", SKIPVAL);
+ handle.put("timestamp", SKIPVAL);
+ handle.put("maxScore", SKIPVAL);
+
+
+ final ModifiableSolrParams params = new ModifiableSolrParams();
+ setDistributedParams(params);
+ params.add("q", "*:*");
+ params.add("facet", "true");
+ params.add("facet.pivot", "place_t,company_t");
+
+
+ QueryResponse rsp = queryServer(params);
+
+ List<PivotField> expectedPlacePivots = new UnorderedEqualityArrayList<PivotField>();
+ List<PivotField> expectedCardiffPivots = new UnorderedEqualityArrayList<PivotField>();
+ expectedCardiffPivots.add(new ComparablePivotField("company_t", "microsoft", 2, null));
+ expectedCardiffPivots.add(new ComparablePivotField("company_t", "null", 2, null));
+ expectedCardiffPivots.add(new ComparablePivotField("company_t", "bbc", 2, null));
+ expectedCardiffPivots.add(new ComparablePivotField("company_t", "polecat", 3, null));
+ expectedCardiffPivots.add(new ComparablePivotField("company_t", "fujitsu", 1, null));
+ List<PivotField> expectedDublinPivots = new UnorderedEqualityArrayList<PivotField>();
+ expectedDublinPivots.add(new ComparablePivotField("company_t", "polecat", 4, null));
+ expectedDublinPivots.add(new ComparablePivotField("company_t", "microsoft", 4, null));
+ expectedDublinPivots.add(new ComparablePivotField("company_t", "null", 3, null));
+ expectedDublinPivots.add(new ComparablePivotField("company_t", "fujitsu", 2, null));
+ expectedDublinPivots.add(new ComparablePivotField("company_t", "bbc", 1, null));
+ List<PivotField> expectedLondonPivots = new UnorderedEqualityArrayList<PivotField>();
+ expectedLondonPivots.add(new ComparablePivotField("company_t", "polecat", 3, null));
+ expectedLondonPivots.add(new ComparablePivotField("company_t", "microsoft", 2, null));
+ expectedLondonPivots.add(new ComparablePivotField("company_t", "fujitsu", 2, null));
+ expectedLondonPivots.add(new ComparablePivotField("company_t", "null", 3,null));
+ expectedLondonPivots.add(new ComparablePivotField("company_t", "bbc", 2, null));
+ List<PivotField> expectedLAPivots = new UnorderedEqualityArrayList<PivotField>();
+ expectedLAPivots.add(new ComparablePivotField("company_t", "microsoft", 2,null));
+ expectedLAPivots.add(new ComparablePivotField("company_t", "fujitsu", 2,null));
+ expectedLAPivots.add(new ComparablePivotField("company_t", "null", 2, null));
+ expectedLAPivots.add(new ComparablePivotField("company_t", "bbc", 1, null));
+ expectedLAPivots.add(new ComparablePivotField("company_t", "polecat", 2,null));
+ List<PivotField> expectedKrakowPivots = new UnorderedEqualityArrayList<PivotField>();
+ expectedKrakowPivots.add(new ComparablePivotField("company_t", "polecat",2, null));
+ expectedKrakowPivots.add(new ComparablePivotField("company_t", "bbc", 2, null));
+ expectedKrakowPivots.add(new ComparablePivotField("company_t", "null", 3,null));
+ expectedKrakowPivots.add(new ComparablePivotField("company_t", "fujitsu", 1, null));
+ expectedKrakowPivots.add(new ComparablePivotField("company_t", "microsoft", 1, null));
+ List<PivotField> expectedCorkPivots = new UnorderedEqualityArrayList<PivotField>();
+ expectedCorkPivots.add(new ComparablePivotField("company_t", "fujitsu", 1, null));
+ expectedCorkPivots.add(new ComparablePivotField("company_t", "rte", 1, null));
+ expectedPlacePivots.add(new ComparablePivotField("place_t", "dublin", 4, expectedDublinPivots));
+ expectedPlacePivots.add(new ComparablePivotField("place_t", "cardiff", 3, expectedCardiffPivots));
+ expectedPlacePivots.add(new ComparablePivotField("place_t", "london", 4, expectedLondonPivots));
+ expectedPlacePivots.add(new ComparablePivotField("place_t", "la", 3, expectedLAPivots));
+ expectedPlacePivots.add(new ComparablePivotField("place_t", "krakow", 3, expectedKrakowPivots));
+ expectedPlacePivots.add(new ComparablePivotField("place_t", "cork", 1, expectedCorkPivots));
+
+
+ List<PivotField> placePivots = rsp.getFacetPivot().get("place_t,company_t");
+
+ // Useful to check for errors, orders lists and does toString() equality
+ // check
+ testOrderedPivotsStringEquality(expectedPlacePivots, placePivots);
+
+ assertEquals(expectedPlacePivots, placePivots);
+
+ // Test sorting by count
+
+ params.set(FacetParams.FACET_SORT, FacetParams.FACET_SORT_COUNT);
+
+ rsp = queryServer(params);
+
+ placePivots = rsp.getFacetPivot().get("place_t,company_t");
+
+ testCountSorting(placePivots);
+
+ // Test limit
+
+ params.set(FacetParams.FACET_LIMIT, 2);
+
+ rsp = queryServer(params);
+
+ expectedPlacePivots = new UnorderedEqualityArrayList<PivotField>();
+ expectedDublinPivots = new UnorderedEqualityArrayList<PivotField>();
+ expectedDublinPivots.add(new ComparablePivotField("company_t", "polecat",
+ 4, null));
+ expectedDublinPivots.add(new ComparablePivotField("company_t", "microsoft",
+ 4, null));
+ expectedLondonPivots = new UnorderedEqualityArrayList<PivotField>();
+ expectedLondonPivots.add(new ComparablePivotField("company_t", "null", 3,
+ null));
+ expectedLondonPivots.add(new ComparablePivotField("company_t", "polecat", 3,
+ null));
+ expectedPlacePivots.add(new ComparablePivotField("place_t", "dublin", 4,
+ expectedDublinPivots));
+ expectedPlacePivots.add(new ComparablePivotField("place_t", "london", 4,
+ expectedLondonPivots));
+
+ placePivots = rsp.getFacetPivot().get("place_t,company_t");
+
+ assertEquals(expectedPlacePivots, placePivots);
+
+ // Test individual facet.limit values
+ params.remove(FacetParams.FACET_LIMIT);
+
+ params.set("f.place_t." + FacetParams.FACET_LIMIT, 1);
+ params.set("f.company_t." + FacetParams.FACET_LIMIT, 4);
+
+ rsp = queryServer(params);
+
+ expectedPlacePivots = new UnorderedEqualityArrayList<PivotField>();
+
+ expectedDublinPivots = new UnorderedEqualityArrayList<PivotField>();
+ expectedDublinPivots.add(new ComparablePivotField("company_t", "microsoft",4, null));
+ expectedDublinPivots.add(new ComparablePivotField("company_t", "polecat",4, null));
+ expectedDublinPivots.add(new ComparablePivotField("company_t", "null",3, null));
+ expectedDublinPivots.add(new ComparablePivotField("company_t", "fujitsu",2, null));
+
+ expectedLondonPivots = new UnorderedEqualityArrayList<PivotField>();
+ expectedLondonPivots.add(new ComparablePivotField("company_t", "null", 3, null));
+ expectedLondonPivots.add(new ComparablePivotField("company_t", "polecat", 3, null));
+ expectedLondonPivots.add(new ComparablePivotField("company_t", "bbc", 2, null));
+ expectedLondonPivots.add(new ComparablePivotField("company_t", "fujitsu", 2, null));
+
+ expectedCardiffPivots = new UnorderedEqualityArrayList<PivotField>();
+ expectedCardiffPivots.add(new ComparablePivotField("company_t", "polecat", 3, null));
+
+ expectedKrakowPivots = new UnorderedEqualityArrayList<PivotField>();
+ expectedKrakowPivots.add(new ComparablePivotField("company_t", "null", 3, null));
+
+ expectedLAPivots = new UnorderedEqualityArrayList<PivotField>();
+ expectedLAPivots.add(new ComparablePivotField("company_t", "fujitsu", 2, null));
+
+ expectedCorkPivots = new UnorderedEqualityArrayList<PivotField>();
+ expectedCorkPivots.add(new ComparablePivotField("company_t", "fujitsu", 1, null));
+
+ expectedPlacePivots.add(new ComparablePivotField("place_t", "dublin", 4, expectedDublinPivots));
+
+ placePivots = rsp.getFacetPivot().get("place_t,company_t");
+ assertEquals(expectedPlacePivots, placePivots);
+
+ params.remove("f.company_t." + FacetParams.FACET_LIMIT);
+ params.remove("f.place_t." + FacetParams.FACET_LIMIT);
+ params.set(FacetParams.FACET_LIMIT, 2);
+
+ // Test facet.missing=true with diff sorts
+
+ index("id",777); // NOTE: id=25 has no place as well
+ commit();
+
+ SolrParams missingA = params( "q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "facet.pivot","place_t,company_t",
+ // default facet.sort
+ FacetParams.FACET_MISSING, "true" );
+ SolrParams missingB = SolrParams.wrapDefaults(missingA,
+ params(FacetParams.FACET_LIMIT, "4",
+ "facet.sort", "index"));
+ for (SolrParams p : new SolrParams[] { missingA, missingB }) {
+ // in either case, the last pivot option should be the same
+ rsp = query( p );
+ placePivots = rsp.getFacetPivot().get("place_t,company_t");
+ assertTrue("not enough values for pivot: " + p + " => " + placePivots,
+ 1 < placePivots.size());
+ PivotField missing = placePivots.get(placePivots.size()-1);
+ assertNull("not the missing place value: " + p, missing.getValue());
+ assertEquals("wrong missing place count: " + p, 2, missing.getCount());
+ assertTrue("not enough sub-pivots for missing place: "+ p +" => " + missing.getPivot(),
+ 1 < missing.getPivot().size());
+ missing = missing.getPivot().get(missing.getPivot().size()-1);
+ assertNull("not the missing company value: " + p, missing.getValue());
+ assertEquals("wrong missing company count: " + p, 1, missing.getCount());
+ assertNull("company shouldn't have sub-pivots: " + p, missing.getPivot());
+ }
+
+ // sort=index + mincount + limit
+ for (SolrParams variableParams : new SolrParams[] {
+ // we should get the same results regardless of overrequest
+ params("facet.overrequest.count","0",
+ "facet.overrequest.ratio","0"),
+ params() }) {
+
+
+ SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "facet.pivot","company_t",
+ "facet.sort", "index",
+ "facet.pivot.mincount", "4",
+ "facet.limit", "4"),
+ variableParams );
+
+ try {
+ List<PivotField> pivots = query( p ).getFacetPivot().get("company_t");
+ assertEquals(4, pivots.size());
+ assertEquals("fujitsu", pivots.get(0).getValue());
+ assertEquals(4, pivots.get(0).getCount());
+ assertEquals("microsoft", pivots.get(1).getValue());
+ assertEquals(5, pivots.get(1).getCount());
+ assertEquals("null", pivots.get(2).getValue());
+ assertEquals(6, pivots.get(2).getCount());
+ assertEquals("polecat", pivots.get(3).getValue());
+ assertEquals(6, pivots.get(3).getCount());
+
+ } catch (AssertionFailedError ae) {
+ throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
+ }
+ }
+
+ // sort=index + mincount + limit + offset
+ for (SolrParams variableParams : new SolrParams[] {
+ // we should get the same results regardless of overrequest
+ params("facet.overrequest.count","0",
+ "facet.overrequest.ratio","0"),
+ params() }) {
+
+ SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "facet.pivot","company_t",
+ "facet.sort", "index",
+ "facet.pivot.mincount", "4",
+ "facet.offset", "1",
+ "facet.limit", "4"),
+ variableParams );
+ try {
+ List<PivotField> pivots = query( p ).getFacetPivot().get("company_t");
+ assertEquals(3, pivots.size()); // asked for 4, but not enough meet the mincount
+ assertEquals("microsoft", pivots.get(0).getValue());
+ assertEquals(5, pivots.get(0).getCount());
+ assertEquals("null", pivots.get(1).getValue());
+ assertEquals(6, pivots.get(1).getCount());
+ assertEquals("polecat", pivots.get(2).getValue());
+ assertEquals(6, pivots.get(2).getCount());
+
+ } catch (AssertionFailedError ae) {
+ throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
+ }
+
+ }
+
+ // sort=index + mincount + limit + offset (more permutations)
+ for (SolrParams variableParams : new SolrParams[] {
+ // all of these combinations should result in the same first value
+ params("facet.pivot.mincount", "4",
+ "facet.offset", "2"),
+ params("facet.pivot.mincount", "5",
+ "facet.offset", "1"),
+ params("facet.pivot.mincount", "6",
+ "facet.offset", "0" ) }) {
+
+ SolrParams p = SolrParams.wrapDefaults( params( "q", "*:*",
+ "rows", "0",
+ "facet","true",
+ "facet.limit","1",
+ "facet.sort","index",
+ "facet.overrequest.ratio","0",
+ "facet.pivot", "company_t"),
+ variableParams );
+
+ try {
+ List<PivotField> pivots = query( p ).getFacetPivot().get("company_t");
+ assertEquals(1, pivots.size());
+ assertEquals(pivots.toString(), "null", pivots.get(0).getValue());
+ assertEquals(pivots.toString(), 6, pivots.get(0).getCount());
+
+ } catch (AssertionFailedError ae) {
+ throw new AssertionError(ae.getMessage() + " <== " + p.toString(), ae);
+ }
+ }
+ }
+
+ // Useful to check for errors, orders lists and does toString() equality check
+ private void testOrderedPivotsStringEquality(
+ List<PivotField> expectedPlacePivots, List<PivotField> placePivots) {
+ Collections.sort(expectedPlacePivots, new PivotFieldComparator());
+ for (PivotField expectedPivot : expectedPlacePivots) {
+ if (expectedPivot.getPivot() != null) {
+ Collections.sort(expectedPivot.getPivot(), new PivotFieldComparator());
+ }
+ }
+ Collections.sort(placePivots, new PivotFieldComparator());
+ for (PivotField pivot : placePivots) {
+ if (pivot.getPivot() != null) {
+ Collections.sort(pivot.getPivot(), new PivotFieldComparator());
+ }
+ }
+ assertEquals(expectedPlacePivots.toString(), placePivots.toString());
+ }
+
+ private void testCountSorting(List<PivotField> pivots) {
+ Integer lastCount = null;
+ for (PivotField pivot : pivots) {
+ if (lastCount != null) {
+ assertTrue(pivot.getCount() <= lastCount);
+ }
+ lastCount = pivot.getCount();
+ if (pivot.getPivot() != null) {
+ testCountSorting(pivot.getPivot());
+ }
+ }
+ }
+
+ public static class ComparablePivotField extends PivotField {
+
+
+ public ComparablePivotField(String f, Object v, int count,
+ List<PivotField> pivot) {
+ super(f,v,count,pivot);
+ }
+
+ /**
+ * Field-by-field equality (count, field name, sub-pivots, value) against any PivotField.
+ */
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) return true;
+ // instanceof rejects null and non-PivotField arguments without risking a
+ // ClassCastException on the cast below, and accepts PivotField subclasses
+ if (!(obj instanceof PivotField)) return false;
+ PivotField other = (PivotField) obj;
+ if (getCount() != other.getCount()) return false;
+ if (getField() == null ? other.getField() != null : !getField().equals(other.getField())) return false;
+ if (getPivot() == null ? other.getPivot() != null : !getPivot().equals(other.getPivot())) return false;
+ if (getValue() == null) {
+ return other.getValue() == null;
+ }
+ return getValue().equals(other.getValue());
+ }
+ }
+
+ public static class UnorderedEqualityArrayList<T> extends ArrayList<T> {
+
+ @Override
+ public boolean equals(Object o) {
+ boolean equal = false;
+ if (o instanceof ArrayList) {
+ List<?> otherList = (List<?>) o;
+ if (size() == otherList.size()) {
+ equal = true;
+ for (Object objectInOtherList : otherList) {
+ if (!contains(objectInOtherList)) {
+ equal = false;
+ }
+ }
+ }
+ }
+ return equal;
+ }
+
+ public int indexOf(Object o) {
+ for (int i = 0; i < size(); i++) {
+ if (get(i).equals(o)) {
+ return i;
+ }
+ }
+ return -1;
+ }
+ }
+
+ public class PivotFieldComparator implements Comparator<PivotField> {
+
+ @Override
+ public int compare(PivotField o1, PivotField o2) {
+ Integer compare = (Integer.valueOf(o2.getCount())).compareTo(Integer
+ .valueOf(o1.getCount()));
+ if (compare == 0) {
+ compare = ((String) o2.getValue()).compareTo((String) o1.getValue());
+ }
+ return compare;
+ }
+
+ }
+
+}
Added: lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/TestPivotHelperCode.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/TestPivotHelperCode.java?rev=1617789&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/TestPivotHelperCode.java (added)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/TestPivotHelperCode.java Wed Aug 13 18:23:53 2014
@@ -0,0 +1,118 @@
+package org.apache.solr.handler.component;
+
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.handler.component.PivotFacetField;
+
+import org.apache.lucene.util.TestUtil;
+
+import java.util.List;
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.Collections;
+
+/**
+ * A light weight test of various helper methods used in pivot faceting
+ *
+ **/
+public class TestPivotHelperCode extends SolrTestCaseJ4{
+
+ /**
+ * test refinement encoding/decoding matches specific expected encoded values
+ * @see PivotFacetHelper#encodeRefinementValuePath
+ * @see PivotFacetHelper#decodeRefinementValuePath
+ */
+ public void testRefinementStringEncodingWhiteBox() {
+ // trivial example with some basic escaping of an embedded comma
+ assertBiDirectionalEncoding(strs("foo,bar","yak","zat"), "~foo\\,bar,~yak,~zat");
+
+ // simple single valued case
+ assertBiDirectionalEncoding( strs("foo"), "~foo");
+
+ // special case: empty list
+ assertBiDirectionalEncoding(strs(), "");
+
+ // special case: single element list containing empty string
+ assertBiDirectionalEncoding(strs(""), "~");
+
+ // special case: single element list containing null
+ assertBiDirectionalEncoding(strs((String)null), "^");
+
+ // mix of empty strings & null with other values
+ assertBiDirectionalEncoding(strs("", "foo", "", "", null, "bar"),
+ "~,~foo,~,~,^,~bar");
+ }
+
+ /**
+ * test refinement encoding/decoding of random sets of values can be round tripped,
+ * w/o worrying about what the actual encoding looks like
+ *
+ * @see PivotFacetHelper#encodeRefinementValuePath
+ * @see PivotFacetHelper#decodeRefinementValuePath
+ */
+ public void testRefinementStringEncodingBlockBoxRoundTrip() {
+ // random data: we should be able to round trip any set of random strings
+ final int numIters = atLeast(100);
+ for (int i = 0; i < numIters; i++) {
+ final int numStrs = atLeast(1);
+ List<String> data = new ArrayList<String>(numStrs);
+ for (int j = 0; j < numStrs; j++) {
+ // :TODO: mix in nulls
+ data.add(TestUtil.randomUnicodeString(random()));
+ }
+ String encoded = PivotFacetHelper.encodeRefinementValuePath(data);
+ List<String> decoded = PivotFacetHelper.decodeRefinementValuePath(encoded);
+ assertEquals(data, decoded);
+ }
+
+ }
+
+ private void assertBiDirectionalEncoding(List<String> data, String encoded) {
+ assertEquals(data, PivotFacetHelper.decodeRefinementValuePath(encoded));
+ assertEquals(encoded, PivotFacetHelper.encodeRefinementValuePath(data));
+ }
+
+
+ public void testCompareWithNullLast() throws Exception {
+ Long a = random().nextLong();
+ Long b = random().nextLong();
+
+ assertEquals(a.compareTo(b), PivotFacetFieldValueCollection.compareWithNullLast(a, b));
+ assertEquals(b.compareTo(a), PivotFacetFieldValueCollection.compareWithNullLast(b, a));
+
+ Long bb = new Long(b.longValue());
+ assertEquals(0, PivotFacetFieldValueCollection.compareWithNullLast(b, bb));
+
+ assertEquals(0, PivotFacetFieldValueCollection.compareWithNullLast(null, null));
+
+ assertTrue( PivotFacetFieldValueCollection.compareWithNullLast(a, null) < 0 );
+ assertTrue( PivotFacetFieldValueCollection.compareWithNullLast(b, null) < 0 );
+
+ assertTrue( 0 < PivotFacetFieldValueCollection.compareWithNullLast(null, a) );
+ assertTrue( 0 < PivotFacetFieldValueCollection.compareWithNullLast(null, b) );
+
+ }
+
+
+ private List<String> strs(String... strs) {
+ return Arrays.<String>asList(strs);
+ }
+
+}
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/util/TestUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/util/TestUtils.java?rev=1617789&r1=1617788&r2=1617789&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/util/TestUtils.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/util/TestUtils.java Wed Aug 13 18:23:53 2014
@@ -17,6 +17,7 @@
package org.apache.solr.util;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@@ -31,6 +32,29 @@ import org.junit.Assert;
*
*/
public class TestUtils extends LuceneTestCase {
+
+ public void testJoin() { // pins StrUtils.join output for plain items and for items needing escapes
+ assertEquals("a|b|c", StrUtils.join(Arrays.asList("a","b","c"), '|'));
+ assertEquals("a,b,c", StrUtils.join(Arrays.asList("a","b","c"), ','));
+ assertEquals("a\\,b,c", StrUtils.join(Arrays.asList("a,b","c"), ',')); // separator inside an item gets a backslash prefix
+ assertEquals("a,b|c", StrUtils.join(Arrays.asList("a,b","c"), '|')); // a comma is left alone when it is not the separator
+ // a backslash inside an item is itself backslash escaped
+ assertEquals("a\\\\b|c", StrUtils.join(Arrays.asList("a\\b","c"), '|'));
+ }
+
+ public void testEscapeTextWithSeparator() { // pins the escaping rules of StrUtils.escapeTextWithSeparator
+ assertEquals("a", StrUtils.escapeTextWithSeparator("a", '|')); // nothing to escape: text comes back unchanged
+ assertEquals("a", StrUtils.escapeTextWithSeparator("a", ','));
+ // only the chosen separator char and backslashes are escaped; the other punctuation char is untouched
+ assertEquals("a\\|b", StrUtils.escapeTextWithSeparator("a|b", '|'));
+ assertEquals("a|b", StrUtils.escapeTextWithSeparator("a|b", ','));
+ assertEquals("a,b", StrUtils.escapeTextWithSeparator("a,b", '|'));
+ assertEquals("a\\,b", StrUtils.escapeTextWithSeparator("a,b", ','));
+ assertEquals("a\\\\b", StrUtils.escapeTextWithSeparator("a\\b", ','));
+ // input that already holds backslash + separator: each of the two chars gets its own escape
+ assertEquals("a\\\\\\,b", StrUtils.escapeTextWithSeparator("a\\,b", ','));
+ }
+
public void testSplitEscaping() {
List<String> arr = StrUtils.splitSmart("\\r\\n:\\t\\f\\b", ":", true);
assertEquals(2,arr.size());
Modified: lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java?rev=1617789&r1=1617788&r2=1617789&view=diff
==============================================================================
--- lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java (original)
+++ lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java Wed Aug 13 18:23:53 2014
@@ -390,10 +390,19 @@ public class QueryResponse extends SolrR
ArrayList<PivotField> values = new ArrayList<>( list.size() );
for( NamedList nl : list ) {
// NOTE, this is cheating, but we know the order they are written in, so no need to check
+ assert "field".equals(nl.getName(0));
String f = (String)nl.getVal( 0 );
+ assert "value".equals(nl.getName(1));
Object v = nl.getVal( 1 );
+ assert "count".equals(nl.getName(2));
int cnt = ((Integer)nl.getVal( 2 )).intValue();
- List<PivotField> p = (nl.size()<4)?null:readPivots((List<NamedList>)nl.getVal(3) );
+ List<PivotField> p = null;
+ if (4 <= nl.size()) {
+ assert "pivot".equals(nl.getName(3));
+ Object subPiv = nl.getVal(3);
+ assert null != subPiv : "Server sent back 'null' for sub pivots?";
+ p = readPivots( (List<NamedList>) subPiv );
+ }
values.add( new PivotField( f, v, cnt, p ) );
}
return values;
Modified: lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java?rev=1617789&r1=1617788&r2=1617789&view=diff
==============================================================================
--- lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java (original)
+++ lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java Wed Aug 13 18:23:53 2014
@@ -99,6 +99,24 @@ public interface FacetParams {
*/
public static final String FACET_MISSING = FACET + ".missing";
+
+ static final String FACET_OVERREQUEST = FACET + ".overrequest"; // shared prefix of the two over-request param names below
+
+ /**
+ * The ratio to over-request by when performing initial distributed requests.
+ *
+ * default value is 1.5
+ */
+ public static final String FACET_OVERREQUEST_RATIO = FACET_OVERREQUEST + ".ratio";
+
+ /**
+ * An additional amount to over-request by when performing initial distributed requests. This
+ * value will be added after accounting for the over-request ratio.
+ *
+ * default value is 10
+ */
+ public static final String FACET_OVERREQUEST_COUNT = FACET_OVERREQUEST + ".count";
+
/**
* Comma separated list of fields to pivot
Modified: lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java?rev=1617789&r1=1617788&r2=1617789&view=diff
==============================================================================
--- lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java (original)
+++ lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java Wed Aug 13 18:23:53 2014
@@ -143,7 +143,10 @@ public class StrUtils {
return result;
}
- /** Creates a backslash escaped string, joining all the items. */
+ /**
+ * Creates a backslash escaped string, joining all the items.
+ * @see #escapeTextWithSeparator
+ */
public static String join(List<?> items, char separator) {
StringBuilder sb = new StringBuilder(items.size() << 3);
boolean first=true;
@@ -154,13 +157,7 @@ public class StrUtils {
} else {
sb.append(separator);
}
- for (int i=0; i<item.length(); i++) {
- char ch = item.charAt(i);
- if (ch=='\\' || ch == separator) {
- sb.append('\\');
- }
- sb.append(ch);
- }
+ appendEscapedTextToBuilder(sb, item, separator);
}
return sb.toString();
}
@@ -283,4 +280,31 @@ public class StrUtils {
}
}
+ /**
+ * Creates a new copy of the string with every backslash and every separator char backslash escaped.
+ * @see #join
+ */
+ public static String escapeTextWithSeparator(String item, char separator) {
+ StringBuilder sb = new StringBuilder(item.length() * 2); // sized for the worst case: every char needs an escape
+ appendEscapedTextToBuilder(sb, item, separator);
+ return sb.toString();
+ }
+
+ /**
+ * Writes chars from item to out, prefixing each backslash and each occurrence of separator
+ * with a backslash -- but does not append the separator itself.
+ */
+ public static void appendEscapedTextToBuilder(StringBuilder out,
+ String item,
+ char separator) {
+ for (int i = 0; i < item.length(); i++) {
+ char ch = item.charAt(i);
+ if (ch == '\\' || ch == separator) { // the two chars that need an escape
+ out.append('\\');
+ }
+ out.append(ch); // the char itself is always written, escaped or not
+ }
+ }
+
+
}
Modified: lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java?rev=1617789&r1=1617788&r2=1617789&view=diff
==============================================================================
--- lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java (original)
+++ lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java Wed Aug 13 18:23:53 2014
@@ -58,6 +58,7 @@ import org.apache.solr.request.SolrQuery
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.TrieDateField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.servlet.DirectSolrConnection;
import org.apache.solr.util.AbstractSolrTestCase;
@@ -93,11 +94,13 @@ import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
+import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.logging.ConsoleHandler;
@@ -2050,5 +2053,44 @@ public abstract class SolrTestCaseJ4 ext
return true;
}
+ /**
+ * Returns <code>likely</code> most (9/10) of the time, otherwise <code>unlikely</code> (1/10 of the time)
+ */
+ public static Object skewed(Object likely, Object unlikely) {
+ return (0 == TestUtil.nextInt(random(), 0, 9)) ? unlikely : likely; // 0 is one of ten values, assuming nextInt's bounds are inclusive
+ }
+
+ /**
+ * Returns a String holding a randomly generated Date in the appropriate Solr external (input) format
+ * @see #randomSkewedDate
+ */
+ public static String randomDate() {
+ return TrieDateField.formatExternal(new Date(random().nextLong())); // any long is used as the epoch-millis value
+ }
+
+ /**
+ * Returns a date String such that all results from this method always have the same values for
+ * year+month+day+hour+minute (2010-10-31T10:31) but the seconds are randomized. This can be helpful
+ * for indexing documents with random date values that are biased for a narrow window
+ * (one minute) to test collisions/overlaps
+ *
+ * @see #randomDate
+ */
+ public static String randomSkewedDate() {
+ return String.format(Locale.ROOT, "2010-10-31T10:31:%02d.000Z",
+ TestUtil.nextInt(random(), 0, 59)); // the seconds field is the only randomized part
+ }
+
+ /**
+ * We want "realistic" unicode strings beyond simple ascii, but because our updates use XML we need
+ * to ensure we don't get the "Specials" code block: such a result is swapped for a random simple string.
+ */
+ public static String randomXmlUsableUnicodeString() {
+ String result = TestUtil.randomRealisticUnicodeString(random());
+ if (result.matches(".*\\p{InSpecials}.*")) { // NOTE(review): '.' does not match line terminators, so a string with a newline never matches -- confirm intended
+ result = TestUtil.randomSimpleString(random()); // fallback used instead of the rejected string
+ }
+ return result;
+ }
}