You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2018/10/29 19:07:35 UTC
[11/32] lucene-solr:jira/solr-12730: SOLR-12793: Move
TestCloudJSONFacetJoinDomain and TestCloudJSONFacetSKG to the facet test
package
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/71988c75/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetJoinDomain.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetJoinDomain.java b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetJoinDomain.java
new file mode 100644
index 0000000..91b912f
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetJoinDomain.java
@@ -0,0 +1,855 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search.facet;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.lucene.util.TestUtil;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.cloud.AbstractDistribZkTestBase;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.cloud.TestCloudPivotFacet;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * <p>
+ * Tests randomized JSON Facets, sometimes using query 'join' domain transfers and/or domain 'filter' options
+ * </p>
+ * <p>
+ * The results of each facet constraint count will be compared with a verification query using an equivalent filter
+ * </p>
+ *
+ * @see TestCloudPivotFacet
+ */
+public class TestCloudJSONFacetJoinDomain extends SolrCloudTestCase {
+
+ /** Logger named after this test class */
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+ /** Fully qualified name of this class, used as the prefix for the collection and config-set names */
+ private static final String DEBUG_LABEL = MethodHandles.lookup().lookupClass().getName();
+ /** Name of the collection created (and set as the cloud client's default) during suite setup */
+ private static final String COLLECTION_NAME = DEBUG_LABEL + "_collection";
+
+ /** The limit assumed to be in effect when a facet does not specify one */
+ private static final int DEFAULT_LIMIT = FacetField.DEFAULT_FACET_LIMIT;
+ /** Exclusive upper bound on the field numbers used to generate field names */
+ private static final int MAX_FIELD_NUM = 15;
+ /** Number of distinct values that may be generated for any single field number */
+ private static final int UNIQUE_FIELD_VALS = 20;
+
+ // NOTE: set to 'true' to see if refinement testing is adequate (should get fails occasionally)
+ private static final boolean FORCE_DISABLE_REFINEMENT = false;
+
+ /** Multivalued string field suffixes that can be randomized for testing diff facet/join code paths */
+ private static final String[] STR_FIELD_SUFFIXES = new String[] { "_ss", "_sds", "_sdsS" };
+ /** Multivalued int field suffixes that can be randomized for testing diff facet/join code paths */
+ private static final String[] INT_FIELD_SUFFIXES = new String[] { "_is", "_ids", "_idsS" };
+
+ /** A basic client for operations at the cloud level, default collection will be set */
+ private static CloudSolrClient CLOUD_CLIENT;
+ /** One client per node */
+ private static ArrayList<HttpSolrClient> CLIENTS = new ArrayList<>(5);
+
+ /**
+ * One-time suite setup: starts a cluster with <code>numShards * repFactor</code> nodes, creates the
+ * test collection, builds one HTTP client per Jetty node, and indexes randomized documents.
+ * Every document gets between 0 and 6 values for each field number, and each value is written to both
+ * the string variant and the int variant of that field.
+ */
+ @BeforeClass
+ private static void createMiniSolrCloudCluster() throws Exception {
+ // sanity check constants
+ assertTrue("bad test constants: some suffixes will never be tested",
+ (STR_FIELD_SUFFIXES.length < MAX_FIELD_NUM) && (INT_FIELD_SUFFIXES.length < MAX_FIELD_NUM));
+
+ // we need DVs on point fields to compute stats & facets
+ if (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP)) System.setProperty(NUMERIC_DOCVALUES_SYSPROP,"true");
+
+ // multi replicas should not matter...
+ final int repFactor = usually() ? 1 : 2;
+ // ... but we definitely want to test multiple shards
+ final int numShards = TestUtil.nextInt(random(), 1, (usually() ? 2 :3));
+ final int numNodes = (numShards * repFactor);
+
+ final String configName = DEBUG_LABEL + "_config-set";
+ final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf");
+
+ configureCluster(numNodes).addConfig(configName, configDir).configure();
+
+ Map<String, String> collectionProperties = new LinkedHashMap<>();
+ collectionProperties.put("config", "solrconfig-tlog.xml");
+ collectionProperties.put("schema", "schema_latest.xml");
+ CollectionAdminRequest.createCollection(COLLECTION_NAME, configName, numShards, repFactor)
+ .setProperties(collectionProperties)
+ .process(cluster.getSolrClient());
+
+ CLOUD_CLIENT = cluster.getSolrClient();
+ CLOUD_CLIENT.setDefaultCollection(COLLECTION_NAME);
+
+ waitForRecoveriesToFinish(CLOUD_CLIENT);
+
+ // one direct (non-cloud) client per node, each pointed at the test collection
+ for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
+ CLIENTS.add(getHttpSolrClient(jetty.getBaseUrl() + "/" + COLLECTION_NAME + "/"));
+ }
+
+ final int numDocs = atLeast(100);
+ for (int id = 0; id < numDocs; id++) {
+ SolrInputDocument doc = sdoc("id", ""+id);
+ for (int fieldNum = 0; fieldNum < MAX_FIELD_NUM; fieldNum++) {
+ // NOTE: some docs may have no value in a field
+ final int numValsThisDoc = TestUtil.nextInt(random(), 0, (usually() ? 3 : 6));
+ for (int v = 0; v < numValsThisDoc; v++) {
+ final String fieldValue = randFieldValue(fieldNum);
+
+ // for each fieldNum, there are actually two fields: one string, and one integer
+ doc.addField(field(STR_FIELD_SUFFIXES, fieldNum), fieldValue);
+ doc.addField(field(INT_FIELD_SUFFIXES, fieldNum), fieldValue);
+ }
+ }
+ CLOUD_CLIENT.add(doc);
+ if (random().nextInt(100) < 1) {
+ CLOUD_CLIENT.commit(); // commit 1% of the time to create new segments
+ }
+ if (random().nextInt(100) < 5) {
+ CLOUD_CLIENT.add(doc); // duplicate the doc 5% of the time to create deleted docs
+ }
+ }
+ // final commit so that everything indexed above is visible to the test methods
+ CLOUD_CLIENT.commit();
+ }
+
+ /**
+  * Maps a field number onto a deterministic field name, choosing the suffix by cycling through
+  * the supplied suffix array (<code>fieldNum % suffixes.length</code>).
+  *
+  * @param suffixes candidate suffixes to choose from
+  * @param fieldNum the field number, asserted to be less than {@link #MAX_FIELD_NUM}
+  * @return <code>"field_"</code> followed by the field number and the chosen suffix
+  * @see #STR_FIELD_SUFFIXES
+  * @see #INT_FIELD_SUFFIXES
+  * @see #MAX_FIELD_NUM
+  * @see #randFieldValue
+  */
+ private static String field(final String[] suffixes, final int fieldNum) {
+   assert fieldNum < MAX_FIELD_NUM;
+
+   final int suffixIdx = fieldNum % suffixes.length;
+   return new StringBuilder("field_").append(fieldNum).append(suffixes[suffixIdx]).toString();
+ }
+ private static String strfield(final int fieldNum) {
+ return field(STR_FIELD_SUFFIXES, fieldNum);
+ }
+ private static String intfield(final int fieldNum) {
+ return field(INT_FIELD_SUFFIXES, fieldNum);
+ }
+
+ /**
+ * Given a (random) field number, returns a random (integer based) value for that field.
+ * NOTE: The number of unique values in each field is constant acording to {@link #UNIQUE_FIELD_VALS}
+ * but the precise <em>range</em> of values will vary for each unique field number, such that cross field joins
+ * will match fewer documents based on how far apart the field numbers are.
+ *
+ * @see #UNIQUE_FIELD_VALS
+ * @see #field
+ */
+ private static String randFieldValue(final int fieldNum) {
+ return "" + (fieldNum + TestUtil.nextInt(random(), 1, UNIQUE_FIELD_VALS));
+ }
+
+
+ /**
+  * Suite teardown: closes the cloud client and every per-node client created during setup.
+  * Written to be safe even when setup failed part way through, so that a teardown error
+  * never hides the original setup failure.
+  */
+ @AfterClass
+ private static void afterClass() throws Exception {
+   // setup may have failed before the cloud client was ever assigned
+   if (null != CLOUD_CLIENT) {
+     CLOUD_CLIENT.close();
+     CLOUD_CLIENT = null;
+   }
+   for (HttpSolrClient client : CLIENTS) {
+     client.close();
+   }
+   // empty the list instead of nulling the field: the list is only created by the static initializer,
+   // so a null here would make setup fail if the suite is run again in the same JVM
+   CLIENTS.clear();
+ }
+
+ /**
+  * Sanity check that malformed 'join' domain requests produce errors: each one must fail with a
+  * BAD_REQUEST code and a message mentioning the 'join' domain change.
+  */
+ public void testMalformedGivesError() throws Exception {
+
+   ignoreException(".*'join' domain change.*");
+
+   for (String join : Arrays.asList("bogus",
+                                    "{ }",
+                                    "{ from:null, to:foo_s }",
+                                    "{ from:foo_s }",
+                                    "{ from:foo_s, to:foo_s, bogus:'what what?' }",
+                                    "{ to:foo_s, bogus:'what what?' }")) {
+     SolrException e = expectThrows(SolrException.class, () -> {
+       final SolrParams req = params("q", "*:*", "json.facet",
+                                     "{ x : { type:terms, field:x_s, domain: { join:"+join+" } } }");
+       // the response is irrelevant, the request is expected to fail
+       getRandClient(random()).request(new QueryRequest(req));
+     });
+     assertEquals(join + " -> " + e, SolrException.ErrorCode.BAD_REQUEST.code, e.code());
+     assertTrue(join + " -> " + e, e.getMessage().contains("'join' domain change"));
+   }
+ }
+
+ /**
+  * Checks the JSON rendering and the query rewriting of {@link JoinDomain} for each combination of
+  * join and filter: neither, join only, filter only, and both.
+  */
+ public void testSanityCheckDomainMethods() throws Exception {
+   // neither join nor filter: nothing to render, query is left untouched
+   final JoinDomain emptyDomain = new JoinDomain(null, null, null);
+   assertEquals(null, emptyDomain.toJSONFacetParamValue());
+   final SolrParams emptyOut = emptyDomain.applyDomainToQuery("safe_key", params("q","qqq"));
+   assertNotNull(emptyOut);
+   assertEquals(null, emptyOut.get("safe_key"));
+   assertEquals("qqq", emptyOut.get("q"));
+
+   // join only: original query moves to the safe key, and q becomes a join over it
+   final JoinDomain joinDomain = new JoinDomain("xxx", "yyy", null);
+   assertEquals("domain:{join:{from:xxx,to:yyy}}", joinDomain.toJSONFacetParamValue().toString());
+   final SolrParams joinOut = joinDomain.applyDomainToQuery("safe_key", params("q","qqq"));
+   assertNotNull(joinOut);
+   assertEquals("qqq", joinOut.get("safe_key"));
+   assertEquals("{!join from=xxx to=yyy v=$safe_key}", joinOut.get("q"));
+
+   // filter only: filter is ANDed in front of the original query, safe key is unused
+   final JoinDomain filterDomain = new JoinDomain(null, null, "zzz");
+   assertEquals("domain:{filter:'zzz'}", filterDomain.toJSONFacetParamValue().toString());
+   final SolrParams filterOut = filterDomain.applyDomainToQuery("safe_key", params("q","qqq"));
+   assertNotNull(filterOut);
+   assertEquals(null, filterOut.get("safe_key"));
+   assertEquals("zzz AND qqq", filterOut.get("q"));
+
+   // join and filter: filter is ANDed in front of the join over the safe key
+   final JoinDomain bothDomain = new JoinDomain("xxx", "yyy", "zzz");
+   assertEquals("domain:{join:{from:xxx,to:yyy},filter:'zzz'}", bothDomain.toJSONFacetParamValue().toString());
+   final SolrParams bothOut = bothDomain.applyDomainToQuery("safe_key", params("q","qqq"));
+   assertNotNull(bothOut);
+   assertEquals("qqq", bothOut.get("safe_key"));
+   assertEquals("zzz AND {!join from=xxx to=yyy v=$safe_key}", bothOut.get("q"));
+ }
+
+ /**
+ * Test some small, hand crafted, but non-trivial queries that are
+ * easier to trace/debug than a pure random monstrosity.
+ * (ie: if something obvious gets broken, this test may fail faster and in a more obvious way than testRandom)
+ * <p>
+ * Each scenario hands a fresh bucket budget to the verification helper, which decrements it once per
+ * verified bucket; the budget is then inspected to confirm that buckets were (or were not) checked.
+ * </p>
+ */
+ public void testBespoke() throws Exception {
+
+ { // sanity check our test methods can handle a query matching no docs
+ Map<String,TermFacet> facets = new LinkedHashMap<>();
+ TermFacet top = new TermFacet(strfield(9), new JoinDomain(strfield(5), strfield(9), strfield(9)+":[* TO *]"));
+ top.subFacets.put("sub", new TermFacet(strfield(11), new JoinDomain(strfield(8), strfield(8), null)));
+ facets.put("empty_top", top);
+ final AtomicInteger maxBuckets = new AtomicInteger(UNIQUE_FIELD_VALS);
+ assertFacetCountsAreCorrect(maxBuckets, facets, strfield(7) + ":bogus");
+ // budget must be untouched: no bucket should have been verified
+ assertEquals("Empty search result shouldn't have found a single bucket",
+ UNIQUE_FIELD_VALS, maxBuckets.get());
+ }
+
+ { // sanity check our test methods can handle a query where a facet filter prevents any doc from having terms
+ Map<String,TermFacet> facets = new LinkedHashMap<>();
+ TermFacet top = new TermFacet(strfield(9), new JoinDomain(null, null, "-*:*"));
+ top.subFacets.put("sub", new TermFacet(strfield(11), new JoinDomain(strfield(8), strfield(8), null)));
+ facets.put("filtered_top", top);
+ final AtomicInteger maxBuckets = new AtomicInteger(UNIQUE_FIELD_VALS);
+ assertFacetCountsAreCorrect(maxBuckets, facets, "*:*");
+ // budget must be untouched: no bucket should have been verified
+ assertEquals("Empty join filter shouldn't have found a single bucket",
+ UNIQUE_FIELD_VALS, maxBuckets.get());
+ }
+
+ { // sanity check our test methods can handle a query where a facet filter prevents any doc from having sub-terms
+ Map<String,TermFacet> facets = new LinkedHashMap<>();
+ TermFacet top = new TermFacet(strfield(9), new JoinDomain(strfield(8), strfield(8), null));
+ top.subFacets.put("sub", new TermFacet(strfield(11), new JoinDomain(null, null, "-*:*")));
+ facets.put("filtered_top", top);
+ final AtomicInteger maxBuckets = new AtomicInteger(UNIQUE_FIELD_VALS);
+ assertFacetCountsAreCorrect(maxBuckets, facets, "*:*");
+ assertTrue("Didn't check a single bucket???", maxBuckets.get() < UNIQUE_FIELD_VALS);
+ }
+
+ { // strings
+ Map<String,TermFacet> facets = new LinkedHashMap<>();
+ TermFacet top = new TermFacet(strfield(9), new JoinDomain(strfield(5), strfield(9), strfield(9)+":[* TO *]"));
+ top.subFacets.put("facet_5", new TermFacet(strfield(11), new JoinDomain(strfield(8), strfield(8), null)));
+ facets.put("facet_4", top);
+ final AtomicInteger maxBuckets = new AtomicInteger(UNIQUE_FIELD_VALS * UNIQUE_FIELD_VALS);
+ assertFacetCountsAreCorrect(maxBuckets, facets, "("+strfield(7)+":16 OR "+strfield(9)+":16 OR "+strfield(6)+":19 OR "+strfield(0)+":11)");
+ assertTrue("Didn't check a single bucket???", maxBuckets.get() < UNIQUE_FIELD_VALS * UNIQUE_FIELD_VALS);
+ }
+
+ { // ints
+ Map<String,TermFacet> facets = new LinkedHashMap<>();
+ TermFacet top = new TermFacet(intfield(9), new JoinDomain(intfield(5), intfield(9), null));
+ facets.put("top", top);
+ final AtomicInteger maxBuckets = new AtomicInteger(UNIQUE_FIELD_VALS * UNIQUE_FIELD_VALS);
+ assertFacetCountsAreCorrect(maxBuckets, facets, "("+intfield(7)+":16 OR "+intfield(3)+":13)");
+ assertTrue("Didn't check a single bucket???", maxBuckets.get() < UNIQUE_FIELD_VALS * UNIQUE_FIELD_VALS);
+ }
+
+ { // some domains with filter only, no actual join
+ Map<String,TermFacet> facets = new LinkedHashMap<>();
+ TermFacet top = new TermFacet(strfield(9), new JoinDomain(null, null, strfield(9)+":[* TO *]"));
+ top.subFacets.put("facet_5", new TermFacet(strfield(11), new JoinDomain(null, null, strfield(3)+":[* TO 5]")));
+ facets.put("top", top);
+ final AtomicInteger maxBuckets = new AtomicInteger(UNIQUE_FIELD_VALS * UNIQUE_FIELD_VALS);
+ assertFacetCountsAreCorrect(maxBuckets, facets, "("+strfield(7)+":16 OR "+strfield(9)+":16 OR "+strfield(6)+":19 OR "+strfield(0)+":11)");
+ assertTrue("Didn't check a single bucket???", maxBuckets.get() < UNIQUE_FIELD_VALS * UNIQUE_FIELD_VALS);
+
+ }
+
+ { // low limits, explicit refinement
+ Map<String,TermFacet> facets = new LinkedHashMap<>();
+ TermFacet top = new TermFacet(strfield(9),
+ new JoinDomain(strfield(5), strfield(9), strfield(9)+":[* TO *]"),
+ 5, 0, true);
+ top.subFacets.put("facet_5", new TermFacet(strfield(11),
+ new JoinDomain(strfield(8), strfield(8), null),
+ 10, 0, true));
+ facets.put("facet_4", top);
+ // budget is the product of the two limits: 5 top buckets, each with up to 10 sub buckets
+ final AtomicInteger maxBuckets = new AtomicInteger(5 * 10);
+ assertFacetCountsAreCorrect(maxBuckets, facets, "("+strfield(7)+":6 OR "+strfield(9)+":6 OR "+strfield(6)+":19 OR "+strfield(0)+":11)");
+ assertTrue("Didn't check a single bucket???", maxBuckets.get() < 5 * 10);
+ }
+
+ { // low limit, high overrequest
+ Map<String,TermFacet> facets = new LinkedHashMap<>();
+ TermFacet top = new TermFacet(strfield(9),
+ new JoinDomain(strfield(5), strfield(9), strfield(9)+":[* TO *]"),
+ 5, UNIQUE_FIELD_VALS + 10, false);
+ top.subFacets.put("facet_5", new TermFacet(strfield(11),
+ new JoinDomain(strfield(8), strfield(8), null),
+ 10, UNIQUE_FIELD_VALS + 10, false));
+ facets.put("facet_4", top);
+ final AtomicInteger maxBuckets = new AtomicInteger(5 * 10);
+ assertFacetCountsAreCorrect(maxBuckets, facets, "("+strfield(7)+":6 OR "+strfield(9)+":6 OR "+strfield(6)+":19 OR "+strfield(0)+":11)");
+ assertTrue("Didn't check a single bucket???", maxBuckets.get() < 5 * 10);
+ }
+
+ { // low limit, low overrequest, explicit refinement
+ Map<String,TermFacet> facets = new LinkedHashMap<>();
+ TermFacet top = new TermFacet(strfield(9),
+ new JoinDomain(strfield(5), strfield(9), strfield(9)+":[* TO *]"),
+ 5, 7, true);
+ top.subFacets.put("facet_5", new TermFacet(strfield(11),
+ new JoinDomain(strfield(8), strfield(8), null),
+ 10, 7, true));
+ facets.put("facet_4", top);
+ final AtomicInteger maxBuckets = new AtomicInteger(5 * 10);
+ assertFacetCountsAreCorrect(maxBuckets, facets, "("+strfield(7)+":6 OR "+strfield(9)+":6 OR "+strfield(6)+":19 OR "+strfield(0)+":11)");
+ assertTrue("Didn't check a single bucket???", maxBuckets.get() < 5 * 10);
+ }
+
+ }
+
+ public void testTheTestRandomRefineParam() {
+ // sanity check that randomRefineParam never violates isRefinementNeeded
+ // (should be imposisble ... unless someone changes/breaks the randomization logic in the future)
+ final int numIters = atLeast(100);
+ for (int iter = 0; iter < numIters; iter++) {
+ final Integer limit = TermFacet.randomLimitParam(random());
+ final Integer overrequest = TermFacet.randomOverrequestParam(random());
+ final Boolean refine = TermFacet.randomRefineParam(random(), limit, overrequest);
+ if (TermFacet.isRefinementNeeded(limit, overrequest)) {
+ assertEquals("limit: " + limit + ", overrequest: " + overrequest + ", refine: " + refine,
+ Boolean.TRUE, refine);
+ }
+ }
+ }
+
+ /** A low limit with no overrequest and refinement disabled must be rejected by the TermFacet constructor */
+ public void testTheTestTermFacetShouldFreakOutOnBadRefineOptions() {
+   expectThrows(AssertionError.class, () -> new TermFacet("foo", null, 5, 0, false));
+ }
+
+ public void testRandom() throws Exception {
+
+ // we put a safety valve in place on the maximum number of buckets that we are willing to verify
+ // across *all* the queries that we do.
+ // that way if the randomized queries we build all have relatively small facets, so be it, but if
+ // we get a really big one early on, we can test as much as possible, skip other iterations.
+ //
+ // (deeply nested facets may contain more buckets then the max, but we won't *check* all of them)
+ final int maxBucketsAllowed = atLeast(2000);
+ final AtomicInteger maxBucketsToCheck = new AtomicInteger(maxBucketsAllowed);
+
+ final int numIters = atLeast(20);
+ for (int iter = 0; iter < numIters && 0 < maxBucketsToCheck.get(); iter++) {
+ assertFacetCountsAreCorrect(maxBucketsToCheck, TermFacet.buildRandomFacets(), buildRandomQuery());
+ }
+ assertTrue("Didn't check a single bucket???", maxBucketsToCheck.get() < maxBucketsAllowed);
+ }
+
+ /**
+  * Builds a random query string over the randomized fields/values in the index: occasionally
+  * the match-all query, otherwise a parenthesized OR of 3 to 10 <code>field:value</code> clauses.
+  *
+  * @see #randFieldValue
+  * @see #field
+  */
+ private static String buildRandomQuery() {
+   final boolean matchAll = (0 == TestUtil.nextInt(random(), 0,10));
+   if (matchAll) {
+     return "*:*";
+   }
+   final int clauseCount = TestUtil.nextInt(random(), 3, 10);
+   final List<String> terms = new ArrayList<>(clauseCount);
+   while (terms.size() < clauseCount) {
+     final int fieldNum = random().nextInt(MAX_FIELD_NUM);
+     // keep queries simple, just use str fields - not point of test
+     terms.add(strfield(fieldNum) + ":" + randFieldValue(fieldNum));
+   }
+   return "(" + StringUtils.join(terms, " OR ") + ")";
+ }
+
+ /**
+ * Given a set of (potentially nested) term facets, and a base query string, asserts that
+ * the actual counts returned when executing that query with those facets match the expected results
+ * of filtering on the equivalent facet terms+domain
+ */
+ private void assertFacetCountsAreCorrect(final AtomicInteger maxBucketsToCheck,
+ Map<String,TermFacet> expected,
+ final String query) throws SolrServerException, IOException {
+
+ final SolrParams baseParams = params("q", query, "rows","0");
+ final SolrParams facetParams = params("json.facet", ""+TermFacet.toJSONFacetParamValue(expected));
+ final SolrParams initParams = SolrParams.wrapAppended(facetParams, baseParams);
+
+ log.info("Doing full run: {}", initParams);
+
+ QueryResponse rsp = null;
+ // JSON Facets not (currently) available from QueryResponse...
+ NamedList topNamedList = null;
+ try {
+ rsp = (new QueryRequest(initParams)).process(getRandClient(random()));
+ assertNotNull(initParams + " is null rsp?", rsp);
+ topNamedList = rsp.getResponse();
+ assertNotNull(initParams + " is null topNamedList?", topNamedList);
+ } catch (Exception e) {
+ throw new RuntimeException("init query failed: " + initParams + ": " +
+ e.getMessage(), e);
+ }
+ try {
+ final NamedList facetResponse = (NamedList) topNamedList.get("facets");
+ assertNotNull("null facet results?", facetResponse);
+ assertEquals("numFound mismatch with top count?",
+ rsp.getResults().getNumFound(), ((Number)facetResponse.get("count")).longValue());
+ if (0 == rsp.getResults().getNumFound()) {
+ // when the query matches nothing, we should expect no top level facets
+ expected = Collections.emptyMap();
+ }
+ assertFacetCountsAreCorrect(maxBucketsToCheck, expected, baseParams, facetResponse);
+ } catch (AssertionError e) {
+ throw new AssertionError(initParams + " ===> " + topNamedList + " --> " + e.getMessage(), e);
+ } finally {
+ log.info("Ending full run");
+ }
+ }
+
+ /**
+ * Recursive Helper method that walks the actual facet response, comparing the counts to the expected output
+ * based on the equivalent filters generated from the original TermFacet.
+ *
+ * @param maxBucketsToCheck shared budget, decremented once per verified bucket; the walk stops as soon as it hits zero
+ * @param expected the facets (by key) that must be present at this level of the response
+ * @param baseParams the params of the query whose results the facets at this level were computed over
+ * @param actualFacetResponse the facet response (or parent bucket) containing the results for the expected keys
+ */
+ private void assertFacetCountsAreCorrect(final AtomicInteger maxBucketsToCheck,
+ final Map<String,TermFacet> expected,
+ final SolrParams baseParams,
+ final NamedList actualFacetResponse) throws SolrServerException, IOException {
+
+ for (Map.Entry<String,TermFacet> entry : expected.entrySet()) {
+ final String facetKey = entry.getKey();
+ final TermFacet facet = entry.getValue();
+ final NamedList results = (NamedList) actualFacetResponse.get(facetKey);
+ assertNotNull(facetKey + " key missing from: " + actualFacetResponse, results);
+ final List<NamedList> buckets = (List<NamedList>) results.get("buckets");
+ assertNotNull(facetKey + " has null buckets: " + actualFacetResponse, buckets);
+
+ if (buckets.isEmpty()) {
+ // should only happen if the baseParams query does not match any docs with our field X
+ final long docsWithField = getRandClient(random()).query
+ (facet.applyValueConstraintAndDomain(baseParams, facetKey, "[* TO *]")).getResults().getNumFound();
+ assertEquals(facetKey + " has no buckets, but docs in query exist with field: " + facet.field,
+ 0, docsWithField);
+ }
+
+ for (NamedList bucket : buckets) {
+ final long count = ((Number) bucket.get("count")).longValue();
+ final String fieldVal = bucket.get("val").toString(); // int or stringified int
+
+ // change our query to filter on the fieldVal, and wrap in the facet domain (if any)
+ final SolrParams verifyParams = facet.applyValueConstraintAndDomain(baseParams, facetKey, fieldVal);
+
+ // check the count for this bucket
+ assertEquals(facetKey + ": " + verifyParams,
+ count, getRandClient(random()).query(verifyParams).getResults().getNumFound());
+
+ // budget exhausted: stop here, which also skips the left over keys check at the end of this method
+ if (maxBucketsToCheck.decrementAndGet() <= 0) {
+ return;
+ }
+
+ // recursively check subFacets
+ if (! facet.subFacets.isEmpty()) {
+ assertFacetCountsAreCorrect(maxBucketsToCheck, facet.subFacets, verifyParams, bucket);
+ }
+ }
+ }
+ assertTrue("facets have unexpected keys left over: " + actualFacetResponse,
+ // should always be a count, maybe a 'val' if we're a subfacet
+ (actualFacetResponse.size() == expected.size() + 1) ||
+ (actualFacetResponse.size() == expected.size() + 2));
+ }
+
+
+ /**
+ * Trivial data structure for modeling a simple terms facet that can be written out as a json.facet param.
+ *
+ * Doesn't do any string escaping or quoting, so don't use whitespace or reserved json characters
+ */
+ private static final class TermFacet {
+ public final String field;
+ public final Map<String,TermFacet> subFacets = new LinkedHashMap<>();
+ public final JoinDomain domain; // may be null
+ public final Integer limit; // may be null
+ public final Integer overrequest; // may be null
+ public final Boolean refine; // may be null
+
+ /** Simplified constructor asks for limit = # unique vals, with no overrequest and no refinement */
+ public TermFacet(String field, JoinDomain domain) {
+   this(field, domain, UNIQUE_FIELD_VALS, 0, false);
+ }
+ /**
+  * @param field name of the field to facet on, must not be null
+  * @param domain domain change to apply to this facet, may be null
+  * @param limit value for the "limit" param, may be null to leave it unspecified
+  * @param overrequest value for the "overrequest" param, may be null to leave it unspecified
+  * @param refine value for the "refine" param, may be null to leave it unspecified; must be TRUE
+  *        whenever {@link #isRefinementNeeded} says refinement is required for this limit &amp; overrequest
+  */
+ public TermFacet(String field, JoinDomain domain, Integer limit, Integer overrequest, Boolean refine) {
+   assert null != field;
+   this.field = field;
+   this.domain = domain;
+   this.limit = limit;
+   this.overrequest = overrequest;
+   this.refine = refine;
+   if (isRefinementNeeded(limit, overrequest)) {
+     // this class has no toString() override, so spell out the offending values explicitly
+     assertEquals("Invalid refine param based on limit & overrequest: field=" + field
+                  + ", limit=" + limit + ", overrequest=" + overrequest + ", refine=" + refine,
+                  Boolean.TRUE, refine);
+   }
+ }
+
+ /**
+  * Builds the verification query params for one bucket of this facet. The returned params:
+  * <ul>
+  *  <li>start as a copy of the original SolrParams</li>
+  *  <li>have the "q" param wrapped to capture the domain change for this facet (if any)</li>
+  *  <li>have <code>field:facetVal</code> ANDed in front of that (possibly wrapped) "q"</li>
+  * </ul>
+  *
+  * @param orig the params to start from
+  * @param facetKey key of this facet, used to derive the param name handed to the domain
+  * @param facetVal the value (or range expression) this facet's field is constrained to
+  * @see JoinDomain#applyDomainToQuery
+  */
+ public SolrParams applyValueConstraintAndDomain(SolrParams orig, String facetKey, String facetVal) {
+   // domain first (if there is one) ...
+   final SolrParams wrapped = (null == domain) ? orig : domain.applyDomainToQuery(facetKey + "_q", orig);
+
+   // ... then constrain to the facet value we need to test
+   final ModifiableSolrParams out = new ModifiableSolrParams(wrapped);
+   out.set("q", field + ":" + facetVal + " AND " + wrapped.get("q"));
+   return out;
+ }
+
+ /**
+  * Recursively renders this facet (options, sub facets, and domain) as a <code>json.facet</code>
+  * fragment. Options that are null are left out of the output entirely.
+  */
+ private CharSequence toJSONFacetParamValue() {
+   final StringBuilder json = new StringBuilder("{ type:terms, field:").append(field);
+   if (null != limit) {
+     json.append(", limit:").append(limit);
+   }
+   if (null != overrequest) {
+     json.append(", overrequest:").append(overrequest);
+   }
+   if (null != refine) {
+     json.append(", refine:").append(refine);
+   }
+   if (! subFacets.isEmpty()) {
+     json.append(", facet:").append(toJSONFacetParamValue(subFacets));
+   }
+   // a domain may exist and still have nothing to render
+   final CharSequence domainJson = (null == domain) ? null : domain.toJSONFacetParamValue();
+   if (null != domainJson) {
+     json.append(", ").append(domainJson);
+   }
+   return json.append("}");
+ }
+
+ /**
+  * Renders a non-empty set of (possibly nested) facets as a <code>json.facet</code> param value
+  * suitable for use in a solr request.
+  *
+  * @param facets the facets to render, by key; asserted to be non-null and non-empty
+  */
+ public static CharSequence toJSONFacetParamValue(Map<String,TermFacet> facets) {
+   assert null != facets;
+   assert 0 < facets.size();
+   final StringBuilder json = new StringBuilder("{");
+   for (Map.Entry<String,TermFacet> entry : facets.entrySet()) {
+     json.append(entry.getKey()).append(" : ").append(entry.getValue().toJSONFacetParamValue()).append(" ,");
+   }
+   // drop the comma left behind by the last entry
+   json.setLength(json.length() - 1);
+   return json.append("}");
+ }
+
+ /**
+  * Factory method for generating some random (nested) facets.
+  *
+  * Every generated facet gets a unique key name, no matter how deeply it is nested under other
+  * facets, which keeps verification simple.
+  *
+  * @see JoinDomain
+  */
+ public static Map<String,TermFacet> buildRandomFacets() {
+   final int maxDepth = TestUtil.nextInt(random(), 0, (usually() ? 2 : 3));
+   // a single counter is shared by the whole tree so that keys are unique regardless of depth
+   return buildRandomFacets(new AtomicInteger(0), maxDepth);
+ }
+
+ /**
+  * Picks a random value for the "limit" param, biased in favor of interesting test cases:
+  * candidates at or above {@link #UNIQUE_FIELD_VALS} are turned into "unlimited" half of the time,
+  * and a candidate equal to the default limit is left unspecified half of the time.
+  *
+  * @return a number to specify in the request, or null to specify nothing (trigger default behavior)
+  * @see #UNIQUE_FIELD_VALS
+  */
+ public static Integer randomLimitParam(Random r) {
+   final int candidate = 1 + r.nextInt(UNIQUE_FIELD_VALS * 2);
+   if (UNIQUE_FIELD_VALS <= candidate && r.nextBoolean()) {
+     return -1; // unlimited
+   }
+   if (DEFAULT_LIMIT == candidate && r.nextBoolean()) {
+     return null; // sometimes, don't specify limit if it's the default
+   }
+   return candidate;
+ }
+
+ /**
+  * Picks a random value for the "overrequest" param, biased in favor of interesting test cases.
+  *
+  * @return a number to specify in the request, or null to specify nothing (trigger default behavior)
+  * @see #UNIQUE_FIELD_VALS
+  */
+ public static Integer randomOverrequestParam(Random r) {
+   final int roll = r.nextInt(10);
+   if (roll <= 3) {
+     return 0; // 40% of the time, no overrequest to better stress refinement
+   }
+   if (roll <= 5) {
+     return r.nextInt(UNIQUE_FIELD_VALS); // 20%: ask for less than what's needed
+   }
+   if (6 == roll) {
+     return r.nextInt(Integer.MAX_VALUE); // 10%: completely random value, statistically more than enough
+   }
+   // otherwise: either leave the param unspecified, or redundantly specify the -1 default
+   return r.nextBoolean() ? null : -1;
+ }
+
+ /**
+  * Picks a random value for the "refine" param that is guaranteed to be suitable for
+  * the specified limit &amp; overrequest params.
+  *
+  * @return a value to specify in the request, or null to specify nothing (trigger default behavior)
+  * @see #randomLimitParam
+  * @see #randomOverrequestParam
+  * @see #UNIQUE_FIELD_VALS
+  */
+ public static Boolean randomRefineParam(Random r, Integer limitParam, Integer overrequestParam) {
+   // refinement is forced when it is required; once in a while it is also turned on when it isn't
+   if (isRefinementNeeded(limitParam, overrequestParam) || 0 == r.nextInt(10)) {
+     return true;
+   }
+   // explicitly or implicitly indicate refinement is not needed
+   return r.nextBoolean() ? false : null;
+ }
+
+ /**
+  * Deterministically identifies if the specified limit &amp; overrequest params <b>require</b>
+  * a "refine:true" param be used in the request, in order for the counts to be 100% accurate.
+  *
+  * @param limitParam the "limit" param, null is treated as {@link #DEFAULT_LIMIT}
+  * @param overrequestParam the "overrequest" param, null is treated as 0
+  * @see #UNIQUE_FIELD_VALS
+  */
+ public static boolean isRefinementNeeded(Integer limitParam, Integer overrequestParam) {
+
+   if (FORCE_DISABLE_REFINEMENT) {
+     return false;
+   }
+
+   // use the "effective" values if the params are null
+   final int limit = null == limitParam ? DEFAULT_LIMIT : limitParam;
+   final int overrequest = null == overrequestParam ? 0 : overrequestParam;
+
+   if (overrequest < 0) {
+     // don't presume how much overrequest will be done by default, just check the limit
+     return limit < UNIQUE_FIELD_VALS;
+   }
+   // is the user specified overrequest "enough" to get all unique values?
+   // (summed as a long so that a huge overrequest can not overflow)
+   return (long)limit + overrequest < UNIQUE_FIELD_VALS;
+ }
+
+ /**
+ * recursive helper method for building random facets
+ *
+ * @param keyCounter used to ensure every generated facet has a unique key name
+ * @param maxDepth max possible depth allowed for the recursion, a lower value may be used depending on how many facets are returned at the current level.
+ * @return the generated facets, by key, in generation order
+ */
+ private static Map<String,TermFacet> buildRandomFacets(AtomicInteger keyCounter, int maxDepth) {
+ final int numFacets = Math.max(1, TestUtil.nextInt(random(), -1, 3)); // 3/5th chance of being '1'
+ Map<String,TermFacet> results = new LinkedHashMap<>();
+ for (int i = 0; i < numFacets; i++) {
+ final JoinDomain domain = JoinDomain.buildRandomDomain();
+ assert null != domain;
+ // limit & overrequest are picked first, because the refine choice below depends on them
+ final Integer limit = randomLimitParam(random());
+ final Integer overrequest = randomOverrequestParam(random());
+ final TermFacet facet = new TermFacet(field(random().nextBoolean() ? STR_FIELD_SUFFIXES : INT_FIELD_SUFFIXES,
+ random().nextInt(MAX_FIELD_NUM)),
+ domain, limit, overrequest,
+ randomRefineParam(random(), limit, overrequest));
+ results.put("facet_" + keyCounter.incrementAndGet(), facet);
+ if (0 < maxDepth) {
+ // if we're going wide, don't go deep
+ final int nextMaxDepth = Math.max(0, maxDepth - numFacets);
+ facet.subFacets.putAll(buildRandomFacets(keyCounter, TestUtil.nextInt(random(), 0, nextMaxDepth)));
+ }
+ }
+ return results;
+ }
+ }
+
+
+ /**
+  * Models a Domain Change consisting of a 'join', a 'filter', both, or neither (a no-op domain).
+  */
+ private static final class JoinDomain {
+   public final String from;
+   public final String to;
+   public final String filter; // a single filter is plenty, more than 1 is not the point of the test
+
+   /**
+    * @param from left side of join field name, null if domain involves no joining
+    * @param to right side of join field name, null if domain involves no joining
+    * @param filter filter to apply to domain, null if domain involves no filtering
+    */
+   public JoinDomain(String from, String to, String filter) {
+     // either both sides of the join are specified, or neither is
+     assert (null == from) == (null == to) : "if from is null, to must be null";
+     this.from = from;
+     this.to = to;
+     this.filter = filter;
+   }
+
+   /**
+    * @return the JSON string representing this domain for use in a facet param, or null if no domain should be used
+    */
+   public CharSequence toJSONFacetParamValue() {
+     final boolean hasJoin = (null != from);
+     final boolean hasFilter = (null != filter);
+     if (!hasJoin && !hasFilter) {
+       return null;
+     }
+
+     final StringBuilder json = new StringBuilder("domain:{");
+     if (hasJoin) {
+       assert null != to;
+       json.append("join:{from:").append(from).append(",to:").append(to).append("}");
+     }
+     if (hasJoin && hasFilter) {
+       json.append(",");
+     }
+     if (hasFilter) {
+       json.append("filter:'").append(filter).append("'");
+     }
+     json.append("}");
+     return json;
+   }
+
+   /**
+    * Given some original SolrParams, returns new SolrParams where the original "q" param is wrapped
+    * as needed to apply the equivalent transformation to a query as this domain would to a facet.
+    *
+    * @param safeKey name of a param, not already used in the original params, that will hold the original "q" if a join is needed
+    * @param in the original params
+    */
+   public SolrParams applyDomainToQuery(String safeKey, SolrParams in) {
+     assert null == in.get(safeKey); // shouldn't be possible if every facet uses a unique key string
+
+     final String originalQ = in.get("q");
+     final ModifiableSolrParams out = new ModifiableSolrParams(in);
+
+     String domainQ = originalQ;
+     if (null != from) {
+       // the original query is moved to its own param, and referenced from the join
+       out.set(safeKey, originalQ);
+       domainQ = "{!join from=" + from + " to=" + to + " v=$" + safeKey + "}";
+     }
+     if (null != filter) {
+       domainQ = filter + " AND " + domainQ;
+     }
+     out.set("q", domainQ);
+     return out;
+   }
+
+   /**
+    * Factory method for creating a random domain change to use with a facet - may return a 'noop' JoinDomain,
+    * but will never return null.
+    */
+   public static JoinDomain buildRandomDomain() {
+
+     // use consistent type on both sides of join
+     final String[] suffixes = random().nextBoolean() ? STR_FIELD_SUFFIXES : INT_FIELD_SUFFIXES;
+
+     final boolean useJoin = ! random().nextBoolean();
+
+     String from = null;
+     String to = null;
+     if (useJoin) {
+       do {
+         from = field(suffixes, random().nextInt(MAX_FIELD_NUM));
+         to = field(suffixes, random().nextInt(MAX_FIELD_NUM));
+         // HACK: joined numeric point fields need docValues.. for now just re-pick if either side
+         // is an _is field and we are dealing with points.
+       } while (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP) && (from.endsWith("_is") || to.endsWith("_is")));
+     }
+
+     // keep it simple, only filter on string fields - not point of test
+     final String filterField = strfield(random().nextInt(MAX_FIELD_NUM));
+
+     final String filter;
+     if (random().nextBoolean()) {
+       filter = null;
+     } else {
+       filter = filterField + ":[* TO *]";
+     }
+     return new JoinDomain(from, to, filter);
+   }
+ }
+
+ /**
+  * Picks a SolrClient at random -- either the CloudSolrClient, or one of the HttpSolrClients
+  * pointed at a node in our cluster.
+  *
+  * @param rand source of randomness used to make the choice
+  */
+ public static SolrClient getRandClient(Random rand) {
+   final int cloudClientIdx = CLIENTS.size();
+   // any index below cloudClientIdx picks a per-node client, cloudClientIdx itself picks the cloud client
+   final int pick = TestUtil.nextInt(rand, 0, cloudClientIdx);
+   if (pick < cloudClientIdx) {
+     return CLIENTS.get(pick);
+   }
+   return CLOUD_CLIENT;
+ }
+
+ /**
+  * Delegates to {@link AbstractDistribZkTestBase#waitForRecoveriesToFinish} for the default collection
+  * of the specified client (which must be set).
+  */
+ public static void waitForRecoveriesToFinish(CloudSolrClient client) throws Exception {
+   final String collection = client.getDefaultCollection();
+   assert null != collection;
+   AbstractDistribZkTestBase.waitForRecoveriesToFinish(collection, client.getZkStateReader(), true, true, 330);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/71988c75/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java
new file mode 100644
index 0000000..e212993
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java
@@ -0,0 +1,678 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search.facet;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.commons.lang.StringUtils;
+
+import org.apache.lucene.util.LuceneTestCase.Slow;
+import org.apache.lucene.util.TestUtil;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.cloud.AbstractDistribZkTestBase;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import static org.apache.solr.search.facet.RelatednessAgg.computeRelatedness;
+import static org.apache.solr.search.facet.RelatednessAgg.roundTo5Digits;
+
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * <p>
+ * A randomized test of nested facets using the <code>relatedness()</code> function, that asserts the
+ * accuracy of the results for all the buckets returned using verification queries of the (expected)
+ * foreground & background queries based on the nested facet terms.
+ * <p>
+ * Note that unlike normal facet "count" verification, using a high limit + overrequest isn't a substitute
+ * for refinement in order to ensure accurate "skg" computation across shards. For that reason, this
+ * test forces <code>refine: true</code> (unlike {@link TestCloudJSONFacetJoinDomain}) and specifies a
+ * <code>domain: { 'query':'*:*' }</code> for every facet, in order to guarantee that all shards
+ * participate in all facets, so that the popularity & relatedness values returned can be proven
+ * with validation requests.
+ * </p>
+ * <p>
+ * (Refinement alone is not enough. Using the '*:*' query as the facet domain is necessary to
+ * prevent situations where a single shardX may return a candidate bucket with no child-buckets due to
+ * the normal facet intersections, but when refined on other shardY(s), can produce "high scoring"
+ * SKG child-buckets, which would then be missing the foreground/background "size" contributions from
+ * shardX.)
+ * </p>
+ *
+ * @see TestCloudJSONFacetJoinDomain
+ */
+@Slow
+public class TestCloudJSONFacetSKG extends SolrCloudTestCase {
+
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+ private static final String DEBUG_LABEL = MethodHandles.lookup().lookupClass().getName(); // name of this class, used as a prefix for the collection & config set names
+ private static final String COLLECTION_NAME = DEBUG_LABEL + "_collection";
+
+ private static final int DEFAULT_LIMIT = FacetField.DEFAULT_FACET_LIMIT; // when a random limit equals this value, the limit param is sometimes omitted (see randomLimitParam)
+ private static final int MAX_FIELD_NUM = 15; // field numbers range from 0 (inclusive) to this value (exclusive)
+ private static final int UNIQUE_FIELD_VALS = 50; // number of distinct values randFieldValue can produce for any one field number
+
+ /** Multivalued string field suffixes that can be randomized for testing different facet/join code paths */
+ private static final String[] STR_FIELD_SUFFIXES = new String[] { "_ss", "_sds", "_sdsS" };
+ /** Multivalued int field suffixes that can be randomized for testing different facet/join code paths */
+ private static final String[] INT_FIELD_SUFFIXES = new String[] { "_is", "_ids", "_idsS" };
+
+ /** A basic client for operations at the cloud level, default collection will be set */
+ private static CloudSolrClient CLOUD_CLIENT;
+ /** One client per node, populated by createMiniSolrCloudCluster and closed by afterClass */
+ private static ArrayList<HttpSolrClient> CLIENTS = new ArrayList<>(5);
+
+ /**
+  * Starts a cluster (one node per shard replica), creates the test collection, creates the clients
+  * used by the test, and indexes a random number of docs where every doc has at least one value in
+  * every (string and int) test field.
+  */
+ @BeforeClass
+ private static void createMiniSolrCloudCluster() throws Exception {
+   // sanity check constants
+   final boolean allStrSuffixesUsed = STR_FIELD_SUFFIXES.length < MAX_FIELD_NUM;
+   final boolean allIntSuffixesUsed = INT_FIELD_SUFFIXES.length < MAX_FIELD_NUM;
+   assertTrue("bad test constants: some suffixes will never be tested",
+              allStrSuffixesUsed && allIntSuffixesUsed);
+
+   // we need DVs on point fields to compute stats & facets
+   if (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP)) {
+     System.setProperty(NUMERIC_DOCVALUES_SYSPROP,"true");
+   }
+
+   // multi replicas should not matter...
+   final int replicationFactor = usually() ? 1 : 2;
+   // ... but we definitely want to test multiple shards
+   final int shardCountCap = usually() ? 2 : 3;
+   final int shardCount = TestUtil.nextInt(random(), 1, shardCountCap);
+   final int nodeCount = shardCount * replicationFactor;
+
+   final String configName = DEBUG_LABEL + "_config-set";
+   final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf");
+
+   configureCluster(nodeCount).addConfig(configName, configDir).configure();
+
+   final Map<String, String> collectionProps = new LinkedHashMap<>();
+   collectionProps.put("config", "solrconfig-tlog.xml");
+   collectionProps.put("schema", "schema_latest.xml");
+   CollectionAdminRequest.createCollection(COLLECTION_NAME, configName, shardCount, replicationFactor)
+     .setProperties(collectionProps)
+     .process(cluster.getSolrClient());
+
+   CLOUD_CLIENT = cluster.getSolrClient();
+   CLOUD_CLIENT.setDefaultCollection(COLLECTION_NAME);
+
+   waitForRecoveriesToFinish(CLOUD_CLIENT);
+
+   for (JettySolrRunner node : cluster.getJettySolrRunners()) {
+     final String collectionUrl = node.getBaseUrl() + "/" + COLLECTION_NAME + "/";
+     CLIENTS.add(getHttpSolrClient(collectionUrl));
+   }
+
+   final int docCount = atLeast(100);
+   for (int id = 0; id < docCount; id++) {
+     final SolrInputDocument doc = sdoc("id", ""+id);
+     for (int fieldNum = 0; fieldNum < MAX_FIELD_NUM; fieldNum++) {
+       // NOTE: we ensure every doc has at least one value in each field
+       // that way, if a term is returned for a parent there is guaranteed to be at least
+       // one term in the child facet as well.
+       //
+       // otherwise, we'd face the risk of a single shardX returning parentTermX as a top term for
+       // the parent facet, but having no child terms -- meanwhile on refinement another shardY that
+       // did *not* return parentTermX in phase#1, could return some *new* child terms under
+       // parentTermX, but their stats would not include the bgCount from shardX.
+       //
+       // in normal operation, this is an edge case that isn't a big deal because the ratios &
+       // relatedness scores are statistically approximate, but for the purpose of this test where
+       // we verify correctness via exactness we need all shards to contribute to the SKG statistics
+       final int valCountCap = usually() ? 5 : 10;
+       final int valCount = TestUtil.nextInt(random(), 1, valCountCap);
+       for (int v = 0; v < valCount; v++) {
+         final String value = randFieldValue(fieldNum);
+
+         // for each fieldNum, there are actually two fields: one string, and one integer
+         doc.addField(strfield(fieldNum), value);
+         doc.addField(intfield(fieldNum), value);
+       }
+     }
+     CLOUD_CLIENT.add(doc);
+     final boolean commitNow = random().nextInt(100) < 1;
+     if (commitNow) {
+       CLOUD_CLIENT.commit(); // commit 1% of the time to create new segments
+     }
+     final boolean duplicateDoc = random().nextInt(100) < 5;
+     if (duplicateDoc) {
+       CLOUD_CLIENT.add(doc); // duplicate the doc 5% of the time to create deleted docs
+     }
+   }
+   CLOUD_CLIENT.commit();
+ }
+
+ /**
+  * Given a (random) field number, and a (static) array of possible suffixes, returns a consistent field
+  * name that uses that number and one of the specified suffixes in its name.  The suffix used is
+  * picked by the field number, modulo the number of suffixes.
+  *
+  * @param suffixes the possible suffixes
+  * @param fieldNum the field number, must be less than {@link #MAX_FIELD_NUM}
+  * @see #STR_FIELD_SUFFIXES
+  * @see #INT_FIELD_SUFFIXES
+  * @see #MAX_FIELD_NUM
+  * @see #randFieldValue
+  */
+ private static String field(final String[] suffixes, final int fieldNum) {
+   assert fieldNum < MAX_FIELD_NUM;
+
+   final int suffixIdx = fieldNum % suffixes.length;
+   return "field_" + fieldNum + suffixes[suffixIdx];
+ }
+ private static String strfield(final int fieldNum) {
+ return field(STR_FIELD_SUFFIXES, fieldNum);
+ }
+ private static String intfield(final int fieldNum) {
+ return field(INT_FIELD_SUFFIXES, fieldNum);
+ }
+
+ /**
+ * Given a (random) field number, returns a random (integer based) value for that field.
+ * NOTE: The number of unique values in each field is constant acording to {@link #UNIQUE_FIELD_VALS}
+ * but the precise <em>range</em> of values will vary for each unique field number, such that cross field joins
+ * will match fewer documents based on how far apart the field numbers are.
+ *
+ * @see #UNIQUE_FIELD_VALS
+ * @see #field
+ */
+ private static String randFieldValue(final int fieldNum) {
+ return "" + (fieldNum + TestUtil.nextInt(random(), 1, UNIQUE_FIELD_VALS));
+ }
+
+
+ /**
+  * Closes the clients created by {@link #createMiniSolrCloudCluster} and resets the static client state.
+  * <p>
+  * Safe to call even if cluster setup failed before the cloud client was assigned, and leaves
+  * the (empty) client list in place so that setup can populate it again if this class is run
+  * more than once in the same JVM.
+  * </p>
+  */
+ @AfterClass
+ private static void afterClass() throws Exception {
+   try {
+     // still null if setup failed early: don't mask the original failure with an NPE
+     if (null != CLOUD_CLIENT) {
+       CLOUD_CLIENT.close();
+     }
+   } finally {
+     CLOUD_CLIENT = null;
+     try {
+       for (HttpSolrClient client : CLIENTS) {
+         client.close();
+       }
+     } finally {
+       // clear (instead of nulling out) the list: it is only ever created by the static initializer
+       CLIENTS.clear();
+     }
+   }
+ }
+
+ /**
+  * Test some small, hand crafted, but non-trivial queries that are
+  * easier to trace/debug than a pure random monstrosity.
+  * (ie: if something obvious gets broken, this test may fail faster and in a more obvious way than testRandom)
+  */
+ public void testBespoke() throws Exception {
+   // trivial single level facet
+   assertBespokeFacet("top1", UNIQUE_FIELD_VALS, null);
+
+   // trivial single level facet w/sorting on skg
+   assertBespokeFacet("top2", UNIQUE_FIELD_VALS, "skg desc");
+
+   // trivial single level facet w/ 2 diff ways to request "limit = (effectively) Infinite"
+   // to sanity check refinement of buckets missing from other shard in both cases
+   //
+   // NOTE that these two queries & facets *should* be effectively identical given that the
+   // very large limit value is big enough no shard will ever return that many terms,
+   // but the "limit=-1" case actually triggers slightly different code paths
+   // because it causes FacetField.returnsPartial() to be "true"
+   for (int limit : new int[] { 999999999, -1 }) {
+     assertBespokeFacet("top_facet_limit__" + limit, limit, "skg desc");
+   }
+ }
+
+ /**
+  * Helper for {@link #testBespoke}: checks a single level facet (with no overrequest) on a fixed field,
+  * using a fixed query, foreground query, and background query, and asserts that at least one bucket was checked.
+  *
+  * @param facetKey the key to use for the facet
+  * @param limit the limit to use for the facet
+  * @param sort the sort to use for the facet, may be null
+  */
+ private void assertBespokeFacet(final String facetKey, final int limit, final String sort) throws Exception {
+   final Map<String,TermFacet> facets = new LinkedHashMap<>();
+   facets.put(facetKey, new TermFacet(strfield(9), limit, 0, sort));
+   final AtomicInteger bucketBudget = new AtomicInteger(UNIQUE_FIELD_VALS);
+   assertFacetSKGsAreCorrect(bucketBudget, facets, strfield(7)+":11", strfield(5)+":9", "*:*");
+   assertTrue("Didn't check a single bucket???", bucketBudget.get() < UNIQUE_FIELD_VALS);
+ }
+
+ public void testRandom() throws Exception {
+
+ // since the "cost" of verifying the stats for each bucket is so high (see TODO in verifySKGResults())
+ // we put a safety valve in place on the maximum number of buckets that we are willing to verify
+ // across *all* the queries that we do.
+ // that way if the randomized queries we build all have relatively small facets, so be it, but if
+ // we get a really big one early on, we can test as much as possible, skip other iterations.
+ //
+ // (deeply nested facets may contain more buckets then the max, but we won't *check* all of them)
+ final int maxBucketsAllowed = atLeast(2000);
+ final AtomicInteger maxBucketsToCheck = new AtomicInteger(maxBucketsAllowed);
+
+ final int numIters = atLeast(10);
+ for (int iter = 0; iter < numIters && 0 < maxBucketsToCheck.get(); iter++) {
+ assertFacetSKGsAreCorrect(maxBucketsToCheck, TermFacet.buildRandomFacets(),
+ buildRandomQuery(), buildRandomQuery(), buildRandomQuery());
+ }
+ assertTrue("Didn't check a single bucket???", maxBucketsToCheck.get() < maxBucketsAllowed);
+
+
+ }
+
+ /**
+  * Generates a random query string across the randomized fields/values in the index:
+  * either the match all docs query, or a disjunction of 3 to 10 random field:value clauses.
+  *
+  * @see #randFieldValue
+  * @see #field
+  * @see #buildORQuery
+  */
+ private static String buildRandomQuery() {
+   final boolean matchAll = (0 == TestUtil.nextInt(random(), 0,10));
+   if (matchAll) {
+     return "*:*";
+   }
+   final String[] clauses = new String[TestUtil.nextInt(random(), 3, 10)];
+   for (int i = 0; i < clauses.length; i++) {
+     final int fieldNum = random().nextInt(MAX_FIELD_NUM);
+     // keep queries simple, just use str fields - not point of test
+     final String fieldName = strfield(fieldNum);
+     clauses[i] = fieldName + ":" + randFieldValue(fieldNum);
+   }
+   return buildORQuery(clauses);
+ }
+
+ private static String buildORQuery(String... clauses) {
+ assert 0 < clauses.length;
+ return "(" + StringUtils.join(clauses, " OR ") + ")";
+ }
+
+ /**
+  * Given a set of term facets, and top level query strings, asserts that
+  * the SKG stats for each facet term returned when executing that query with those foreground/background
+  * queries match the expected results of executing the equivalent queries in isolation.
+  *
+  * @param maxBucketsToCheck remaining number of buckets that may be verified, decremented as buckets are checked
+  * @param expected the facets (by key) to request and verify
+  * @param query the main query
+  * @param foreQ the foreground query, sent as the "fore" param
+  * @param backQ the background query, sent as the "back" param
+  * @see #verifySKGResults
+  */
+ private void assertFacetSKGsAreCorrect(final AtomicInteger maxBucketsToCheck,
+                                        Map<String,TermFacet> expected,
+                                        final String query,
+                                        final String foreQ,
+                                        final String backQ) throws SolrServerException, IOException {
+   final SolrParams baseParams = params("rows","0", "fore", foreQ, "back", backQ);
+
+   final String jsonFacet = "" + TermFacet.toJSONFacetParamValue(expected,null);
+   final SolrParams initParams
+     = SolrParams.wrapAppended(params("q", query, "json.facet", jsonFacet), baseParams);
+
+   log.info("Doing full run: {}", initParams);
+
+   QueryResponse rsp = null;
+   // JSON Facets not (currently) available from QueryResponse...
+   NamedList topNamedList = null;
+   try {
+     final QueryRequest request = new QueryRequest(initParams);
+     rsp = request.process(getRandClient(random()));
+     assertNotNull(initParams + " is null rsp?", rsp);
+     topNamedList = rsp.getResponse();
+     assertNotNull(initParams + " is null topNamedList?", topNamedList);
+   } catch (Exception e) {
+     throw new RuntimeException("init query failed: " + initParams + ": " +
+                                e.getMessage(), e);
+   }
+
+   try {
+     final NamedList facetResponse = (NamedList) topNamedList.get("facets");
+     assertNotNull("null facet results?", facetResponse);
+     final long numFound = rsp.getResults().getNumFound();
+     final long topCount = ((Number)facetResponse.get("count")).longValue();
+     assertEquals("numFound mismatch with top count?", numFound, topCount);
+
+     // Note: even if the query has numFound=0, our explicit background query domain should
+     // still force facet results
+     // (even if the background query matches nothing, that just means there will be no
+     // buckets in those facets)
+     assertFacetSKGsAreCorrect(maxBucketsToCheck, expected, baseParams, facetResponse);
+
+   } catch (AssertionError e) {
+     // include the request & the full response in the failure, to make it easier to reproduce
+     throw new AssertionError(initParams + " ===> " + topNamedList + " --> " + e.getMessage(), e);
+   } finally {
+     log.info("Ending full run");
+   }
+ }
+
+ /**
+ * Recursive helper method that walks the actual facet response, comparing the SKG results to
+ * the expected output based on the equivalent filters generated from the original TermFacet.
+ *
+ * @param maxBucketsToCheck remaining number of buckets that may be verified; decremented once per verified bucket, verification stops as soon as it reaches 0
+ * @param expected the facets (by key) expected at this level of the response
+ * @param baseParams params used for the verification queries: read for the "back" query here, and extended with an "fq" for each bucket whose sub facets are checked
+ * @param actualFacetResponse the portion of the actual response that should contain the results of the expected facets
+ */
+ private void assertFacetSKGsAreCorrect(final AtomicInteger maxBucketsToCheck,
+ final Map<String,TermFacet> expected,
+ final SolrParams baseParams,
+ final NamedList actualFacetResponse) throws SolrServerException, IOException {
+
+ for (Map.Entry<String,TermFacet> entry : expected.entrySet()) {
+ final String facetKey = entry.getKey();
+ final TermFacet facet = entry.getValue();
+ final NamedList results = (NamedList) actualFacetResponse.get(facetKey);
+ assertNotNull(facetKey + " key missing from: " + actualFacetResponse, results);
+ final List<NamedList> buckets = (List<NamedList>) results.get("buckets");
+ assertNotNull(facetKey + " has null buckets: " + actualFacetResponse, buckets);
+
+ if (buckets.isEmpty()) {
+ // should only happen if the background query does not match any docs with field X
+ final long docsWithField = getNumFound(params("_trace", "noBuckets",
+ "rows", "0",
+ "q", facet.field+":[* TO *]",
+ "fq", baseParams.get("back")));
+
+ assertEquals(facetKey + " has no buckets, but docs in background exist with field: " + facet.field,
+ 0, docsWithField);
+ }
+
+ // NOTE: it's important that we do this depth first -- not just because it's the easiest way to do it,
+ // but because it means that our maxBucketsToCheck will ensure we do a lot of deep sub-bucket checking,
+ // not just all the buckets of the top level(s) facet(s)
+ for (NamedList bucket : buckets) {
+ final String fieldVal = bucket.get("val").toString(); // int or stringified int
+
+ verifySKGResults(facetKey, facet, baseParams, fieldVal, bucket);
+ if (maxBucketsToCheck.decrementAndGet() <= 0) { // budget used up: stop here (this also skips the unexpected key check below)
+ return;
+ }
+
+ final SolrParams verifyParams = SolrParams.wrapAppended(baseParams,
+ params("fq", facet.field + ":" + fieldVal)); // sub facets are verified with this bucket's term as an additional filter
+
+ // recursively check subFacets
+ if (! facet.subFacets.isEmpty()) {
+ assertFacetSKGsAreCorrect(maxBucketsToCheck, facet.subFacets, verifyParams, bucket);
+ }
+ }
+ }
+
+ { // make sure we don't have any facet keys we don't expect
+ // a little hackish because subfacets have extra keys...
+ final LinkedHashSet expectedKeys = new LinkedHashSet(expected.keySet());
+ expectedKeys.add("count");
+ if (0 <= actualFacetResponse.indexOf("val",0)) { // a "val" key means this "response" is really a bucket of a parent facet
+ expectedKeys.add("val");
+ expectedKeys.add("skg");
+ }
+ assertEquals("Unexpected keys in facet response",
+ expectedKeys, actualFacetResponse.asShallowMap().keySet());
+ }
+ }
+
+ /**
+  * Verifies that the popularity & relatedness values contained in a single SKG bucket
+  * match the expected values based on the facet field & bucket value, as well as the existing
+  * filterParams.
+  *
+  * @param facetKey key of the facet the bucket belongs to, only used in assertion messages
+  * @param facet the facet the bucket belongs to
+  * @param filterParams params containing the "fore" & "back" queries, and any "fq" filters accumulated from parent buckets
+  * @param fieldVal the term of the bucket
+  * @param bucket the bucket, must contain an "skg" entry
+  * @see #assertFacetSKGsAreCorrect
+  */
+ private void verifySKGResults(String facetKey, TermFacet facet, SolrParams filterParams,
+                               String fieldVal, NamedList<Object> bucket)
+   throws SolrServerException, IOException {
+
+   final String bucketQ = facet.field+":"+fieldVal;
+   final NamedList<Object> skgBucket = (NamedList<Object>) bucket.get("skg");
+   final String bucketLabel = facetKey + "/bucket:" + bucket;
+   assertNotNull(bucketLabel, skgBucket);
+
+   final String foreRef = "{!query v=$fore}";
+   final String backQ = filterParams.get("back");
+
+   // TODO: make this more efficient?
+   // ideally we'd do a single query w/4 facet.queries, one for each count
+   // but formatting the queries is a pain, currently we leverage the accumulated fq's
+   final SolrParams fgSizeParams = SolrParams.wrapAppended(params("_trace", "fgSize",
+                                                                  "rows","0",
+                                                                  "q", foreRef),
+                                                           filterParams);
+   final long fgSize = getNumFound(fgSizeParams);
+
+   final SolrParams bgSizeParams = params("_trace", "bgSize",
+                                          "rows","0",
+                                          "q", backQ);
+   final long bgSize = getNumFound(bgSizeParams);
+
+   final SolrParams fgCountParams = SolrParams.wrapAppended(params("_trace", "fgCount",
+                                                                   "rows","0",
+                                                                   "q", foreRef,
+                                                                   "fq", bucketQ),
+                                                            filterParams);
+   final long fgCount = getNumFound(fgCountParams);
+
+   final SolrParams bgCountParams = params("_trace", "bgCount",
+                                           "rows","0",
+                                           "q", bucketQ,
+                                           "fq", backQ);
+   final long bgCount = getNumFound(bgCountParams);
+
+   // NOTE: both popularity values are relative to the size of the background set
+   assertEquals(bucketLabel + " => fgPop should be: " + fgCount + " / " + bgSize,
+                roundTo5Digits((double) fgCount / bgSize),
+                skgBucket.get("foreground_popularity"));
+   assertEquals(bucketLabel + " => bgPop should be: " + bgCount + " / " + bgSize,
+                roundTo5Digits((double) bgCount / bgSize),
+                skgBucket.get("background_popularity"));
+   assertEquals(bucketLabel + " => relatedness is wrong",
+                roundTo5Digits(computeRelatedness(fgCount, fgSize, bgCount, bgSize)),
+                skgBucket.get("relatedness"));
+ }
+
+
+ /**
+ * Trivial data structure for modeling a simple terms facet that can be written out as a json.facet param.
+ *
+ * Doesn't do any string escaping or quoting, so don't use whitespace or reserved json characters
+ */
+ private static final class TermFacet {
+ public final String field; // name of the field to facet on, never null
+ public final Map<String,TermFacet> subFacets = new LinkedHashMap<>(); // nested facets by key, may be empty
+ public final Integer limit; // may be null (in which case no limit is included in the json)
+ public final Integer overrequest; // may be null (in which case no overrequest is included in the json)
+ public final String sort; // may be null (in which case no sort is included in the json)
+ /**
+  * Simplified constructor: asks for limit = # unique vals, with no overrequest, sorted by "skg desc"
+  *
+  * @param field name of the field to facet on
+  */
+ public TermFacet(String field) {
+   this(field, UNIQUE_FIELD_VALS, 0, "skg desc");
+ }
+ /**
+  * @param field name of the field to facet on, must not be null
+  * @param limit the limit param to use, or null to not specify one
+  * @param overrequest the overrequest param to use, or null to not specify one
+  * @param sort the sort param to use, or null to not specify one
+  */
+ public TermFacet(String field, Integer limit, Integer overrequest, String sort) {
+   assert null != field;
+   this.sort = sort;
+   this.overrequest = overrequest;
+   this.limit = limit;
+   this.field = field;
+ }
+
+ /**
+  * Recursively generates the <code>json.facet</code> param value to use for testing this facet,
+  * including the <code>relatedness($fore,$back)</code> stat (as "skg") and any sub facets.
+  */
+ private CharSequence toJSONFacetParamValue() {
+   final StringBuilder json = new StringBuilder("{ type:terms, field:").append(field);
+   // the optional params are only included if they were specified
+   if (null != limit) {
+     json.append(", limit:").append(limit);
+   }
+   if (null != overrequest) {
+     json.append(", overrequest:").append(overrequest);
+   }
+   if (null != sort) {
+     json.append(", sort: '").append(sort).append("'");
+   }
+
+   // see class javadocs for why we always use refine:true & the query:'*:*' domain for this test.
+   json.append(", refine: true, domain: { query: '*:*' }, facet:");
+   json.append(toJSONFacetParamValue(subFacets, "skg : 'relatedness($fore,$back)'"));
+   json.append("}");
+   return json;
+ }
+
+ /**
+  * Given a set of (possibly nested) facets, generates a suitable <code>json.facet</code> param value to
+  * use for testing them against in a solr request.
+  *
+  * @param facets the facets by key, must not be null
+  * @param extraJson additional json to include after the facets, may be null
+  * @return the json, or the empty string if there are no facets and no extra json
+  */
+ public static CharSequence toJSONFacetParamValue(final Map<String,TermFacet> facets,
+                                                  final String extraJson) {
+   assert null != facets;
+   if (facets.isEmpty() && null == extraJson) {
+     return "";
+   }
+
+   final StringBuilder json = new StringBuilder("{ processEmpty: true, ");
+   for (Map.Entry<String,TermFacet> entry : facets.entrySet()) {
+     json.append(entry.getKey()).append(" : ").append(entry.getValue().toJSONFacetParamValue());
+     json.append(" ,");
+   }
+   if (null != extraJson) {
+     json.append(extraJson);
+   } else {
+     // nothing follows the last facet, so drop the comma that was added after it
+     json.setLength(json.length() - 1);
+   }
+   json.append("}");
+   return json;
+ }
+
+ /**
+  * Factory method for generating some random facets, with a random max nesting depth.
+  *
+  * For simplicity, each facet will have a unique key name.
+  */
+ public static Map<String,TermFacet> buildRandomFacets() {
+   final int depthCap = usually() ? 2 : 3;
+   final int maxDepth = TestUtil.nextInt(random(), 0, depthCap);
+
+   // for simplicity, use a unique facet key regardless of depth - simplifies verification
+   // and lets us enforce a hard limit on the total number of facets in a request
+   return buildRandomFacets(new AtomicInteger(0), maxDepth);
+ }
+
+ /**
+  * Picks a random value for the "sort" param, biased in favor of interesting test cases.
+  * <p>
+  * All randomness (the direction as well as the type of sort) comes from the specified Random.
+  * </p>
+  *
+  * @param r the source of randomness to use
+  * @return a sort string (w/direction), or null to specify nothing (trigger default behavior)
+  * @see #randomLimitParam
+  */
+ public static String randomSortParam(Random r) {
+
+   // IMPORTANT!!!
+   // if this method is modified to produce new sorts, make sure to update
+   // randomLimitParam to account for them if they are impacted by SOLR-12556
+
+   // NOTE: the direction is always picked first, even if it winds up not being used
+   final String dir = r.nextBoolean() ? "asc" : "desc";
+   switch(r.nextInt(4)) {
+     case 0: return null;
+     case 1: return "count " + dir;
+     case 2: return "skg " + dir;
+     case 3: return "index " + dir;
+     default: throw new RuntimeException("Broken case statement");
+   }
+ }
+ /**
+  * Picks a random value for the "limit" param, biased in favor of interesting test cases.
+  *
+  * <p>
+  * <b>NOTE:</b> Due to SOLR-12556, we have to force an overrequest of "all" possible terms for
+  * some sort values.
+  * </p>
+  *
+  * @param r the source of randomness to use
+  * @param sort the sort that will be used with the limit, may be null
+  * @return a number to specify in the request, or null to specify nothing (trigger default behavior)
+  * @see #UNIQUE_FIELD_VALS
+  * @see #randomSortParam
+  */
+ public static Integer randomLimitParam(Random r, final String sort) {
+   // of the known types of sorts produced, these are at risk of SOLR-12556
+   final boolean sortAtRisk = (null != sort) && (sort.equals("count asc") || sort.startsWith("skg"));
+   if (sortAtRisk) {
+     // so request (effectively) unlimited num buckets
+     return r.nextBoolean() ? UNIQUE_FIELD_VALS : -1;
+   }
+
+   final int limit = 1 + r.nextInt((int) (UNIQUE_FIELD_VALS * 1.5F));
+   if (limit >= UNIQUE_FIELD_VALS && r.nextBoolean()) {
+     return -1; // unlimited
+   }
+   if (limit == DEFAULT_LIMIT && r.nextBoolean()) {
+     return null; // sometimes, don't specify limit if it's the default
+   }
+   return limit;
+ }
+
+ /**
+  * Picks a random value for the "overrequest" param, biased in favor of interesting test cases.
+  *
+  * @param r the source of randomness to use
+  * @return a number to specify in the request, or null to specify nothing (trigger default behavior)
+  * @see #UNIQUE_FIELD_VALS
+  */
+ public static Integer randomOverrequestParam(Random r) {
+   final int roll = r.nextInt(10);
+   if (roll <= 3) {
+     return 0; // 40% of the time, disable overrequest to better stress refinement
+   }
+   if (roll <= 5) {
+     return r.nextInt(UNIQUE_FIELD_VALS); // 20% ask for less than what's needed
+   }
+   if (roll == 6) {
+     return r.nextInt(Integer.MAX_VALUE); // 10%: completely random value, statistically more than enough
+   }
+   // else.... either leave param unspecified (or redundantly specify the -1 default)
+   return r.nextBoolean() ? null : -1;
+ }
+
+ /**
+  * Recursive helper method for building random facets.
+  * <p>
+  * No new facets are created once the keyCounter has reached 3, so the result may be empty.
+  * </p>
+  *
+  * @param keyCounter used to ensure every generated facet has a unique key name
+  * @param maxDepth max possible depth allowed for the recursion, a lower value may be used depending on how many facets are returned at the current level.
+  * @return the generated facets by key, in the order they were created
+  */
+ private static Map<String,TermFacet> buildRandomFacets(AtomicInteger keyCounter, int maxDepth) {
+   // 3/5th chance of being '1'
+   final int facetCount = Math.max(1, TestUtil.nextInt(random(), -1, 3));
+   final Map<String,TermFacet> generated = new LinkedHashMap<>();
+   for (int n = 0; n < facetCount; n++) {
+     // a hard limit on the total number of facets (regardless of depth) to reduce OOM risk
+     if (3 <= keyCounter.get()) {
+       continue;
+     }
+
+     final String sort = randomSortParam(random());
+     final Integer limit = randomLimitParam(random(), sort);
+     final Integer overrequest = randomOverrequestParam(random());
+     final String[] suffixes = random().nextBoolean() ? STR_FIELD_SUFFIXES : INT_FIELD_SUFFIXES;
+     final String fieldName = field(suffixes, random().nextInt(MAX_FIELD_NUM));
+     final TermFacet facet = new TermFacet(fieldName, limit, overrequest, sort);
+     generated.put("facet_" + keyCounter.incrementAndGet(), facet);
+
+     if (maxDepth > 0) {
+       // if we're going wide, don't go deep
+       final int childDepthCap = Math.max(0, maxDepth - facetCount);
+       final int childDepth = TestUtil.nextInt(random(), 0, childDepthCap);
+       facet.subFacets.putAll(buildRandomFacets(keyCounter, childDepth));
+     }
+   }
+   return generated;
+ }
+ }
+
+ /**
+  * Picks a SolrClient at random -- either the CloudSolrClient, or one of the HttpSolrClients
+  * pointed at a node in our cluster.
+  *
+  * @param rand source of randomness used to make the choice
+  */
+ public static SolrClient getRandClient(Random rand) {
+   final int cloudClientIdx = CLIENTS.size();
+   // any index below cloudClientIdx picks a per-node client, cloudClientIdx itself picks the cloud client
+   final int pick = TestUtil.nextInt(rand, 0, cloudClientIdx);
+   if (pick < cloudClientIdx) {
+     return CLIENTS.get(pick);
+   }
+   return CLOUD_CLIENT;
+ }
+
+ /**
+  * Uses a random SolrClient to execute a request and returns only the numFound.
+  *
+  * @param req the params of the request to execute
+  * @see #getRandClient
+  */
+ public static long getNumFound(final SolrParams req) throws SolrServerException, IOException {
+   final SolrClient client = getRandClient(random());
+   final QueryResponse rsp = client.query(req);
+   return rsp.getResults().getNumFound();
+ }
+
+ /**
+  * Delegates to {@link AbstractDistribZkTestBase#waitForRecoveriesToFinish} for the default collection
+  * of the specified client (which must be set).
+  */
+ public static void waitForRecoveriesToFinish(CloudSolrClient client) throws Exception {
+   final String collection = client.getDefaultCollection();
+   assert null != collection;
+   AbstractDistribZkTestBase.waitForRecoveriesToFinish(collection, client.getZkStateReader(), true, true, 330);
+ }
+
+}