You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/10/31 05:07:11 UTC
[25/50] [abbrv] lucene-solr:jira/http2_benchmark: SOLR-12793: Move
TestCloudJSONFacetJoinDomain and TestCloudJSONFacetSKG to the facet test
package
SOLR-12793: Move TestCloudJSONFacetJoinDomain and TestCloudJSONFacetSKG to the facet test package
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/71988c75
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/71988c75
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/71988c75
Branch: refs/heads/jira/http2_benchmark
Commit: 71988c756b76a96cb96fc8f86183219a4a008389
Parents: 16ee847
Author: Varun Thacker <va...@apache.org>
Authored: Wed Oct 24 13:57:28 2018 -0700
Committer: Varun Thacker <va...@apache.org>
Committed: Wed Oct 24 13:57:35 2018 -0700
----------------------------------------------------------------------
solr/CHANGES.txt | 2 +
.../cloud/TestCloudJSONFacetJoinDomain.java | 853 ------------------
.../solr/cloud/TestCloudJSONFacetSKG.java | 677 ---------------
.../org/apache/solr/search/facet/DebugAgg.java | 2 +-
.../facet/TestCloudJSONFacetJoinDomain.java | 855 +++++++++++++++++++
.../search/facet/TestCloudJSONFacetSKG.java | 678 +++++++++++++++
6 files changed, 1536 insertions(+), 1531 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/71988c75/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 32c8ae2..bb8f86d 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -178,6 +178,8 @@ Other Changes
* SOLR-12423: Upgrade to Tika 1.19.1 when available (Tim Allison via Erick Erickson)
+* SOLR-12793: Move TestCloudJSONFacetJoinDomain and TestCloudJSONFacetSKG to the facet test package (Varun Thacker)
+
Bug Fixes
----------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/71988c75/solr/core/src/test/org/apache/solr/cloud/TestCloudJSONFacetJoinDomain.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudJSONFacetJoinDomain.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudJSONFacetJoinDomain.java
deleted file mode 100644
index 150a08d..0000000
--- a/solr/core/src/test/org/apache/solr/cloud/TestCloudJSONFacetJoinDomain.java
+++ /dev/null
@@ -1,853 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.cloud;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.lucene.util.TestUtil;
-import org.apache.solr.client.solrj.SolrClient;
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.embedded.JettySolrRunner;
-import org.apache.solr.client.solrj.impl.CloudSolrClient;
-import org.apache.solr.client.solrj.impl.HttpSolrClient;
-import org.apache.solr.client.solrj.request.CollectionAdminRequest;
-import org.apache.solr.client.solrj.request.QueryRequest;
-import org.apache.solr.client.solrj.response.QueryResponse;
-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.search.facet.FacetField;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * <p>
- * Tests randomized JSON Facets, sometimes using query 'join' domain transfers and/or domain 'filter' options
- * </p>
- * <p>
- * The results of each facet constraint count will be compared with a verification query using an equivalent filter
- * </p>
- *
- * @see TestCloudPivotFacet
- */
-public class TestCloudJSONFacetJoinDomain extends SolrCloudTestCase {
-
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- private static final String DEBUG_LABEL = MethodHandles.lookup().lookupClass().getName();
- private static final String COLLECTION_NAME = DEBUG_LABEL + "_collection";
-
- private static final int DEFAULT_LIMIT = FacetField.DEFAULT_FACET_LIMIT;
- private static final int MAX_FIELD_NUM = 15;
- private static final int UNIQUE_FIELD_VALS = 20;
-
- // NOTE: set to 'true' to see if refinement testing is adequate (should get fails occasionally)
- private static final boolean FORCE_DISABLE_REFINEMENT = false;
-
- /** Multivalued string field suffixes that can be randomized for testing diff facet/join code paths */
- private static final String[] STR_FIELD_SUFFIXES = new String[] { "_ss", "_sds", "_sdsS" };
- /** Multivalued int field suffixes that can be randomized for testing diff facet/join code paths */
- private static final String[] INT_FIELD_SUFFIXES = new String[] { "_is", "_ids", "_idsS" };
-
- /** A basic client for operations at the cloud level, default collection will be set */
- private static CloudSolrClient CLOUD_CLIENT;
- /** One client per node */
- private static ArrayList<HttpSolrClient> CLIENTS = new ArrayList<>(5);
-
- @BeforeClass
- private static void createMiniSolrCloudCluster() throws Exception {
- // sanity check constants
- assertTrue("bad test constants: some suffixes will never be tested",
- (STR_FIELD_SUFFIXES.length < MAX_FIELD_NUM) && (INT_FIELD_SUFFIXES.length < MAX_FIELD_NUM));
-
- // we need DVs on point fields to compute stats & facets
- if (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP)) System.setProperty(NUMERIC_DOCVALUES_SYSPROP,"true");
-
- // multi replicas should not matter...
- final int repFactor = usually() ? 1 : 2;
- // ... but we definitely want to test multiple shards
- final int numShards = TestUtil.nextInt(random(), 1, (usually() ? 2 :3));
- final int numNodes = (numShards * repFactor);
-
- final String configName = DEBUG_LABEL + "_config-set";
- final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf");
-
- configureCluster(numNodes).addConfig(configName, configDir).configure();
-
- Map<String, String> collectionProperties = new LinkedHashMap<>();
- collectionProperties.put("config", "solrconfig-tlog.xml");
- collectionProperties.put("schema", "schema_latest.xml");
- CollectionAdminRequest.createCollection(COLLECTION_NAME, configName, numShards, repFactor)
- .setProperties(collectionProperties)
- .process(cluster.getSolrClient());
-
- CLOUD_CLIENT = cluster.getSolrClient();
- CLOUD_CLIENT.setDefaultCollection(COLLECTION_NAME);
-
- waitForRecoveriesToFinish(CLOUD_CLIENT);
-
- for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
- CLIENTS.add(getHttpSolrClient(jetty.getBaseUrl() + "/" + COLLECTION_NAME + "/"));
- }
-
- final int numDocs = atLeast(100);
- for (int id = 0; id < numDocs; id++) {
- SolrInputDocument doc = sdoc("id", ""+id);
- for (int fieldNum = 0; fieldNum < MAX_FIELD_NUM; fieldNum++) {
- // NOTE: some docs may have no value in a field
- final int numValsThisDoc = TestUtil.nextInt(random(), 0, (usually() ? 3 : 6));
- for (int v = 0; v < numValsThisDoc; v++) {
- final String fieldValue = randFieldValue(fieldNum);
-
- // for each fieldNum, there are actually two fields: one string, and one integer
- doc.addField(field(STR_FIELD_SUFFIXES, fieldNum), fieldValue);
- doc.addField(field(INT_FIELD_SUFFIXES, fieldNum), fieldValue);
- }
- }
- CLOUD_CLIENT.add(doc);
- if (random().nextInt(100) < 1) {
- CLOUD_CLIENT.commit(); // commit 1% of the time to create new segments
- }
- if (random().nextInt(100) < 5) {
- CLOUD_CLIENT.add(doc); // duplicate the doc 5% of the time to create deleted docs
- }
- }
- CLOUD_CLIENT.commit();
- }
-
- /**
- * Given a (random) number, and a (static) array of possible suffixes returns a consistent field name that
- * uses that number and one of the specified suffixes in its name.
- *
- * @see #STR_FIELD_SUFFIXES
- * @see #INT_FIELD_SUFFIXES
- * @see #MAX_FIELD_NUM
- * @see #randFieldValue
- */
- private static String field(final String[] suffixes, final int fieldNum) {
- assert fieldNum < MAX_FIELD_NUM;
-
- final String suffix = suffixes[fieldNum % suffixes.length];
- return "field_" + fieldNum + suffix;
- }
- private static String strfield(final int fieldNum) {
- return field(STR_FIELD_SUFFIXES, fieldNum);
- }
- private static String intfield(final int fieldNum) {
- return field(INT_FIELD_SUFFIXES, fieldNum);
- }
-
- /**
- * Given a (random) field number, returns a random (integer based) value for that field.
- * NOTE: The number of unique values in each field is constant according to {@link #UNIQUE_FIELD_VALS}
- * but the precise <em>range</em> of values will vary for each unique field number, such that cross field joins
- * will match fewer documents based on how far apart the field numbers are.
- *
- * @see #UNIQUE_FIELD_VALS
- * @see #field
- */
- private static String randFieldValue(final int fieldNum) {
- return "" + (fieldNum + TestUtil.nextInt(random(), 1, UNIQUE_FIELD_VALS));
- }
-
-
- @AfterClass
- private static void afterClass() throws Exception {
- CLOUD_CLIENT.close(); CLOUD_CLIENT = null;
- for (HttpSolrClient client : CLIENTS) {
- client.close();
- }
- CLIENTS = null;
- }
-
- /** Sanity check that malformed requests produce errors */
- public void testMalformedGivesError() throws Exception {
-
- ignoreException(".*'join' domain change.*");
-
- for (String join : Arrays.asList("bogus",
- "{ }",
- "{ from:null, to:foo_s }",
- "{ from:foo_s }",
- "{ from:foo_s, to:foo_s, bogus:'what what?' }",
- "{ to:foo_s, bogus:'what what?' }")) {
- SolrException e = expectThrows(SolrException.class, () -> {
- final SolrParams req = params("q", "*:*", "json.facet",
- "{ x : { type:terms, field:x_s, domain: { join:"+join+" } } }");
- final NamedList trash = getRandClient(random()).request(new QueryRequest(req));
- });
- assertEquals(join + " -> " + e, SolrException.ErrorCode.BAD_REQUEST.code, e.code());
- assertTrue(join + " -> " + e, e.getMessage().contains("'join' domain change"));
- }
- }
-
- public void testSanityCheckDomainMethods() throws Exception {
- {
- final JoinDomain empty = new JoinDomain(null, null, null);
- assertEquals(null, empty.toJSONFacetParamValue());
- final SolrParams out = empty.applyDomainToQuery("safe_key", params("q","qqq"));
- assertNotNull(out);
- assertEquals(null, out.get("safe_key"));
- assertEquals("qqq", out.get("q"));
- }
- {
- final JoinDomain join = new JoinDomain("xxx", "yyy", null);
- assertEquals("domain:{join:{from:xxx,to:yyy}}", join.toJSONFacetParamValue().toString());
- final SolrParams out = join.applyDomainToQuery("safe_key", params("q","qqq"));
- assertNotNull(out);
- assertEquals("qqq", out.get("safe_key"));
- assertEquals("{!join from=xxx to=yyy v=$safe_key}", out.get("q"));
-
- }
- {
- final JoinDomain filter = new JoinDomain(null, null, "zzz");
- assertEquals("domain:{filter:'zzz'}", filter.toJSONFacetParamValue().toString());
- final SolrParams out = filter.applyDomainToQuery("safe_key", params("q","qqq"));
- assertNotNull(out);
- assertEquals(null, out.get("safe_key"));
- assertEquals("zzz AND qqq", out.get("q"));
- }
- {
- final JoinDomain both = new JoinDomain("xxx", "yyy", "zzz");
- assertEquals("domain:{join:{from:xxx,to:yyy},filter:'zzz'}", both.toJSONFacetParamValue().toString());
- final SolrParams out = both.applyDomainToQuery("safe_key", params("q","qqq"));
- assertNotNull(out);
- assertEquals("qqq", out.get("safe_key"));
- assertEquals("zzz AND {!join from=xxx to=yyy v=$safe_key}", out.get("q"));
- }
- }
-
- /**
- * Test some small, hand crafted, but non-trivial queries that are
- * easier to trace/debug than a pure random monstrosity.
- * (ie: if something obvious gets broken, this test may fail faster and in a more obvious way than testRandom)
- */
- public void testBespoke() throws Exception {
-
- { // sanity check our test methods can handle a query matching no docs
- Map<String,TermFacet> facets = new LinkedHashMap<>();
- TermFacet top = new TermFacet(strfield(9), new JoinDomain(strfield(5), strfield(9), strfield(9)+":[* TO *]"));
- top.subFacets.put("sub", new TermFacet(strfield(11), new JoinDomain(strfield(8), strfield(8), null)));
- facets.put("empty_top", top);
- final AtomicInteger maxBuckets = new AtomicInteger(UNIQUE_FIELD_VALS);
- assertFacetCountsAreCorrect(maxBuckets, facets, strfield(7) + ":bogus");
- assertEquals("Empty search result shouldn't have found a single bucket",
- UNIQUE_FIELD_VALS, maxBuckets.get());
- }
-
- { // sanity check our test methods can handle a query where a facet filter prevents any doc from having terms
- Map<String,TermFacet> facets = new LinkedHashMap<>();
- TermFacet top = new TermFacet(strfield(9), new JoinDomain(null, null, "-*:*"));
- top.subFacets.put("sub", new TermFacet(strfield(11), new JoinDomain(strfield(8), strfield(8), null)));
- facets.put("filtered_top", top);
- final AtomicInteger maxBuckets = new AtomicInteger(UNIQUE_FIELD_VALS);
- assertFacetCountsAreCorrect(maxBuckets, facets, "*:*");
- assertEquals("Empty join filter shouldn't have found a single bucket",
- UNIQUE_FIELD_VALS, maxBuckets.get());
- }
-
- { // sanity check our test methods can handle a query where a facet filter prevents any doc from having sub-terms
- Map<String,TermFacet> facets = new LinkedHashMap<>();
- TermFacet top = new TermFacet(strfield(9), new JoinDomain(strfield(8), strfield(8), null));
- top.subFacets.put("sub", new TermFacet(strfield(11), new JoinDomain(null, null, "-*:*")));
- facets.put("filtered_top", top);
- final AtomicInteger maxBuckets = new AtomicInteger(UNIQUE_FIELD_VALS);
- assertFacetCountsAreCorrect(maxBuckets, facets, "*:*");
- assertTrue("Didn't check a single bucket???", maxBuckets.get() < UNIQUE_FIELD_VALS);
- }
-
- { // strings
- Map<String,TermFacet> facets = new LinkedHashMap<>();
- TermFacet top = new TermFacet(strfield(9), new JoinDomain(strfield(5), strfield(9), strfield(9)+":[* TO *]"));
- top.subFacets.put("facet_5", new TermFacet(strfield(11), new JoinDomain(strfield(8), strfield(8), null)));
- facets.put("facet_4", top);
- final AtomicInteger maxBuckets = new AtomicInteger(UNIQUE_FIELD_VALS * UNIQUE_FIELD_VALS);
- assertFacetCountsAreCorrect(maxBuckets, facets, "("+strfield(7)+":16 OR "+strfield(9)+":16 OR "+strfield(6)+":19 OR "+strfield(0)+":11)");
- assertTrue("Didn't check a single bucket???", maxBuckets.get() < UNIQUE_FIELD_VALS * UNIQUE_FIELD_VALS);
- }
-
- { // ints
- Map<String,TermFacet> facets = new LinkedHashMap<>();
- TermFacet top = new TermFacet(intfield(9), new JoinDomain(intfield(5), intfield(9), null));
- facets.put("top", top);
- final AtomicInteger maxBuckets = new AtomicInteger(UNIQUE_FIELD_VALS * UNIQUE_FIELD_VALS);
- assertFacetCountsAreCorrect(maxBuckets, facets, "("+intfield(7)+":16 OR "+intfield(3)+":13)");
- assertTrue("Didn't check a single bucket???", maxBuckets.get() < UNIQUE_FIELD_VALS * UNIQUE_FIELD_VALS);
- }
-
- { // some domains with filter only, no actual join
- Map<String,TermFacet> facets = new LinkedHashMap<>();
- TermFacet top = new TermFacet(strfield(9), new JoinDomain(null, null, strfield(9)+":[* TO *]"));
- top.subFacets.put("facet_5", new TermFacet(strfield(11), new JoinDomain(null, null, strfield(3)+":[* TO 5]")));
- facets.put("top", top);
- final AtomicInteger maxBuckets = new AtomicInteger(UNIQUE_FIELD_VALS * UNIQUE_FIELD_VALS);
- assertFacetCountsAreCorrect(maxBuckets, facets, "("+strfield(7)+":16 OR "+strfield(9)+":16 OR "+strfield(6)+":19 OR "+strfield(0)+":11)");
- assertTrue("Didn't check a single bucket???", maxBuckets.get() < UNIQUE_FIELD_VALS * UNIQUE_FIELD_VALS);
-
- }
-
- { // low limits, explicit refinement
- Map<String,TermFacet> facets = new LinkedHashMap<>();
- TermFacet top = new TermFacet(strfield(9),
- new JoinDomain(strfield(5), strfield(9), strfield(9)+":[* TO *]"),
- 5, 0, true);
- top.subFacets.put("facet_5", new TermFacet(strfield(11),
- new JoinDomain(strfield(8), strfield(8), null),
- 10, 0, true));
- facets.put("facet_4", top);
- final AtomicInteger maxBuckets = new AtomicInteger(5 * 10);
- assertFacetCountsAreCorrect(maxBuckets, facets, "("+strfield(7)+":6 OR "+strfield(9)+":6 OR "+strfield(6)+":19 OR "+strfield(0)+":11)");
- assertTrue("Didn't check a single bucket???", maxBuckets.get() < 5 * 10);
- }
-
- { // low limit, high overrequest
- Map<String,TermFacet> facets = new LinkedHashMap<>();
- TermFacet top = new TermFacet(strfield(9),
- new JoinDomain(strfield(5), strfield(9), strfield(9)+":[* TO *]"),
- 5, UNIQUE_FIELD_VALS + 10, false);
- top.subFacets.put("facet_5", new TermFacet(strfield(11),
- new JoinDomain(strfield(8), strfield(8), null),
- 10, UNIQUE_FIELD_VALS + 10, false));
- facets.put("facet_4", top);
- final AtomicInteger maxBuckets = new AtomicInteger(5 * 10);
- assertFacetCountsAreCorrect(maxBuckets, facets, "("+strfield(7)+":6 OR "+strfield(9)+":6 OR "+strfield(6)+":19 OR "+strfield(0)+":11)");
- assertTrue("Didn't check a single bucket???", maxBuckets.get() < 5 * 10);
- }
-
- { // low limit, low overrequest, explicit refinement
- Map<String,TermFacet> facets = new LinkedHashMap<>();
- TermFacet top = new TermFacet(strfield(9),
- new JoinDomain(strfield(5), strfield(9), strfield(9)+":[* TO *]"),
- 5, 7, true);
- top.subFacets.put("facet_5", new TermFacet(strfield(11),
- new JoinDomain(strfield(8), strfield(8), null),
- 10, 7, true));
- facets.put("facet_4", top);
- final AtomicInteger maxBuckets = new AtomicInteger(5 * 10);
- assertFacetCountsAreCorrect(maxBuckets, facets, "("+strfield(7)+":6 OR "+strfield(9)+":6 OR "+strfield(6)+":19 OR "+strfield(0)+":11)");
- assertTrue("Didn't check a single bucket???", maxBuckets.get() < 5 * 10);
- }
-
- }
-
- public void testTheTestRandomRefineParam() {
- // sanity check that randomRefineParam never violates isRefinementNeeded
- // (should be impossible ... unless someone changes/breaks the randomization logic in the future)
- final int numIters = atLeast(100);
- for (int iter = 0; iter < numIters; iter++) {
- final Integer limit = TermFacet.randomLimitParam(random());
- final Integer overrequest = TermFacet.randomOverrequestParam(random());
- final Boolean refine = TermFacet.randomRefineParam(random(), limit, overrequest);
- if (TermFacet.isRefinementNeeded(limit, overrequest)) {
- assertEquals("limit: " + limit + ", overrequest: " + overrequest + ", refine: " + refine,
- Boolean.TRUE, refine);
- }
- }
- }
-
- public void testTheTestTermFacetShouldFreakOutOnBadRefineOptions() {
- expectThrows(AssertionError.class, () -> {
- final TermFacet bogus = new TermFacet("foo", null, 5, 0, false);
- });
- }
-
- public void testRandom() throws Exception {
-
- // we put a safety valve in place on the maximum number of buckets that we are willing to verify
- // across *all* the queries that we do.
- // that way if the randomized queries we build all have relatively small facets, so be it, but if
- // we get a really big one early on, we can test as much as possible, skip other iterations.
- //
- // (deeply nested facets may contain more buckets than the max, but we won't *check* all of them)
- final int maxBucketsAllowed = atLeast(2000);
- final AtomicInteger maxBucketsToCheck = new AtomicInteger(maxBucketsAllowed);
-
- final int numIters = atLeast(20);
- for (int iter = 0; iter < numIters && 0 < maxBucketsToCheck.get(); iter++) {
- assertFacetCountsAreCorrect(maxBucketsToCheck, TermFacet.buildRandomFacets(), buildRandomQuery());
- }
- assertTrue("Didn't check a single bucket???", maxBucketsToCheck.get() < maxBucketsAllowed);
- }
-
- /**
- * Generates a random query string across the randomized fields/values in the index
- *
- * @see #randFieldValue
- * @see #field
- */
- private static String buildRandomQuery() {
- if (0 == TestUtil.nextInt(random(), 0,10)) {
- return "*:*";
- }
- final int numClauses = TestUtil.nextInt(random(), 3, 10);
- List<String> clauses = new ArrayList<String>(numClauses);
- for (int c = 0; c < numClauses; c++) {
- final int fieldNum = random().nextInt(MAX_FIELD_NUM);
- // keep queries simple, just use str fields - not point of test
- clauses.add(strfield(fieldNum) + ":" + randFieldValue(fieldNum));
- }
- return "(" + StringUtils.join(clauses, " OR ") + ")";
- }
-
- /**
- * Given a set of (potentially nested) term facets, and a base query string, asserts that
- * the actual counts returned when executing that query with those facets match the expected results
- * of filtering on the equivalent facet terms+domain
- */
- private void assertFacetCountsAreCorrect(final AtomicInteger maxBucketsToCheck,
- Map<String,TermFacet> expected,
- final String query) throws SolrServerException, IOException {
-
- final SolrParams baseParams = params("q", query, "rows","0");
- final SolrParams facetParams = params("json.facet", ""+TermFacet.toJSONFacetParamValue(expected));
- final SolrParams initParams = SolrParams.wrapAppended(facetParams, baseParams);
-
- log.info("Doing full run: {}", initParams);
-
- QueryResponse rsp = null;
- // JSON Facets not (currently) available from QueryResponse...
- NamedList topNamedList = null;
- try {
- rsp = (new QueryRequest(initParams)).process(getRandClient(random()));
- assertNotNull(initParams + " is null rsp?", rsp);
- topNamedList = rsp.getResponse();
- assertNotNull(initParams + " is null topNamedList?", topNamedList);
- } catch (Exception e) {
- throw new RuntimeException("init query failed: " + initParams + ": " +
- e.getMessage(), e);
- }
- try {
- final NamedList facetResponse = (NamedList) topNamedList.get("facets");
- assertNotNull("null facet results?", facetResponse);
- assertEquals("numFound mismatch with top count?",
- rsp.getResults().getNumFound(), ((Number)facetResponse.get("count")).longValue());
- if (0 == rsp.getResults().getNumFound()) {
- // when the query matches nothing, we should expect no top level facets
- expected = Collections.emptyMap();
- }
- assertFacetCountsAreCorrect(maxBucketsToCheck, expected, baseParams, facetResponse);
- } catch (AssertionError e) {
- throw new AssertionError(initParams + " ===> " + topNamedList + " --> " + e.getMessage(), e);
- } finally {
- log.info("Ending full run");
- }
- }
-
- /**
- * Recursive Helper method that walks the actual facet response, comparing the counts to the expected output
- * based on the equivalent filters generated from the original TermFacet.
- */
- private void assertFacetCountsAreCorrect(final AtomicInteger maxBucketsToCheck,
- final Map<String,TermFacet> expected,
- final SolrParams baseParams,
- final NamedList actualFacetResponse) throws SolrServerException, IOException {
-
- for (Map.Entry<String,TermFacet> entry : expected.entrySet()) {
- final String facetKey = entry.getKey();
- final TermFacet facet = entry.getValue();
- final NamedList results = (NamedList) actualFacetResponse.get(facetKey);
- assertNotNull(facetKey + " key missing from: " + actualFacetResponse, results);
- final List<NamedList> buckets = (List<NamedList>) results.get("buckets");
- assertNotNull(facetKey + " has null buckets: " + actualFacetResponse, buckets);
-
- if (buckets.isEmpty()) {
- // should only happen if the baseParams query does not match any docs with our field X
- final long docsWithField = getRandClient(random()).query
- (facet.applyValueConstraintAndDomain(baseParams, facetKey, "[* TO *]")).getResults().getNumFound();
- assertEquals(facetKey + " has no buckets, but docs in query exist with field: " + facet.field,
- 0, docsWithField);
- }
-
- for (NamedList bucket : buckets) {
- final long count = ((Number) bucket.get("count")).longValue();
- final String fieldVal = bucket.get("val").toString(); // int or stringified int
-
- // change our query to filter on the fieldVal, and wrap in the facet domain (if any)
- final SolrParams verifyParams = facet.applyValueConstraintAndDomain(baseParams, facetKey, fieldVal);
-
- // check the count for this bucket
- assertEquals(facetKey + ": " + verifyParams,
- count, getRandClient(random()).query(verifyParams).getResults().getNumFound());
-
- if (maxBucketsToCheck.decrementAndGet() <= 0) {
- return;
- }
-
- // recursively check subFacets
- if (! facet.subFacets.isEmpty()) {
- assertFacetCountsAreCorrect(maxBucketsToCheck, facet.subFacets, verifyParams, bucket);
- }
- }
- }
- assertTrue("facets have unexpected keys left over: " + actualFacetResponse,
- // should always be a count, maybe a 'val' if we're a subfacet
- (actualFacetResponse.size() == expected.size() + 1) ||
- (actualFacetResponse.size() == expected.size() + 2));
- }
-
-
- /**
- * Trivial data structure for modeling a simple terms facet that can be written out as a json.facet param.
- *
- * Doesn't do any string escaping or quoting, so don't use whitespace or reserved json characters
- */
- private static final class TermFacet {
- public final String field;
- public final Map<String,TermFacet> subFacets = new LinkedHashMap<>();
- public final JoinDomain domain; // may be null
- public final Integer limit; // may be null
- public final Integer overrequest; // may be null
- public final Boolean refine; // may be null
-
- /** Simplified constructor asks for limit = # unique vals */
- public TermFacet(String field, JoinDomain domain) {
- this(field, domain, UNIQUE_FIELD_VALS, 0, false);
- }
- public TermFacet(String field, JoinDomain domain, Integer limit, Integer overrequest, Boolean refine) {
- assert null != field;
- this.field = field;
- this.domain = domain;
- this.limit = limit;
- this.overrequest = overrequest;
- this.refine = refine;
- if (isRefinementNeeded(limit, overrequest)) {
- assertEquals("Invalid refine param based on limit & overrequest: " + this.toString(),
- Boolean.TRUE, refine);
- }
- }
-
- /**
- * Returns new SolrParams that:
- * <ul>
- * <li>copy the original SolrParams</li>
- * <li>modify/wrap the original "q" param to capture the domain change for this facet (if any)</li>
- * <li>add a filter query against this field with the specified value</li>
- * </ul>
- *
- * @see JoinDomain#applyDomainToQuery
- */
- public SolrParams applyValueConstraintAndDomain(SolrParams orig, String facetKey, String facetVal) {
- // first wrap our original query in the domain if there is one...
- if (null != domain) {
- orig = domain.applyDomainToQuery(facetKey + "_q", orig);
- }
- // then filter by the facet value we need to test...
- final ModifiableSolrParams out = new ModifiableSolrParams(orig);
- out.set("q", field + ":" + facetVal + " AND " + orig.get("q"));
-
- return out;
- }
-
- /**
- * recursively generates the <code>json.facet</code> param value to use for testing this facet
- */
- private CharSequence toJSONFacetParamValue() {
- final String limitStr = (null == limit) ? "" : (", limit:" + limit);
- final String overrequestStr = (null == overrequest) ? "" : (", overrequest:" + overrequest);
- final String refineStr = (null == refine) ? "" : ", refine:" + refine;
- final StringBuilder sb = new StringBuilder("{ type:terms, field:" + field + limitStr + overrequestStr + refineStr);
- if (! subFacets.isEmpty()) {
- sb.append(", facet:");
- sb.append(toJSONFacetParamValue(subFacets));
- }
- if (null != domain) {
- CharSequence ds = domain.toJSONFacetParamValue();
- if (null != ds) {
- sb.append(", ").append(ds);
- }
- }
- sb.append("}");
- return sb;
- }
-
- /**
- * Given a set of (possibly nested) facets, generates a suitable <code>json.facet</code> param value to
- * use for testing them against in a solr request.
- */
- public static CharSequence toJSONFacetParamValue(Map<String,TermFacet> facets) {
- assert null != facets;
- assert 0 < facets.size();
- StringBuilder sb = new StringBuilder("{");
- for (String key : facets.keySet()) {
- sb.append(key).append(" : ").append(facets.get(key).toJSONFacetParamValue());
- sb.append(" ,");
- }
- sb.setLength(sb.length() - 1);
- sb.append("}");
- return sb;
- }
-
- /**
- * Factory method for generating some random (nested) facets.
- *
- * For simplicity, each facet will have a unique key name, regardless of its depth under other facets
- *
- * @see JoinDomain
- */
- public static Map<String,TermFacet> buildRandomFacets() {
- // for simplicity, use a unique facet key regardless of depth - simplifies verification
- AtomicInteger keyCounter = new AtomicInteger(0);
- final int maxDepth = TestUtil.nextInt(random(), 0, (usually() ? 2 : 3));
- return buildRandomFacets(keyCounter, maxDepth);
- }
-
- /**
- * picks a random value for the "limit" param, biased in favor of interesting test cases
- *
- * @return a number to specify in the request, or null to specify nothing (trigger default behavior)
- * @see #UNIQUE_FIELD_VALS
- */
- public static Integer randomLimitParam(Random r) {
- final int limit = 1 + r.nextInt(UNIQUE_FIELD_VALS * 2);
- if (limit >= UNIQUE_FIELD_VALS && r.nextBoolean()) {
- return -1; // unlimited
- } else if (limit == DEFAULT_LIMIT && r.nextBoolean()) {
- return null; // sometimes, don't specify limit if it's the default
- }
- return limit;
- }
-
- /**
- * picks a random value for the "overrequest" param, biased in favor of interesting test cases
- *
- * @return a number to specify in the request, or null to specify nothing (trigger default behavior)
- * @see #UNIQUE_FIELD_VALS
- */
- public static Integer randomOverrequestParam(Random r) {
- switch(r.nextInt(10)) {
- case 0:
- case 1:
- case 2:
- case 3:
- return 0; // 40% of the time, no overrequest to better stress refinement
- case 4:
- case 5:
- return r.nextInt(UNIQUE_FIELD_VALS); // 20% ask for less than what's needed
- case 6:
- return r.nextInt(Integer.MAX_VALUE); // 10%: completely random value, statistically more than enough
- default: break;
- }
- // else.... either leave param unspecified (or redundantly specify the -1 default)
- return r.nextBoolean() ? null : -1;
- }
-
- /**
- * picks a random value for the "refine" param, that is guaranteed to be suitable for
- * the specified limit & overrequest params.
- *
- * @return a value to specify in the request, or null to specify nothing (trigger default behavior)
- * @see #randomLimitParam
- * @see #randomOverrequestParam
- * @see #UNIQUE_FIELD_VALS
- */
- public static Boolean randomRefineParam(Random r, Integer limitParam, Integer overrequestParam) {
- if (isRefinementNeeded(limitParam, overrequestParam)) {
- return true;
- }
-
- // refinement is not required
- if (0 == r.nextInt(10)) { // once in a while, turn on refinement even if it isn't needed.
- return true;
- }
- // explicitly or implicitly indicate refinement is not needed
- return r.nextBoolean() ? false : null;
- }
-
- /**
- * Deterministically identifies if the specified limit & overrequest params <b>require</b>
- * a "refine:true" param be used in the request, in order for the counts to be 100% accurate.
- *
- * @see #UNIQUE_FIELD_VALS
- */
- public static boolean isRefinementNeeded(Integer limitParam, Integer overrequestParam) {
-
- if (FORCE_DISABLE_REFINEMENT) {
- return false;
- }
-
- // use the "effective" values if the params are null
- final int limit = null == limitParam ? DEFAULT_LIMIT : limitParam;
- final int overrequest = null == overrequestParam ? 0 : overrequestParam;
-
- return
- // don't presume how much overrequest will be done by default, just check the limit
- (overrequest < 0 && limit < UNIQUE_FIELD_VALS)
- // if the user specified overrequest is not "enough" to get all unique values
- || (overrequest >= 0 && (long)limit + overrequest < UNIQUE_FIELD_VALS);
- }
-
- /**
-  * Recursive helper that builds a random set of facets.
-  *
-  * @param keyCounter used to ensure every generated facet has a unique key name
-  * @param maxDepth max possible depth allowed for the recursion, a lower value may be used depending on how many facets are returned at the current level.
-  */
- private static Map<String,TermFacet> buildRandomFacets(AtomicInteger keyCounter, int maxDepth) {
-   final int numFacets = Math.max(1, TestUtil.nextInt(random(), -1, 3)); // 3/5th chance of being '1'
-   // if we're going wide, don't go deep
-   final int nextMaxDepth = Math.max(0, maxDepth - numFacets);
-   final Map<String,TermFacet> results = new LinkedHashMap<>();
-   for (int i = 0; i < numFacets; i++) {
-     // NOTE: the order of these random draws is kept stable so a given seed builds the same facets
-     final JoinDomain domain = JoinDomain.buildRandomDomain();
-     assert null != domain;
-     final Integer limit = randomLimitParam(random());
-     final Integer overrequest = randomOverrequestParam(random());
-     final String[] suffixes = random().nextBoolean() ? STR_FIELD_SUFFIXES : INT_FIELD_SUFFIXES;
-     final String fieldName = field(suffixes, random().nextInt(MAX_FIELD_NUM));
-     final Boolean refine = randomRefineParam(random(), limit, overrequest);
-     final TermFacet facet = new TermFacet(fieldName, domain, limit, overrequest, refine);
-     results.put("facet_" + keyCounter.incrementAndGet(), facet);
-     if (maxDepth > 0) {
-       final int childDepth = TestUtil.nextInt(random(), 0, nextMaxDepth);
-       facet.subFacets.putAll(buildRandomFacets(keyCounter, childDepth));
-     }
-   }
-   return results;
- }
- }
-
-
- /**
-  * Describes a facet Domain Change made up of a 'join', a 'filter', or both.
-  */
- private static final class JoinDomain {
-   public final String from;
-   public final String to;
-   public final String filter; // a single filter is plenty, multiple filters are not the point of the test
-
-   /**
-    * @param from left side of join field name, null if domain involves no joining
-    * @param to right side of join field name, null if domain involves no joining
-    * @param filter filter to apply to domain, null if domain involves no filtering
-    */
-   public JoinDomain(String from, String to, String filter) {
-     // both sides of the join must be specified, or neither
-     assert (null == from) == (null == to) : "if from is null, to must be null";
-     this.from = from;
-     this.to = to;
-     this.filter = filter;
-   }
-
-   /**
-    * @return the JSON string representing this domain for use in a facet param, or null if no domain should be used
-    */
-   public CharSequence toJSONFacetParamValue() {
-     final boolean hasJoin = (null != from);
-     final boolean hasFilter = (null != filter);
-     if (!hasJoin && !hasFilter) {
-       return null;
-     }
-     final StringBuilder json = new StringBuilder("domain:{");
-     if (hasJoin) {
-       assert null != to;
-       json.append("join:{from:").append(from).append(",to:").append(to).append("}");
-     }
-     if (hasJoin && hasFilter) {
-       json.append(",");
-     }
-     if (hasFilter) {
-       json.append("filter:'").append(filter).append("'");
-     }
-     return json.append("}");
-   }
-
-   /**
-    * Given some original SolrParams, returns new SolrParams where the original "q" param is wrapped
-    * as needed to apply the equivalent transformation to a query as this domain would to a facet
-    */
-   public SolrParams applyDomainToQuery(String safeKey, SolrParams in) {
-     assert null == in.get(safeKey); // shouldn't be possible if every facet uses a unique key string
-
-     final String originalQ = in.get("q");
-     final ModifiableSolrParams out = new ModifiableSolrParams(in);
-     String wrappedQ = originalQ;
-     if (null != from) {
-       // stash the original query under the safe key and reference it from the join
-       out.set(safeKey, originalQ);
-       wrappedQ = "{!join from="+from+" to="+to+" v=$"+safeKey+"}";
-     }
-     if (null != filter) {
-       wrappedQ = filter + " AND " + wrappedQ;
-     }
-     out.set("q", wrappedQ);
-     return out;
-   }
-
-   /**
-    * Factory method for creating a random domain change to use with a facet - may return a 'noop' JoinDomain,
-    * but will never return null.
-    */
-   public static JoinDomain buildRandomDomain() {
-
-     // use consistent type on both sides of join
-     final String[] suffixes = random().nextBoolean() ? STR_FIELD_SUFFIXES : INT_FIELD_SUFFIXES;
-
-     final boolean noJoin = random().nextBoolean();
-
-     String from = null;
-     String to = null;
-     if (!noJoin) {
-       do {
-         from = field(suffixes, random().nextInt(MAX_FIELD_NUM));
-         to = field(suffixes, random().nextInt(MAX_FIELD_NUM));
-         // HACK: joined numeric point fields need docValues.. for now just re-pick if either side
-         // is an _is field while we are dealing with points.
-       } while (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP) && (from.endsWith("_is") || to.endsWith("_is")));
-     }
-
-     // keep it simple, only filter on string fields - not point of test
-     final String filterField = strfield(random().nextInt(MAX_FIELD_NUM));
-
-     final String filter = random().nextBoolean() ? null : filterField+":[* TO *]";
-     return new JoinDomain(from, to, filter);
-   }
- }
-
- /**
-  * Picks a SolrClient at random: either one of the per-node HttpSolrClients or the
-  * CloudSolrClient for our cluster.
-  */
- public static SolrClient getRandClient(Random rand) {
-   // indexes 0..size-1 select a per-node client; the extra index (== size) selects the cloud client
-   final int cloudClientSlot = CLIENTS.size();
-   final int pick = TestUtil.nextInt(rand, 0, cloudClientSlot);
-   if (pick != cloudClientSlot) {
-     return CLIENTS.get(pick);
-   }
-   return CLOUD_CLIENT;
- }
-
- /**
-  * Delegates to {@link AbstractDistribZkTestBase#waitForRecoveriesToFinish} for the
-  * default collection of the given client, which must be set.
-  */
- public static void waitForRecoveriesToFinish(CloudSolrClient client) throws Exception {
-   final String collection = client.getDefaultCollection();
-   assert null != collection;
-   AbstractDistribZkTestBase.waitForRecoveriesToFinish(collection, client.getZkStateReader(), true, true, 330);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/71988c75/solr/core/src/test/org/apache/solr/cloud/TestCloudJSONFacetSKG.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudJSONFacetSKG.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudJSONFacetSKG.java
deleted file mode 100644
index 24384cc..0000000
--- a/solr/core/src/test/org/apache/solr/cloud/TestCloudJSONFacetSKG.java
+++ /dev/null
@@ -1,677 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.cloud;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import org.apache.commons.lang.StringUtils;
-
-import org.apache.lucene.util.LuceneTestCase.Slow;
-import org.apache.lucene.util.TestUtil;
-import org.apache.solr.client.solrj.SolrClient;
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.embedded.JettySolrRunner;
-import org.apache.solr.client.solrj.impl.CloudSolrClient;
-import org.apache.solr.client.solrj.impl.HttpSolrClient;
-import org.apache.solr.client.solrj.request.CollectionAdminRequest;
-import org.apache.solr.client.solrj.request.QueryRequest;
-import org.apache.solr.client.solrj.response.QueryResponse;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.search.facet.FacetField;
-import static org.apache.solr.search.facet.RelatednessAgg.computeRelatedness;
-import static org.apache.solr.search.facet.RelatednessAgg.roundTo5Digits;
-
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * <p>
- * A randomized test of nested facets using the <code>relatedness()</code> function, that asserts the
- * accuracy of the results for all the buckets returned using verification queries of the (expected)
- * foreground & background queries based on the nested facet terms.
- * <p>
- * Note that unlike normal facet "count" verification, using a high limit + overrequest isn't a substitute
- * for refinement in order to ensure accurate "skg" computation across shards. For that reason, this
- * test forces <code>refine: true</code> (unlike {@link TestCloudJSONFacetJoinDomain}) and specifies a
- * <code>domain: { 'query':'*:*' }</code> for every facet, in order to guarantee that all shards
- * participate in all facets, so that the popularity & relatedness values returned can be proven
- * with validation requests.
- * </p>
- * <p>
- * (Refinement alone is not enough. Using the '*:*' query as the facet domain is necessary to
- * prevent situations where a single shardX may return a candidate bucket with no child-buckets due to
- * the normal facet intersections, but when refined on other shardY(s), can produce "high scoring"
- * SKG child-buckets, which would then be missing the foreground/background "size" contributions from
- * shardX.)
- * </p>
- *
- * @see TestCloudJSONFacetJoinDomain
- */
-@Slow
-public class TestCloudJSONFacetSKG extends SolrCloudTestCase {
-
- private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- private static final String DEBUG_LABEL = MethodHandles.lookup().lookupClass().getName();
- private static final String COLLECTION_NAME = DEBUG_LABEL + "_collection";
-
- private static final int DEFAULT_LIMIT = FacetField.DEFAULT_FACET_LIMIT;
- private static final int MAX_FIELD_NUM = 15;
- private static final int UNIQUE_FIELD_VALS = 50;
-
- /** Multivalued string field suffixes that can be randomized for testing diff facet/join code paths */
- private static final String[] STR_FIELD_SUFFIXES = new String[] { "_ss", "_sds", "_sdsS" };
- /** Multivalued int field suffixes that can be randomized for testing diff facet/join code paths */
- private static final String[] INT_FIELD_SUFFIXES = new String[] { "_is", "_ids", "_idsS" };
-
- /** A basic client for operations at the cloud level, default collection will be set */
- private static CloudSolrClient CLOUD_CLIENT;
- /** One client per node */
- private static ArrayList<HttpSolrClient> CLIENTS = new ArrayList<>(5);
-
- @BeforeClass // one-time setup: starts the cluster, creates the collection, opens the clients, indexes random docs
- private static void createMiniSolrCloudCluster() throws Exception {
- // sanity check constants
- assertTrue("bad test constants: some suffixes will never be tested",
- (STR_FIELD_SUFFIXES.length < MAX_FIELD_NUM) && (INT_FIELD_SUFFIXES.length < MAX_FIELD_NUM));
-
- // we need DVs on point fields to compute stats & facets
- if (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP)) System.setProperty(NUMERIC_DOCVALUES_SYSPROP,"true");
-
- // multi replicas should not matter...
- final int repFactor = usually() ? 1 : 2;
- // ... but we definitely want to test multiple shards
- // (NOTE: the lower bound of the random range is 1, so a single shard is still possible)
- final int numShards = TestUtil.nextInt(random(), 1, (usually() ? 2 :3));
- final int numNodes = (numShards * repFactor); // one node per replica of each shard
-
- final String configName = DEBUG_LABEL + "_config-set";
- final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf");
-
- configureCluster(numNodes).addConfig(configName, configDir).configure();
-
- Map<String, String> collectionProperties = new LinkedHashMap<>();
- collectionProperties.put("config", "solrconfig-tlog.xml");
- collectionProperties.put("schema", "schema_latest.xml");
- CollectionAdminRequest.createCollection(COLLECTION_NAME, configName, numShards, repFactor)
- .setProperties(collectionProperties)
- .process(cluster.getSolrClient());
-
- CLOUD_CLIENT = cluster.getSolrClient();
- CLOUD_CLIENT.setDefaultCollection(COLLECTION_NAME);
-
- waitForRecoveriesToFinish(CLOUD_CLIENT);
-
- for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
- CLIENTS.add(getHttpSolrClient(jetty.getBaseUrl() + "/" + COLLECTION_NAME + "/")); // one client per node
- }
-
- final int numDocs = atLeast(100);
- for (int id = 0; id < numDocs; id++) {
- SolrInputDocument doc = sdoc("id", ""+id);
- for (int fieldNum = 0; fieldNum < MAX_FIELD_NUM; fieldNum++) {
- // NOTE: we ensure every doc has at least one value in each field
- // that way, if a term is returned for a parent there is guaranteed to be at least
- // one term in the child facet as well.
- //
- // otherwise, we'd face the risk of a single shardX returning parentTermX as a top term for
- // the parent facet, but having no child terms -- meanwhile on refinement another shardY that
- // did *not* return parentTermX in phase#1, could return some *new* child terms under
- // parentTermX, but their stats would not include the bgCount from shardX.
- //
- // in normal operation, this is an edge case that isn't a big deal because the ratios &
- // relatedness scores are statistically approximate, but for the purpose of this test where
- // we verify correctness via exactness we need all shards to contribute to the SKG statistics
- final int numValsThisDoc = TestUtil.nextInt(random(), 1, (usually() ? 5 : 10));
- for (int v = 0; v < numValsThisDoc; v++) {
- final String fieldValue = randFieldValue(fieldNum);
-
- // for each fieldNum, there are actually two fields: one string, and one integer
- doc.addField(field(STR_FIELD_SUFFIXES, fieldNum), fieldValue);
- doc.addField(field(INT_FIELD_SUFFIXES, fieldNum), fieldValue);
- }
- }
- CLOUD_CLIENT.add(doc);
- if (random().nextInt(100) < 1) {
- CLOUD_CLIENT.commit(); // commit 1% of the time to create new segments
- }
- if (random().nextInt(100) < 5) {
- CLOUD_CLIENT.add(doc); // duplicate the doc 5% of the time to create deleted docs
- }
- }
- CLOUD_CLIENT.commit(); // final commit after the last doc has been added
- }
-
- /**
-  * Builds a field name from a (random) number and a (static) array of possible suffixes. The same
-  * number always produces the same name, which uses that number and one of the specified suffixes.
-  *
-  * @see #STR_FIELD_SUFFIXES
-  * @see #INT_FIELD_SUFFIXES
-  * @see #MAX_FIELD_NUM
-  * @see #randFieldValue
-  */
- private static String field(final String[] suffixes, final int fieldNum) {
-   assert fieldNum < MAX_FIELD_NUM;
-
-   // cycle through the suffixes, so a given field number always maps to the same one
-   final int suffixIdx = fieldNum % suffixes.length;
-   return "field_" + fieldNum + suffixes[suffixIdx];
- }
- private static String strfield(final int fieldNum) {
- return field(STR_FIELD_SUFFIXES, fieldNum);
- }
- private static String intfield(final int fieldNum) {
- return field(INT_FIELD_SUFFIXES, fieldNum);
- }
-
- /**
- * Given a (random) field number, returns a random (integer based) value for that field.
- * NOTE: The number of unique values in each field is constant acording to {@link #UNIQUE_FIELD_VALS}
- * but the precise <em>range</em> of values will vary for each unique field number, such that cross field joins
- * will match fewer documents based on how far apart the field numbers are.
- *
- * @see #UNIQUE_FIELD_VALS
- * @see #field
- */
- private static String randFieldValue(final int fieldNum) {
- return "" + (fieldNum + TestUtil.nextInt(random(), 1, UNIQUE_FIELD_VALS));
- }
-
-
- /**
-  * Closes the clients opened by {@link #createMiniSolrCloudCluster} and drops the references to them.
-  */
- @AfterClass
- private static void afterClass() throws Exception {
-   // CLOUD_CLIENT is still null if the @BeforeClass setup failed before assigning it;
-   // guarding here keeps an NPE from masking the original setup failure
-   if (null != CLOUD_CLIENT) {
-     CLOUD_CLIENT.close();
-     CLOUD_CLIENT = null;
-   }
-   for (HttpSolrClient client : CLIENTS) {
-     client.close();
-   }
-   // empty the list rather than nulling the field: CLIENTS is only initialized once (by its
-   // field initializer), so nulling it would break the setup if the suite runs again in this JVM
-   CLIENTS.clear();
- }
-
- /**
-  * Runs a few small, hand crafted, but non-trivial queries that are
-  * easier to trace/debug than a pure random monstrosity.
-  * (ie: if something obvious gets broken, this test may fail faster and in a more obvious way than testRandom)
-  */
- public void testBespoke() throws Exception {
-   // trivial single level facet
-   assertBespokeFacet("top1", new TermFacet(strfield(9), UNIQUE_FIELD_VALS, 0, null));
-
-   // trivial single level facet w/sorting on skg
-   assertBespokeFacet("top2", new TermFacet(strfield(9), UNIQUE_FIELD_VALS, 0, "skg desc"));
-
-   // trivial single level facet w/ 2 diff ways to request "limit = (effectively) Infinite"
-   // to sanity check refinement of buckets missing from other shard in both cases
-   //
-   // NOTE that these two queries & facets *should* be effectively identical given that the
-   // very large limit value is big enough no shard will ever return that many terms,
-   // but the "limit=-1" case actually triggers slightly different code paths
-   // because it causes FacetField.returnsPartial() to be "true"
-   for (int limit : new int[] { 999999999, -1 }) {
-     assertBespokeFacet("top_facet_limit__" + limit, new TermFacet(strfield(9), limit, 0, "skg desc"));
-   }
- }
-
- /**
-  * Verifies a single top level facet (under the given key) against the fixed
-  * query/foreground/background used by {@link #testBespoke}, and asserts that at least one bucket was checked.
-  */
- private void assertBespokeFacet(final String facetKey, final TermFacet facet) throws Exception {
-   final Map<String,TermFacet> facets = new LinkedHashMap<>();
-   facets.put(facetKey, facet);
-   final AtomicInteger maxBuckets = new AtomicInteger(UNIQUE_FIELD_VALS);
-   assertFacetSKGsAreCorrect(maxBuckets, facets, strfield(7)+":11", strfield(5)+":9", "*:*");
-   assertTrue("Didn't check a single bucket???", maxBuckets.get() < UNIQUE_FIELD_VALS);
- }
-
- public void testRandom() throws Exception {
-
- // since the "cost" of verifying the stats for each bucket is so high (see TODO in verifySKGResults())
- // we put a safety valve in place on the maximum number of buckets that we are willing to verify
- // across *all* the queries that we do.
- // that way if the randomized queries we build all have relatively small facets, so be it, but if
- // we get a really big one early on, we can test as much as possible, skip other iterations.
- //
- // (deeply nested facets may contain more buckets then the max, but we won't *check* all of them)
- final int maxBucketsAllowed = atLeast(2000);
- final AtomicInteger maxBucketsToCheck = new AtomicInteger(maxBucketsAllowed);
-
- final int numIters = atLeast(10);
- for (int iter = 0; iter < numIters && 0 < maxBucketsToCheck.get(); iter++) {
- assertFacetSKGsAreCorrect(maxBucketsToCheck, TermFacet.buildRandomFacets(),
- buildRandomQuery(), buildRandomQuery(), buildRandomQuery());
- }
- assertTrue("Didn't check a single bucket???", maxBucketsToCheck.get() < maxBucketsAllowed);
-
-
- }
-
- /**
-  * Builds a random query string over the randomized fields/values in the index:
-  * either the match-all query, or an OR of several field:value clauses.
-  *
-  * @see #randFieldValue
-  * @see #field
-  */
- private static String buildRandomQuery() {
-   final boolean matchAll = (0 == TestUtil.nextInt(random(), 0,10));
-   if (matchAll) {
-     return "*:*";
-   }
-   final int numClauses = TestUtil.nextInt(random(), 3, 10);
-   final List<String> clauses = new ArrayList<>(numClauses);
-   while (clauses.size() < numClauses) {
-     final int fieldNum = random().nextInt(MAX_FIELD_NUM);
-     // keep queries simple, just use str fields - not point of test
-     clauses.add(strfield(fieldNum) + ":" + randFieldValue(fieldNum));
-   }
-   return buildORQuery(clauses.toArray(new String[0]));
- }
-
- private static String buildORQuery(String... clauses) {
- assert 0 < clauses.length;
- return "(" + StringUtils.join(clauses, " OR ") + ")";
- }
-
- /**
- * Given a set of term facets, and top level query strings, asserts that
- * the SKG stats for each facet term returned when executing that query with those foreground/background
- * queries match the expected results of executing the equivalent queries in isolation.
- *
- * @param maxBucketsToCheck shared budget of buckets left to verify, passed on to the recursive check
- * @param expected the facets to request, by facet key
- * @param query the main query string, sent as the "q" param
- * @param foreQ the foreground query string, sent as the "fore" param
- * @param backQ the background query string, sent as the "back" param
- * @see #verifySKGResults
- */
- private void assertFacetSKGsAreCorrect(final AtomicInteger maxBucketsToCheck,
- Map<String,TermFacet> expected,
- final String query,
- final String foreQ,
- final String backQ) throws SolrServerException, IOException {
- final SolrParams baseParams = params("rows","0", "fore", foreQ, "back", backQ);
-
- final SolrParams facetParams = params("q", query,
- "json.facet", ""+TermFacet.toJSONFacetParamValue(expected,null));
- final SolrParams initParams = SolrParams.wrapAppended(facetParams, baseParams);
-
- log.info("Doing full run: {}", initParams);
-
- QueryResponse rsp = null;
- // JSON Facets not (currently) available from QueryResponse...
- NamedList topNamedList = null;
- try {
- rsp = (new QueryRequest(initParams)).process(getRandClient(random()));
- assertNotNull(initParams + " is null rsp?", rsp);
- topNamedList = rsp.getResponse();
- assertNotNull(initParams + " is null topNamedList?", topNamedList);
- } catch (Exception e) { // rethrown with the request params in the message, original kept as the cause
- throw new RuntimeException("init query failed: " + initParams + ": " +
- e.getMessage(), e);
- }
- try {
- final NamedList facetResponse = (NamedList) topNamedList.get("facets");
- assertNotNull("null facet results?", facetResponse);
- assertEquals("numFound mismatch with top count?",
- rsp.getResults().getNumFound(), ((Number)facetResponse.get("count")).longValue());
-
- // Note: even if the query has numFound=0, our explicit background query domain should
- // still force facet results
- // (even if the background query matches nothing, that just means there will be no
- // buckets in those facets)
- assertFacetSKGsAreCorrect(maxBucketsToCheck, expected, baseParams, facetResponse);
-
- } catch (AssertionError e) { // rethrown with the request params & the full response in the message
- throw new AssertionError(initParams + " ===> " + topNamedList + " --> " + e.getMessage(), e);
- } finally {
- log.info("Ending full run");
- }
- }
-
- /**
- * Recursive helper method that walks the actual facet response, comparing the SKG results to
- * the expected output based on the equivalent filters generated from the original TermFacet.
- *
- * @param maxBucketsToCheck shared budget of buckets left to verify; decremented once per verified
- *        bucket, and this method returns as soon as it reaches zero
- * @param expected the facets expected at this level of the response, by facet key
- * @param baseParams the params (including any accumulated "fq" filters) used for the verification queries
- * @param actualFacetResponse the facet response (or parent bucket) to check at this level
- */
- private void assertFacetSKGsAreCorrect(final AtomicInteger maxBucketsToCheck,
- final Map<String,TermFacet> expected,
- final SolrParams baseParams,
- final NamedList actualFacetResponse) throws SolrServerException, IOException {
-
- for (Map.Entry<String,TermFacet> entry : expected.entrySet()) {
- final String facetKey = entry.getKey();
- final TermFacet facet = entry.getValue();
- final NamedList results = (NamedList) actualFacetResponse.get(facetKey);
- assertNotNull(facetKey + " key missing from: " + actualFacetResponse, results);
- final List<NamedList> buckets = (List<NamedList>) results.get("buckets");
- assertNotNull(facetKey + " has null buckets: " + actualFacetResponse, buckets);
-
- if (buckets.isEmpty()) {
- // should only happen if the background query does not match any docs with field X
- final long docsWithField = getNumFound(params("_trace", "noBuckets",
- "rows", "0",
- "q", facet.field+":[* TO *]",
- "fq", baseParams.get("back")));
-
- assertEquals(facetKey + " has no buckets, but docs in background exist with field: " + facet.field,
- 0, docsWithField);
- }
-
- // NOTE: it's important that we do this depth first -- not just because it's the easiest way to do it,
- // but because it means that our maxBucketsToCheck will ensure we do a lot of deep sub-bucket checking,
- // not just all the buckets of the top level(s) facet(s)
- for (NamedList bucket : buckets) {
- final String fieldVal = bucket.get("val").toString(); // int or stringified int
-
- verifySKGResults(facetKey, facet, baseParams, fieldVal, bucket);
- if (maxBucketsToCheck.decrementAndGet() <= 0) {
- return; // budget used up: this also skips the unexpected-keys check at the end of this method
- }
-
- final SolrParams verifyParams = SolrParams.wrapAppended(baseParams,
- params("fq", facet.field + ":" + fieldVal)); // narrow the sub-facet checks to this bucket
-
- // recursively check subFacets
- if (! facet.subFacets.isEmpty()) {
- assertFacetSKGsAreCorrect(maxBucketsToCheck, facet.subFacets, verifyParams, bucket);
- }
- }
- }
-
- { // make sure we don't have any facet keys we don't expect
- // a little hackish because subfacets have extra keys...
- final LinkedHashSet expectedKeys = new LinkedHashSet(expected.keySet());
- expectedKeys.add("count");
- if (0 <= actualFacetResponse.indexOf("val",0)) { // a "val" key means we were handed a bucket
- expectedKeys.add("val");
- expectedKeys.add("skg");
- }
- assertEquals("Unexpected keys in facet response",
- expectedKeys, actualFacetResponse.asShallowMap().keySet());
- }
- }
-
- /**
- * Verifies that the popularity & relatedness values contained in a single SKG bucket
- * match the expected values based on the facet field & bucket value, as well as the existing
- * filterParams.
- *
- * <p>
- * The expected values are derived from four counting queries: the foreground size & foreground
- * count (both run with the filterParams appended), and the background size & background count
- * (both built only from the "back" param of the filterParams).
- * </p>
- *
- * @param facetKey key of the facet this bucket belongs to, only used in assertion messages
- * @param facet the facet whose field, combined with fieldVal, forms the bucket query
- * @param filterParams params supplying the "fore" & "back" queries and any accumulated filters
- * @param fieldVal the value of this bucket
- * @param bucket the bucket to verify, must contain an "skg" entry
- * @see #assertFacetSKGsAreCorrect
- */
- private void verifySKGResults(String facetKey, TermFacet facet, SolrParams filterParams,
- String fieldVal, NamedList<Object> bucket)
- throws SolrServerException, IOException {
-
- final String bucketQ = facet.field+":"+fieldVal;
- final NamedList<Object> skgBucket = (NamedList<Object>) bucket.get("skg");
- assertNotNull(facetKey + "/bucket:" + bucket.toString(), skgBucket);
-
- // TODO: make this more efficient?
- // ideally we'd do a single query w/4 facet.queries, one for each count
- // but formatting the queries is a pain, currently we leverage the accumulated fq's
- final long fgSize = getNumFound(SolrParams.wrapAppended(params("_trace", "fgSize",
- "rows","0",
- "q","{!query v=$fore}"),
- filterParams));
- final long bgSize = getNumFound(params("_trace", "bgSize",
- "rows","0",
- "q", filterParams.get("back")));
-
- final long fgCount = getNumFound(SolrParams.wrapAppended(params("_trace", "fgCount",
- "rows","0",
- "q","{!query v=$fore}",
- "fq", bucketQ),
- filterParams));
- final long bgCount = getNumFound(params("_trace", "bgCount",
- "rows","0",
- "q", bucketQ,
- "fq", filterParams.get("back")));
-
- assertEquals(facetKey + "/bucket:" + bucket + " => fgPop should be: " + fgCount + " / " + bgSize,
- roundTo5Digits((double) fgCount / bgSize),
- skgBucket.get("foreground_popularity"));
- assertEquals(facetKey + "/bucket:" + bucket + " => bgPop should be: " + bgCount + " / " + bgSize,
- roundTo5Digits((double) bgCount / bgSize),
- skgBucket.get("background_popularity"));
- assertEquals(facetKey + "/bucket:" + bucket + " => relatedness is wrong",
- roundTo5Digits(computeRelatedness(fgCount, fgSize, bgCount, bgSize)),
- skgBucket.get("relatedness"));
-
- }
-
-
- /**
- * Trivial data structure for modeling a simple terms facet that can be written out as a json.facet param.
- *
- * Doesn't do any string escaping or quoting, so don't use whitespace or reserved json characters
- */
- private static final class TermFacet {
- public final String field;
- public final Map<String,TermFacet> subFacets = new LinkedHashMap<>();
- public final Integer limit; // may be null
- public final Integer overrequest; // may be null
- public final String sort; // may be null
- /** Simplified constructor asks for limit = # unique vals */
- public TermFacet(String field) {
- this(field, UNIQUE_FIELD_VALS, 0, "skg desc");
-
- }
- public TermFacet(String field, Integer limit, Integer overrequest, String sort) {
- assert null != field;
- this.field = field;
- this.limit = limit;
- this.overrequest = overrequest;
- this.sort = sort;
- }
-
- /**
- * recursively generates the <code>json.facet</code> param value to use for testing this facet
- */
- private CharSequence toJSONFacetParamValue() {
- final String limitStr = (null == limit) ? "" : (", limit:" + limit);
- final String overrequestStr = (null == overrequest) ? "" : (", overrequest:" + overrequest);
- final String sortStr = (null == sort) ? "" : (", sort: '" + sort + "'");
- final StringBuilder sb
- = new StringBuilder("{ type:terms, field:" + field + limitStr + overrequestStr + sortStr);
-
- // see class javadocs for why we always use refine:true & the query:'*:*' domain for this test.
- sb.append(", refine: true, domain: { query: '*:*' }, facet:");
- sb.append(toJSONFacetParamValue(subFacets, "skg : 'relatedness($fore,$back)'"));
- sb.append("}");
- return sb;
- }
-
- /**
- * Given a set of (possibly nested) facets, generates a suitable <code>json.facet</code> param value to
- * use for testing them against in a solr request.
- */
- public static CharSequence toJSONFacetParamValue(final Map<String,TermFacet> facets,
- final String extraJson) {
- assert null != facets;
- if (0 == facets.size() && null == extraJson) {
- return "";
- }
-
- StringBuilder sb = new StringBuilder("{ processEmpty: true, ");
- for (String key : facets.keySet()) {
- sb.append(key).append(" : ").append(facets.get(key).toJSONFacetParamValue());
- sb.append(" ,");
- }
- if (null == extraJson) {
- sb.setLength(sb.length() - 1);
- } else {
- sb.append(extraJson);
- }
- sb.append("}");
- return sb;
- }
-
- /**
- * Factory method for generating some random facets.
- *
- * For simplicity, each facet will have a unique key name.
- */
- public static Map<String,TermFacet> buildRandomFacets() {
- // for simplicity, use a unique facet key regardless of depth - simplifies verification
- // and le's us enforce a hard limit on the total number of facets in a request
- AtomicInteger keyCounter = new AtomicInteger(0);
-
- final int maxDepth = TestUtil.nextInt(random(), 0, (usually() ? 2 : 3));
- return buildRandomFacets(keyCounter, maxDepth);
- }
-
- /**
- * picks a random value for the "sort" param, biased in favor of interesting test cases
- *
- * @return a sort string (w/direction), or null to specify nothing (trigger default behavior)
- * @see #randomLimitParam
- */
- public static String randomSortParam(Random r) {
-
- // IMPORTANT!!!
- // if this method is modified to produce new sorts, make sure to update
- // randomLimitParam to account for them if they are impacted by SOLR-12556
- final String dir = random().nextBoolean() ? "asc" : "desc";
- switch(r.nextInt(4)) {
- case 0: return null;
- case 1: return "count " + dir;
- case 2: return "skg " + dir;
- case 3: return "index " + dir;
- default: throw new RuntimeException("Broken case statement");
- }
- }
- /**
- * picks a random value for the "limit" param, biased in favor of interesting test cases
- *
- * <p>
- * <b>NOTE:</b> Due to SOLR-12556, we have to force an overrequest of "all" possible terms for
- * some sort values.
- * </p>
- *
- * @return a number to specify in the request, or null to specify nothing (trigger default behavior)
- * @see #UNIQUE_FIELD_VALS
- * @see #randomSortParam
- */
- public static Integer randomLimitParam(Random r, final String sort) {
- if (null != sort) {
- if (sort.equals("count asc") || sort.startsWith("skg")) {
- // of the known types of sorts produced, these are at risk of SOLR-12556
- // so request (effectively) unlimited num buckets
- return r.nextBoolean() ? UNIQUE_FIELD_VALS : -1;
- }
- }
- final int limit = 1 + r.nextInt((int) (UNIQUE_FIELD_VALS * 1.5F));
- if (limit >= UNIQUE_FIELD_VALS && r.nextBoolean()) {
- return -1; // unlimited
- } else if (limit == DEFAULT_LIMIT && r.nextBoolean()) {
- return null; // sometimes, don't specify limit if it's the default
- }
- return limit;
- }
-
- /**
- * picks a random value for the "overrequest" param, biased in favor of interesting test cases.
- *
- * @return a number to specify in the request, or null to specify nothing (trigger default behavior)
- * @see #UNIQUE_FIELD_VALS
- */
- public static Integer randomOverrequestParam(Random r) {
- switch(r.nextInt(10)) {
- case 0:
- case 1:
- case 2:
- case 3:
- return 0; // 40% of the time, disable overrequest to better stress refinement
- case 4:
- case 5:
- return r.nextInt(UNIQUE_FIELD_VALS); // 20% ask for less them what's needed
- case 6:
- return r.nextInt(Integer.MAX_VALUE); // 10%: completley random value, statisticaly more then enough
- default: break;
- }
- // else.... either leave param unspecified (or redundently specify the -1 default)
- return r.nextBoolean() ? null : -1;
- }
-
- /**
- * recursive helper method for building random facets
- *
- * @param keyCounter used to ensure every generated facet has a unique key name
- * @param maxDepth max possible depth allowed for the recusion, a lower value may be used depending on how many facets are returned at the current level.
- */
- private static Map<String,TermFacet> buildRandomFacets(AtomicInteger keyCounter, int maxDepth) {
- final int numFacets = Math.max(1, TestUtil.nextInt(random(), -1, 3)); // 3/5th chance of being '1'
- Map<String,TermFacet> results = new LinkedHashMap<>();
- for (int i = 0; i < numFacets; i++) {
- if (keyCounter.get() < 3) { // a hard limit on the total number of facets (regardless of depth) to reduce OOM risk
-
- final String sort = randomSortParam(random());
- final Integer limit = randomLimitParam(random(), sort);
- final Integer overrequest = randomOverrequestParam(random());
- final TermFacet facet = new TermFacet(field((random().nextBoolean()
- ? STR_FIELD_SUFFIXES : INT_FIELD_SUFFIXES),
- random().nextInt(MAX_FIELD_NUM)),
- limit, overrequest, sort);
- results.put("facet_" + keyCounter.incrementAndGet(), facet);
- if (0 < maxDepth) {
- // if we're going wide, don't go deep
- final int nextMaxDepth = Math.max(0, maxDepth - numFacets);
- facet.subFacets.putAll(buildRandomFacets(keyCounter, TestUtil.nextInt(random(), 0, nextMaxDepth)));
- }
- }
- }
- return results;
- }
- }
-
- /**
-  * Selects a SolrClient at random -- either the CloudSolrClient, or one of the HttpSolrClients
-  * pointed at a node in our cluster
-  */
- public static SolrClient getRandClient(Random rand) {
-   // one slot per node client, plus one extra slot (the last index) for the cloud client
-   final int cloudClientSlot = CLIENTS.size();
-   final int pick = TestUtil.nextInt(rand, 0, cloudClientSlot);
-   if (pick != cloudClientSlot) {
-     return CLIENTS.get(pick);
-   }
-   return CLOUD_CLIENT;
- }
-
- /**
-  * Executes a request with a random SolrClient and returns only the numFound
-  * @see #getRandClient
-  */
- public static long getNumFound(final SolrParams req) throws SolrServerException, IOException {
-   final SolrClient client = getRandClient(random());
-   final QueryResponse rsp = client.query(req);
-   return rsp.getResults().getNumFound();
- }
-
- /**
-  * Hands the default collection (which must be set) and the ZkStateReader of the given client
-  * to {@link AbstractDistribZkTestBase#waitForRecoveriesToFinish}.
-  */
- public static void waitForRecoveriesToFinish(CloudSolrClient client) throws Exception {
-   final String defaultCollection = client.getDefaultCollection();
-   assert null != defaultCollection;
-   AbstractDistribZkTestBase.waitForRecoveriesToFinish(defaultCollection, client.getZkStateReader(), true, true, 330);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/71988c75/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java b/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java
index 9de0d12..a661198 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java
@@ -32,7 +32,7 @@ import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.ValueSourceParser;
-public class DebugAgg extends AggValueSource {
+class DebugAgg extends AggValueSource {
public static AtomicLong parses = new AtomicLong(0);
public static class Parser extends ValueSourceParser {