You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mk...@apache.org on 2018/05/01 17:18:34 UTC
lucene-solr:master: SOLR-8998: uniqueBlock() aggregation for
singlevalue string fields in json.facet
Repository: lucene-solr
Updated Branches:
refs/heads/master d92b891f9 -> ee7b52f4c
SOLR-8998: uniqueBlock() aggregation for singlevalue string fields in json.facet
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/ee7b52f4
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/ee7b52f4
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/ee7b52f4
Branch: refs/heads/master
Commit: ee7b52f4c6fe55f0d07ce8228c246b61d1f2b5fb
Parents: d92b891
Author: Mikhail Khludnev <mk...@apache.org>
Authored: Tue May 1 20:19:15 2018 +0300
Committer: Mikhail Khludnev <mk...@apache.org>
Committed: Tue May 1 20:19:15 2018 +0300
----------------------------------------------------------------------
solr/CHANGES.txt | 3 +
.../apache/solr/search/ValueSourceParser.java | 8 ++
.../solr/search/facet/UniqueBlockAgg.java | 91 ++++++++++++++++++++
.../search/facet/UniqueSinglevaluedSlotAcc.java | 16 ++--
.../apache/solr/search/QueryEqualityTest.java | 1 +
.../facet/TestJsonFacetsWithNestedObjects.java | 26 ++++++
.../search/join/BlockJoinFacetDistribTest.java | 77 +++++++++++++++--
7 files changed, 208 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ee7b52f4/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index ea166fb..f245002 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -105,6 +105,9 @@ New Features
* SOLR-11924: Added the ability to listen to changes in the set of active collections in a cloud
in the ZkStateReader, through the CloudCollectionsListener. (Houston Putman, Dennis Gove)
+* SOLR-8998: introducing uniqueBlock(_root_) aggregation as faster alternative to unique(_root_) for counting
+ child value facets in parents via json.facet on block index (Dr Oleg Savrasov, Mikhail Khludnev)
+
Bug Fixes
----------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ee7b52f4/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
index 0e26bf8..683cf4a 100644
--- a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
+++ b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
@@ -64,6 +64,7 @@ import org.apache.solr.search.facet.StddevAgg;
import org.apache.solr.search.facet.SumAgg;
import org.apache.solr.search.facet.SumsqAgg;
import org.apache.solr.search.facet.UniqueAgg;
+import org.apache.solr.search.facet.UniqueBlockAgg;
import org.apache.solr.search.facet.VarianceAgg;
import org.apache.solr.search.function.CollapseScoreFunction;
import org.apache.solr.search.function.ConcatStringFunction;
@@ -964,6 +965,13 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
}
});
+ addParser("agg_uniqueBlock", new ValueSourceParser() {
+ @Override
+ public ValueSource parse(FunctionQParser fp) throws SyntaxError {
+ return new UniqueBlockAgg(fp.parseArg());
+ }
+ });
+
addParser("agg_hll", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ee7b52f4/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockAgg.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockAgg.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockAgg.java
new file mode 100644
index 0000000..c2bfec7
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockAgg.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search.facet;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.solr.schema.SchemaField;
+
+public class UniqueBlockAgg extends UniqueAgg {
+
+ private static final class UniqueBlockSlotAcc extends UniqueSinglevaluedSlotAcc {
+
+ private int lastSeenValuesPerSlot[];
+
+ private UniqueBlockSlotAcc(FacetContext fcontext, SchemaField field, int numSlots)
+ throws IOException { //
+ super(fcontext, field, /*numSlots suppressing inherited accumulator */0, null);
+ counts = new int[numSlots];
+ lastSeenValuesPerSlot = new int[numSlots];
+ Arrays.fill(lastSeenValuesPerSlot, Integer.MIN_VALUE);
+ }
+
+ @Override
+ protected void collectOrdToSlot(int slotNum, int ord) {
+ if (lastSeenValuesPerSlot[slotNum]!=ord) {
+ counts[slotNum]+=1;
+ lastSeenValuesPerSlot[slotNum] = ord;
+ }
+ }
+
+ @Override
+ public void calcCounts() {
+ // noop already done
+ }
+
+ @Override
+ public void reset() throws IOException {
+ Arrays.fill(counts, 0);
+ Arrays.fill(lastSeenValuesPerSlot, Integer.MIN_VALUE);
+ }
+
+ @Override
+ public Object getValue(int slot) throws IOException {
+ return counts[slot];
+ }
+ }
+
+ private final static String uniqueBlock = "uniqueBlock";
+
+ public UniqueBlockAgg(String field) {
+ super(field);
+ name= uniqueBlock;
+ }
+
+ @Override
+ public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
+ final String fieldName = getArg();
+ SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(fieldName);
+ if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
+ throw new IllegalArgumentException(uniqueBlock+"("+fieldName+
+ ") doesn't allow multivalue fields, got " + sf);
+ } else {
+ if (sf.getType().getNumberType() != null) {
+ throw new IllegalArgumentException(uniqueBlock+"("+fieldName+
+ ") not yet support numbers " + sf);
+ } else {
+ return new UniqueBlockSlotAcc(fcontext, sf, numSlots);
+ }
+ }
+ }
+
+ @Override
+ public FacetMerger createFacetMerger(Object prototype) {
+ return new FacetLongMerger() ;
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ee7b52f4/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java
index 9efa1e9..9a1b51e 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java
@@ -81,12 +81,16 @@ class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
int segOrd = subDv.ordValue();
int ord = toGlobal==null ? segOrd : (int)toGlobal.get(segOrd);
- FixedBitSet bits = arr[slotNum];
- if (bits == null) {
- bits = new FixedBitSet(nTerms);
- arr[slotNum] = bits;
- }
- bits.set(ord);
+ collectOrdToSlot(slotNum, ord);
}
}
+
+ protected void collectOrdToSlot(int slotNum, int ord) {
+ FixedBitSet bits = arr[slotNum];
+ if (bits == null) {
+ bits = new FixedBitSet(nTerms);
+ arr[slotNum] = bits;
+ }
+ bits.set(ord);
+ }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ee7b52f4/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
index 6112303..d562076 100644
--- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
+++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
@@ -1162,6 +1162,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
assertFuncEquals("agg_sum(foo_i)", "agg_sum(foo_i)");
assertFuncEquals("agg_count()", "agg_count()");
assertFuncEquals("agg_unique(foo_i)", "agg_unique(foo_i)");
+ assertFuncEquals("agg_uniqueBlock(foo_i)", "agg_uniqueBlock(foo_i)");
assertFuncEquals("agg_hll(foo_i)", "agg_hll(foo_i)");
assertFuncEquals("agg_sumsq(foo_i)", "agg_sumsq(foo_i)");
assertFuncEquals("agg_percentile(foo_i,50)", "agg_percentile(foo_i,50)");
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ee7b52f4/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetsWithNestedObjects.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetsWithNestedObjects.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetsWithNestedObjects.java
index 5a638ac..cb8b71a 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetsWithNestedObjects.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetsWithNestedObjects.java
@@ -334,4 +334,30 @@ public class TestJsonFacetsWithNestedObjects extends SolrTestCaseHS{
" ]}}"
);
}
+
+ public void testUniqueBlock() throws Exception {
+ final Client client = Client.localClient();
+ ModifiableSolrParams p = params("rows","0");
+ client.testJQ(params(p, "q", "{!parent tag=top which=type_s:book v=$childquery}"
+ , "childquery", "comment_t:*"
+ , "fl", "id", "fl" , "title_t"
+ , "json.facet", "{" +
+ " types: {" +
+ " domain: { blockChildren:\"type_s:book\"" +
+ " }," +
+ " type:terms," +
+ " field:type_s,"
+ + " limit:-1," +
+ " facet: {" +
+ " in_books: \"uniqueBlock(_root_)\" }"+//}}," +
+ " }" +
+ "}" )
+
+ , "response=={numFound:2,start:0,docs:[]}"
+ , "facets=={ count:2," +
+ "types:{" +
+ " buckets:[ {val:review, count:5, in_books:2} ]}" +
+ "}"
+ );
+ }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ee7b52f4/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetDistribTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetDistribTest.java b/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetDistribTest.java
index 123ce97..c9d63c0 100644
--- a/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetDistribTest.java
+++ b/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetDistribTest.java
@@ -43,6 +43,7 @@ import org.junit.Test;
public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
+ private static final int defFacetLimit = 10;
private static final String collection = "facetcollection";
@BeforeClass
@@ -74,7 +75,8 @@ public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
"fuchsia", "light","dark","green","grey","don't","know","any","more" );
final static List<String> sizes = Arrays.asList("s","m","l","xl","xxl","xml","xxxl","3","4","5","6","petite","maxi");
- @Test
+ @SuppressWarnings("unchecked")
+ @Test
public void testBJQFacetComponent() throws Exception {
assert ! colors.removeAll(sizes): "there is no colors in sizes";
@@ -126,12 +128,20 @@ public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
if (!parentDocs.isEmpty()) {
indexDocs(parentDocs);
}
- cluster.getSolrClient().commit(collection);
-
+ if (random().nextBoolean()) {
+ cluster.getSolrClient().commit(collection);
+ } else {
+ cluster.getSolrClient().optimize(collection);
+ }
// to parent query
- final String childQueryClause = "COLOR_s:("+(matchingColors.toString().replaceAll("[,\\[\\]]", " "))+")";
+ final String matchingColorsCommaSep = matchingColors.toString().replaceAll("[ \\[\\]]", "");
+ final String childQueryClause = "{!terms f=COLOR_s}" + matchingColorsCommaSep;
final boolean oldFacetsEnabled = random().nextBoolean();
- QueryResponse results = query("q", "{!parent which=\"type_s:parent\"}"+childQueryClause,
+ final boolean limitJsonSizes = random().nextBoolean();
+ final boolean limitJsonColors = random().nextBoolean();
+
+ QueryResponse results = query("q", "{!parent which=\"type_s:parent\" v=$matchingColors}",//+childQueryClause,
+ "matchingColors", childQueryClause,
"facet", oldFacetsEnabled ? "true":"false", // try to enforce multiple phases
oldFacetsEnabled ? "facet.field" : "ignore" , "BRAND_s",
oldFacetsEnabled&&usually() ? "facet.limit" : "ignore" , "1",
@@ -141,7 +151,18 @@ public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
"child.facet.field", "COLOR_s",
"child.facet.field", "SIZE_s",
"distrib.singlePass", random().nextBoolean() ? "true":"false",
- "rows", random().nextBoolean() ? "0":"10"
+ "rows", random().nextBoolean() ? "0":"10",
+ "json.facet","{ "
+ + "children:{ type: query, query:\"*:*\", domain:{"
+ +"blockChildren:\"type_s:parent\", filter:{param:matchingColors}"
+ + "}, facet:{ colors:{ type:field, field:COLOR_s,"
+ + (limitJsonColors ? "":" limit:-1,")
+ + " facet:{ inprods:\"uniqueBlock(_root_)\"}}, "
+ + "sizes:{type:field, field:SIZE_s, "
+ + (limitJsonSizes ? "" : "limit:-1,")
+ + " facet:{inprods:\"uniqueBlock(_root_)\"}}"
+ + "}"
+ + "}}", "debugQuery","true"//, "shards", "shard1"
);
NamedList<Object> resultsResponse = results.getResponse();
assertNotNull(resultsResponse);
@@ -155,9 +176,49 @@ public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
parentIdsByAttrValue.get(c.getName()).size(), c.getCount());
}
}
-
+
assertEquals(msg , parentIdsByAttrValue.size(),color_s.getValueCount() + size_s.getValueCount());
- //System.out.println(parentIdsByAttrValue);
+
+ final List<NamedList<Object>> jsonSizes = (List<NamedList<Object>>)
+ get(resultsResponse, "facets", "children", "sizes", "buckets");
+ final List<NamedList<Object>> jsonColors = (List<NamedList<Object>>)
+ get(resultsResponse, "facets", "children", "colors", "buckets");
+
+ if (limitJsonColors) {
+ assertTrue(""+jsonColors, jsonColors.size()<=defFacetLimit);
+ }
+
+ if (limitJsonSizes) {
+ assertTrue(""+jsonSizes, jsonSizes.size()<=defFacetLimit);
+ }
+
+ for (List<NamedList<Object>> vals : new List[] { jsonSizes,jsonColors}) {
+ int i=0;
+ for(NamedList<Object> tuples: vals) {
+ String val = (String) get(tuples,"val");
+ Number count = (Number) get(tuples,"inprods");
+ if (((vals==jsonSizes && limitJsonSizes) || // vals close to the limit are not exact
+ (vals==jsonColors && limitJsonColors)) && i>=defFacetLimit/2) {
+ assertTrue(i+ "th "+tuples+". "+vals,
+ parentIdsByAttrValue.get(val).size()>= count.intValue() &&
+ count.intValue()>0);
+ } else {
+ assertEquals(tuples+". "+vals,
+ parentIdsByAttrValue.get(val).size(),count.intValue());
+ }
+ i++;
+ }
+ }
+ if (!limitJsonColors && !limitJsonSizes) {
+ assertEquals(""+jsonSizes+jsonColors, parentIdsByAttrValue.size(),jsonSizes.size() + jsonColors.size());
+ }
+ }
+
+ private static Object get(Object nvList, String ... segments) {
+ for(String segment: segments) {
+ nvList = ((NamedList<Object>) nvList).get(segment);
+ }
+ return nvList;
}
private QueryResponse query(String ... arg) throws SolrServerException, IOException {