You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2018/05/08 21:15:02 UTC

[16/50] [abbrv] lucene-solr:jira/solr-11779: SOLR-8998: uniqueBlock() aggregation for singlevalue string fields in json.facet

SOLR-8998: uniqueBlock() aggregation for singlevalue string fields in json.facet


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/ee7b52f4
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/ee7b52f4
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/ee7b52f4

Branch: refs/heads/jira/solr-11779
Commit: ee7b52f4c6fe55f0d07ce8228c246b61d1f2b5fb
Parents: d92b891
Author: Mikhail Khludnev <mk...@apache.org>
Authored: Tue May 1 20:19:15 2018 +0300
Committer: Mikhail Khludnev <mk...@apache.org>
Committed: Tue May 1 20:19:15 2018 +0300

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  3 +
 .../apache/solr/search/ValueSourceParser.java   |  8 ++
 .../solr/search/facet/UniqueBlockAgg.java       | 91 ++++++++++++++++++++
 .../search/facet/UniqueSinglevaluedSlotAcc.java | 16 ++--
 .../apache/solr/search/QueryEqualityTest.java   |  1 +
 .../facet/TestJsonFacetsWithNestedObjects.java  | 26 ++++++
 .../search/join/BlockJoinFacetDistribTest.java  | 77 +++++++++++++++--
 7 files changed, 208 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ee7b52f4/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index ea166fb..f245002 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -105,6 +105,9 @@ New Features
 * SOLR-11924: Added the ability to listen to changes in the set of active collections in a cloud
   in the ZkStateReader, through the CloudCollectionsListener. (Houston Putman, Dennis Gove)
 
+* SOLR-8998: introducing uniqueBlock(_root_) aggregation as a faster alternative to unique(_root_) for counting
+  child value facets in parents via json.facet on a block index (Dr Oleg Savrasov, Mikhail Khludnev)
+
 Bug Fixes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ee7b52f4/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
index 0e26bf8..683cf4a 100644
--- a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
+++ b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
@@ -64,6 +64,7 @@ import org.apache.solr.search.facet.StddevAgg;
 import org.apache.solr.search.facet.SumAgg;
 import org.apache.solr.search.facet.SumsqAgg;
 import org.apache.solr.search.facet.UniqueAgg;
+import org.apache.solr.search.facet.UniqueBlockAgg;
 import org.apache.solr.search.facet.VarianceAgg;
 import org.apache.solr.search.function.CollapseScoreFunction;
 import org.apache.solr.search.function.ConcatStringFunction;
@@ -964,6 +965,13 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
       }
     });
 
+    addParser("agg_uniqueBlock", new ValueSourceParser() {
+      @Override
+      public ValueSource parse(FunctionQParser fp) throws SyntaxError {
+        return new UniqueBlockAgg(fp.parseArg());
+      }
+    });
+
     addParser("agg_hll", new ValueSourceParser() {
       @Override
       public ValueSource parse(FunctionQParser fp) throws SyntaxError {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ee7b52f4/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockAgg.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockAgg.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockAgg.java
new file mode 100644
index 0000000..c2bfec7
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockAgg.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search.facet;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.solr.schema.SchemaField;
+
+public class UniqueBlockAgg extends UniqueAgg {
+
+  private static final class UniqueBlockSlotAcc extends UniqueSinglevaluedSlotAcc {
+    
+    private int lastSeenValuesPerSlot[];
+    
+    private UniqueBlockSlotAcc(FacetContext fcontext, SchemaField field, int numSlots)
+        throws IOException { //  
+      super(fcontext, field, /*numSlots suppressing inherited accumulator */0, null);
+      counts = new int[numSlots];
+      lastSeenValuesPerSlot = new int[numSlots];
+      Arrays.fill(lastSeenValuesPerSlot, Integer.MIN_VALUE);
+    }
+    
+    @Override
+    protected void collectOrdToSlot(int slotNum, int ord) {
+      if (lastSeenValuesPerSlot[slotNum]!=ord) {
+        counts[slotNum]+=1;
+        lastSeenValuesPerSlot[slotNum] = ord;
+      }
+    }
+    
+    @Override
+    public void calcCounts() {
+      // noop already done
+    }
+    
+    @Override
+    public void reset() throws IOException {
+      Arrays.fill(counts, 0);
+      Arrays.fill(lastSeenValuesPerSlot, Integer.MIN_VALUE);
+    }
+    
+    @Override
+    public Object getValue(int slot) throws IOException {
+      return counts[slot];
+    }
+  }
+
+  private final static String uniqueBlock = "uniqueBlock";
+
+  public UniqueBlockAgg(String field) {
+    super(field);
+    name= uniqueBlock;
+  }
+
+  @Override
+  public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
+    final String fieldName = getArg();
+    SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(fieldName);
+    if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
+      throw new IllegalArgumentException(uniqueBlock+"("+fieldName+
+          ") doesn't allow multivalue fields, got " + sf);
+    } else {
+      if (sf.getType().getNumberType() != null) {
+        throw new IllegalArgumentException(uniqueBlock+"("+fieldName+
+            ") not yet support numbers " + sf);
+      } else {
+        return new UniqueBlockSlotAcc(fcontext, sf, numSlots);
+      }
+    }
+  }
+  
+  @Override
+  public FacetMerger createFacetMerger(Object prototype) {
+    return new FacetLongMerger() ;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ee7b52f4/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java
index 9efa1e9..9a1b51e 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java
@@ -81,12 +81,16 @@ class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
       int segOrd = subDv.ordValue();
       int ord = toGlobal==null ? segOrd : (int)toGlobal.get(segOrd);
 
-      FixedBitSet bits = arr[slotNum];
-      if (bits == null) {
-        bits = new FixedBitSet(nTerms);
-        arr[slotNum] = bits;
-      }
-      bits.set(ord);
+      collectOrdToSlot(slotNum, ord);
     }
   }
+
+  protected void collectOrdToSlot(int slotNum, int ord) {
+    FixedBitSet bits = arr[slotNum];
+    if (bits == null) {
+      bits = new FixedBitSet(nTerms);
+      arr[slotNum] = bits;
+    }
+    bits.set(ord);
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ee7b52f4/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
index 6112303..d562076 100644
--- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
+++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
@@ -1162,6 +1162,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
     assertFuncEquals("agg_sum(foo_i)", "agg_sum(foo_i)");
     assertFuncEquals("agg_count()", "agg_count()");
     assertFuncEquals("agg_unique(foo_i)", "agg_unique(foo_i)");
+    assertFuncEquals("agg_uniqueBlock(foo_i)", "agg_uniqueBlock(foo_i)");
     assertFuncEquals("agg_hll(foo_i)", "agg_hll(foo_i)");
     assertFuncEquals("agg_sumsq(foo_i)", "agg_sumsq(foo_i)");
     assertFuncEquals("agg_percentile(foo_i,50)", "agg_percentile(foo_i,50)");

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ee7b52f4/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetsWithNestedObjects.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetsWithNestedObjects.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetsWithNestedObjects.java
index 5a638ac..cb8b71a 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetsWithNestedObjects.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetsWithNestedObjects.java
@@ -334,4 +334,30 @@ public class TestJsonFacetsWithNestedObjects extends SolrTestCaseHS{
             "               ]}}"
     );
   }
+
+  public void testUniqueBlock() throws Exception {
+    final Client client = Client.localClient();
+    ModifiableSolrParams p = params("rows","0");
+    client.testJQ(params(p, "q", "{!parent tag=top which=type_s:book v=$childquery}"
+        , "childquery", "comment_t:*"
+        , "fl", "id", "fl" , "title_t" 
+        , "json.facet", "{" +
+            "  types: {" +
+            "    domain: { blockChildren:\"type_s:book\"" +  
+             "            }," +
+            "    type:terms," +
+            "    field:type_s,"
+            + "  limit:-1," +
+            "    facet: {" +
+            "           in_books: \"uniqueBlock(_root_)\" }"+//}}," +
+            "  }" +
+        "}" )
+
+        , "response=={numFound:2,start:0,docs:[]}"
+        , "facets=={ count:2," +
+            "types:{" +
+            "    buckets:[ {val:review,    count:5, in_books:2} ]}" +
+            "}"
+    );
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ee7b52f4/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetDistribTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetDistribTest.java b/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetDistribTest.java
index 123ce97..c9d63c0 100644
--- a/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetDistribTest.java
+++ b/solr/core/src/test/org/apache/solr/search/join/BlockJoinFacetDistribTest.java
@@ -43,6 +43,7 @@ import org.junit.Test;
 
 public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
 
+  private static final int defFacetLimit = 10;
   private static final String collection = "facetcollection";
 
   @BeforeClass
@@ -74,7 +75,8 @@ public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
       "fuchsia", "light","dark","green","grey","don't","know","any","more" );
   final static List<String> sizes = Arrays.asList("s","m","l","xl","xxl","xml","xxxl","3","4","5","6","petite","maxi");
   
-  @Test
+  @SuppressWarnings("unchecked")
+  @Test 
   public void testBJQFacetComponent() throws Exception {
     
     assert ! colors.removeAll(sizes): "there is no colors in sizes";
@@ -126,12 +128,20 @@ public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
     if (!parentDocs.isEmpty()) {
       indexDocs(parentDocs);
     }
-    cluster.getSolrClient().commit(collection);
-
+    if (random().nextBoolean()) {
+      cluster.getSolrClient().commit(collection);
+    } else {
+      cluster.getSolrClient().optimize(collection);
+    }
     // to parent query
-    final String childQueryClause = "COLOR_s:("+(matchingColors.toString().replaceAll("[,\\[\\]]", " "))+")";
+    final String matchingColorsCommaSep = matchingColors.toString().replaceAll("[ \\[\\]]", "");
+    final String childQueryClause = "{!terms f=COLOR_s}" + matchingColorsCommaSep;
       final boolean oldFacetsEnabled = random().nextBoolean();
-      QueryResponse results = query("q", "{!parent which=\"type_s:parent\"}"+childQueryClause,
+      final boolean limitJsonSizes = random().nextBoolean();
+      final boolean limitJsonColors = random().nextBoolean();
+      
+      QueryResponse results = query("q", "{!parent which=\"type_s:parent\" v=$matchingColors}",//+childQueryClause,
+          "matchingColors", childQueryClause,
           "facet", oldFacetsEnabled ? "true":"false", // try to enforce multiple phases
               oldFacetsEnabled ? "facet.field" : "ignore" , "BRAND_s",
               oldFacetsEnabled&&usually() ? "facet.limit" : "ignore" , "1",
@@ -141,7 +151,18 @@ public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
           "child.facet.field", "COLOR_s",
           "child.facet.field", "SIZE_s",
           "distrib.singlePass", random().nextBoolean() ? "true":"false",
-          "rows", random().nextBoolean() ? "0":"10"
+          "rows", random().nextBoolean() ? "0":"10",
+          "json.facet","{ "
+              + "children:{ type: query, query:\"*:*\", domain:{"
+                    +"blockChildren:\"type_s:parent\", filter:{param:matchingColors}"
+                    + "}, facet:{ colors:{ type:field, field:COLOR_s,"
+                    +              (limitJsonColors ? "":" limit:-1,")
+                    +              " facet:{ inprods:\"uniqueBlock(_root_)\"}}, "
+                    +         "sizes:{type:field, field:SIZE_s, "
+                    +              (limitJsonSizes ? "" : "limit:-1,")
+                    +              " facet:{inprods:\"uniqueBlock(_root_)\"}}"
+                    + "}"
+              + "}}", "debugQuery","true"//, "shards", "shard1"
           );
       NamedList<Object> resultsResponse = results.getResponse();
       assertNotNull(resultsResponse);
@@ -155,9 +176,49 @@ public class BlockJoinFacetDistribTest extends SolrCloudTestCase{
               parentIdsByAttrValue.get(c.getName()).size(), c.getCount());
         }
       }
-      
+
       assertEquals(msg , parentIdsByAttrValue.size(),color_s.getValueCount() + size_s.getValueCount());
-      //System.out.println(parentIdsByAttrValue);
+
+      final List<NamedList<Object>> jsonSizes = (List<NamedList<Object>>)
+                              get(resultsResponse, "facets", "children", "sizes", "buckets");
+      final List<NamedList<Object>> jsonColors = (List<NamedList<Object>>)
+                                get(resultsResponse, "facets", "children", "colors", "buckets");
+
+      if (limitJsonColors) {
+        assertTrue(""+jsonColors, jsonColors.size()<=defFacetLimit);
+      }
+
+      if (limitJsonSizes) {
+        assertTrue(""+jsonSizes, jsonSizes.size()<=defFacetLimit);
+      }
+
+      for (List<NamedList<Object>> vals : new List[] { jsonSizes,jsonColors}) {
+        int i=0;
+        for(NamedList<Object> tuples: vals) {
+          String  val = (String) get(tuples,"val");
+          Number  count = (Number) get(tuples,"inprods");
+          if (((vals==jsonSizes && limitJsonSizes) || // vals close to the limit are not exact 
+              (vals==jsonColors && limitJsonColors)) && i>=defFacetLimit/2) {
+            assertTrue(i+ "th "+tuples+". "+vals, 
+                parentIdsByAttrValue.get(val).size()>= count.intValue() &&
+                count.intValue()>0);
+          } else {
+            assertEquals(tuples+". "+vals, 
+                parentIdsByAttrValue.get(val).size(),count.intValue());
+          }
+          i++;
+        }
+      }
+      if (!limitJsonColors && !limitJsonSizes) {
+        assertEquals(""+jsonSizes+jsonColors, parentIdsByAttrValue.size(),jsonSizes.size() + jsonColors.size());
+      }
+  }
+
+  private static Object get(Object nvList, String ... segments) {
+    for(String segment: segments) {
+      nvList = ((NamedList<Object>) nvList).get(segment);
+    }
+    return nvList;
   }
 
   private QueryResponse query(String ... arg) throws SolrServerException, IOException {