You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mk...@apache.org on 2020/02/02 12:17:04 UTC

[lucene-solr] branch master updated: SOLR-12325: uniqueBlock(\{!v=foo:bar})

This is an automated email from the ASF dual-hosted git repository.

mkhl pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new d8bc9bc  SOLR-12325: uniqueBlock(\{!v=foo:bar})
d8bc9bc is described below

commit d8bc9bcfcf772dfbfb6aa39dc019b0787946707a
Author: Mikhail Khludnev <mk...@apache.org>
AuthorDate: Sun Feb 2 15:15:17 2020 +0300

    SOLR-12325: uniqueBlock(\{!v=foo:bar})
---
 solr/CHANGES.txt                                   |  2 +
 .../org/apache/solr/search/ValueSourceParser.java  |  8 ++-
 .../apache/solr/search/facet/UniqueBlockAgg.java   | 32 +++-------
 .../solr/search/facet/UniqueBlockFieldAgg.java     | 45 ++++++++++++++
 .../solr/search/facet/UniqueBlockQueryAgg.java     | 71 ++++++++++++++++++++++
 .../apache/solr/search/facet/TestJsonFacets.java   | 41 +++++++------
 .../facet/TestJsonFacetsWithNestedObjects.java     | 31 +++++++---
 solr/solr-ref-guide/src/json-facet-api.adoc        |  4 +-
 8 files changed, 182 insertions(+), 52 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 40bd76c..7fc4e1d 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -168,6 +168,8 @@ New Features
 
  * SOLR-13892: New "top-level" docValues join implementation (Jason Gerlowski, Joel Bernstein)
 
+ * SOLR-12325: Introducing uniqueBlock({!v=type:parent}) aggregation (Anatolii Siuniaev via Mikhail Khludnev)
+
 Improvements
 ---------------------
 * SOLR-14120: Define JavaScript methods 'includes' and 'startsWith' to ensure AdminUI can be displayed when using
diff --git a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
index 64cadb6..d054bc8 100644
--- a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
+++ b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
@@ -67,7 +67,8 @@ import org.apache.solr.search.facet.StddevAgg;
 import org.apache.solr.search.facet.SumAgg;
 import org.apache.solr.search.facet.SumsqAgg;
 import org.apache.solr.search.facet.UniqueAgg;
-import org.apache.solr.search.facet.UniqueBlockAgg;
+import org.apache.solr.search.facet.UniqueBlockFieldAgg;
+import org.apache.solr.search.facet.UniqueBlockQueryAgg;
 import org.apache.solr.search.facet.VarianceAgg;
 import org.apache.solr.search.function.CollapseScoreFunction;
 import org.apache.solr.search.function.ConcatStringFunction;
@@ -971,7 +972,10 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
     addParser("agg_uniqueBlock", new ValueSourceParser() {
       @Override
       public ValueSource parse(FunctionQParser fp) throws SyntaxError {
-        return new UniqueBlockAgg(fp.parseArg());
+        if (fp.sp.peek() == QueryParsing.LOCALPARAM_START.charAt(0) ) {
+          return new UniqueBlockQueryAgg(fp.parseNestedQuery());
+        }
+        return new UniqueBlockFieldAgg(fp.parseArg());
       }
     });
 
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockAgg.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockAgg.java
index 42ddbb5..77dfa64 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockAgg.java
@@ -21,14 +21,14 @@ import java.util.Arrays;
 
 import org.apache.solr.schema.SchemaField;
 
-public class UniqueBlockAgg extends UniqueAgg {
+public abstract class UniqueBlockAgg extends UniqueAgg {
 
-  private static final class UniqueBlockSlotAcc extends UniqueSinglevaluedSlotAcc {
-    
-    private int lastSeenValuesPerSlot[];
-    
-    private UniqueBlockSlotAcc(FacetContext fcontext, SchemaField field, int numSlots)
-        throws IOException { //  
+  protected static class UniqueBlockSlotAcc extends UniqueSinglevaluedSlotAcc {
+
+    protected int[] lastSeenValuesPerSlot;
+
+    protected UniqueBlockSlotAcc(FacetContext fcontext, SchemaField field, int numSlots)
+        throws IOException { //
       super(fcontext, field, /*numSlots suppressing inherited accumulator */0, null);
       counts = new int[numSlots];
       lastSeenValuesPerSlot = new int[numSlots];
@@ -70,25 +70,11 @@ public class UniqueBlockAgg extends UniqueAgg {
 
   public UniqueBlockAgg(String field) {
     super(field);
-    name= uniqueBlock;
+    name = uniqueBlock;
   }
 
   @Override
-  public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
-    final String fieldName = getArg();
-    SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(fieldName);
-    if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
-      throw new IllegalArgumentException(uniqueBlock+"("+fieldName+
-          ") doesn't allow multivalue fields, got " + sf);
-    } else {
-      if (sf.getType().getNumberType() != null) {
-        throw new IllegalArgumentException(uniqueBlock+"("+fieldName+
-            ") not yet support numbers " + sf);
-      } else {
-        return new UniqueBlockSlotAcc(fcontext, sf, numSlots);
-      }
-    }
-  }
+  public abstract SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException ;
   
   @Override
   public FacetMerger createFacetMerger(Object prototype) {
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockFieldAgg.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockFieldAgg.java
new file mode 100644
index 0000000..3f7f949
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockFieldAgg.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search.facet;
+
+import java.io.IOException;
+
+import org.apache.solr.schema.SchemaField;
+
+public class UniqueBlockFieldAgg extends UniqueBlockAgg {
+
+  public UniqueBlockFieldAgg(String field) {
+    super(field);
+  }
+
+  @Override
+  public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
+    final String fieldName = getArg();
+    SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(fieldName);
+    if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
+      throw new IllegalArgumentException(name+"("+fieldName+
+          ") doesn't allow multivalue fields, got " + sf);
+    } else {
+      if (sf.getType().getNumberType() != null) {
+        throw new IllegalArgumentException(name+"("+fieldName+
+            ") not yet support numbers " + sf);
+      } else {
+        return new UniqueBlockSlotAcc(fcontext, sf, numSlots);
+      }
+    }
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockQueryAgg.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockQueryAgg.java
new file mode 100644
index 0000000..3cc46d8
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockQueryAgg.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.search.facet;
+
+import java.io.IOException;
+import java.util.function.IntFunction;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.BitSet;
+
+import static org.apache.solr.search.join.BlockJoinParentQParser.getCachedFilter;
+
+public class UniqueBlockQueryAgg extends UniqueBlockAgg {
+
+  private static final class UniqueBlockQuerySlotAcc extends UniqueBlockSlotAcc {
+
+    private Query query;
+    private BitSet parentBitSet;
+
+    private UniqueBlockQuerySlotAcc(FacetContext fcontext, Query query, int numSlots)
+        throws IOException { //
+      super(fcontext, null, numSlots);
+      this.query = query;
+    }
+
+    @Override
+    public void setNextReader(LeafReaderContext readerContext) throws IOException {
+      this.parentBitSet = getCachedFilter(fcontext.req, query).getFilter().getBitSet(readerContext);
+    }
+
+    @Override
+    public void collect(int doc, int slotNum, IntFunction<SlotContext> slotContext) {
+      if (parentBitSet != null) {
+        int ord = parentBitSet.nextSetBit(doc);
+        if (ord != DocIdSetIterator.NO_MORE_DOCS) {
+          collectOrdToSlot(slotNum, ord);
+        } 
+      }
+    }
+  }
+
+  final private Query query;
+
+  public UniqueBlockQueryAgg(Query query) {
+    super(null);
+    this.query = query;
+    arg = query.toString();
+  }
+
+  @Override
+  public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
+    return new UniqueBlockQuerySlotAcc(fcontext, query, numSlots);
+  }
+}
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
index 3a6c694..ed566e3 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
@@ -3149,14 +3149,18 @@ public class TestJsonFacets extends SolrTestCaseHS {
 
     parent = sdoc("id", "2", "type_s","book", "book_s","B", "v_t","q w");
     parent.addChildDocument( sdoc("id","2.1", "type_s","page", "page_s","a", "v_t","x y z")  );
-    parent.addChildDocument( sdoc("id","2.2", "type_s","page", "page_s","b", "v_t","x y  ") );
-    parent.addChildDocument( sdoc("id","2.3", "type_s","page", "page_s","c", "v_t","  y z" )  );
+    parent.addChildDocument( sdoc("id","2.2", "type_s","page", "page_s","a", "v_t","x1   z")  );
+    parent.addChildDocument( sdoc("id","2.3", "type_s","page", "page_s","a", "v_t","x2   z")  );
+    parent.addChildDocument( sdoc("id","2.4", "type_s","page", "page_s","b", "v_t","x y  ") );
+    parent.addChildDocument( sdoc("id","2.5", "type_s","page", "page_s","c", "v_t","  y z" )  );
+    parent.addChildDocument( sdoc("id","2.6", "type_s","page", "page_s","c", "v_t","    z" )  );
     client.add(parent, null);
 
     parent = sdoc("id", "3", "type_s","book", "book_s","C", "v_t","q w e");
-    parent.addChildDocument( sdoc("id","3.1", "type_s","page", "page_s","d", "v_t","x    ")  );
-    parent.addChildDocument( sdoc("id","3.2", "type_s","page", "page_s","e", "v_t","  y  ")  );
-    parent.addChildDocument( sdoc("id","3.3", "type_s","page", "page_s","f", "v_t","    z")  );
+    parent.addChildDocument( sdoc("id","3.1", "type_s","page", "page_s","b", "v_t","x y  ") );
+    parent.addChildDocument( sdoc("id","3.2", "type_s","page", "page_s","d", "v_t","x    ")  );
+    parent.addChildDocument( sdoc("id","3.3", "type_s","page", "page_s","e", "v_t","  y  ")  );
+    parent.addChildDocument( sdoc("id","3.4", "type_s","page", "page_s","f", "v_t","    z")  );
     client.add(parent, null);
 
     parent = sdoc("id", "4", "type_s","book", "book_s","D", "v_t","e");
@@ -3171,35 +3175,38 @@ public class TestJsonFacets extends SolrTestCaseHS {
             "    field:type_s," +
             "    limit:-1," +
             "    facet: {" +
-            "           in_books: \"unique(_root_)\" }"+
+            "           in_books: \"unique(_root_)\"," +
+            "           via_field:\"uniqueBlock(_root_)\","+
+            "           via_query:\"uniqueBlock({!v=type_s:book})\" }"+
             "  }," +
             "  pages: {" +
             "    type:terms," +
             "    field:page_s," +
             "    limit:-1," +
             "    facet: {" +
-            "           in_books: \"uniqueBlock(_root_)\" }"+
+            "           in_books: \"unique(_root_)\"," +
+            "           via_field:\"uniqueBlock(_root_)\","+
+            "           via_query:\"uniqueBlock({!v=type_s:book})\" }"+
             "  }" +
             "}" )
 
-        , "response=={numFound:6,start:0,docs:[]}"
-        , "facets=={ count:6," +
+        , "response=={numFound:10,start:0,docs:[]}"
+        , "facets=={ count:10," +
             "types:{" +
-            "    buckets:[ {val:page, count:6, in_books:2} ]}" +
+            "    buckets:[ {val:page, count:10, in_books:2, via_field:2, via_query:2 } ]}" +
             "pages:{" +
             "    buckets:[ " +
-            "     {val:a, count:1, in_books:1}," +
-            "     {val:b, count:1, in_books:1}," +
-            "     {val:c, count:1, in_books:1}," +
-            "     {val:d, count:1, in_books:1}," +
-            "     {val:e, count:1, in_books:1}," +
-            "     {val:f, count:1, in_books:1}" +
+            "     {val:a, count:3, in_books:1, via_field:1, via_query:1}," +
+            "     {val:b, count:2, in_books:2, via_field:2, via_query:2}," +
+            "     {val:c, count:2, in_books:1, via_field:1, via_query:1}," +
+            "     {val:d, count:1, in_books:1, via_field:1, via_query:1}," +
+            "     {val:e, count:1, in_books:1, via_field:1, via_query:1}," +
+            "     {val:f, count:1, in_books:1, via_field:1, via_query:1}" +
             "    ]}" +
             "}"
     );
   }
 
-
   /**
    * Similar to {@link #testBlockJoin} but uses query time joining.
    * <p>
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetsWithNestedObjects.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetsWithNestedObjects.java
index cb8b71a..6e05491 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetsWithNestedObjects.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetsWithNestedObjects.java
@@ -61,7 +61,9 @@ public class TestJsonFacetsWithNestedObjects extends SolrTestCaseHS{
             "author_s", "dan",
             "comment_t", "This book was too long."));
     client.add(book1, null);
-
+    if (rarely()) {
+      client.commit();
+    }
     SolrInputDocument book2 = sdoc(
         "id",         "book2",
         "type_s",     "book",
@@ -338,25 +340,36 @@ public class TestJsonFacetsWithNestedObjects extends SolrTestCaseHS{
   public void testUniqueBlock() throws Exception {
     final Client client = Client.localClient();
     ModifiableSolrParams p = params("rows","0");
+
+    // unique block using field and query logic
     client.testJQ(params(p, "q", "{!parent tag=top which=type_s:book v=$childquery}"
         , "childquery", "comment_t:*"
-        , "fl", "id", "fl" , "title_t" 
+        , "fl", "id", "fl" , "title_t"
+        , "root", "_root_"
+        , "parentQuery", "type_s:book"
         , "json.facet", "{" +
             "  types: {" +
-            "    domain: { blockChildren:\"type_s:book\"" +  
-             "            }," +
+            "    domain: { blockChildren:\"type_s:book\"" +
+            "            }," +
             "    type:terms," +
-            "    field:type_s,"
-            + "  limit:-1," +
+            "    field:type_s," +
+            "    limit:-1," +
             "    facet: {" +
-            "           in_books: \"uniqueBlock(_root_)\" }"+//}}," +
+            "           in_books1: \"uniqueBlock(_root_)\"," + // field logic
+            "           in_books2: \"uniqueBlock($root)\"," + // field reference logic
+            "           via_query1:\"uniqueBlock({!v=type_s:book})\", " + // query logic
+            "           via_query2:\"uniqueBlock({!v=$parentQuery})\" ," + // query reference logic
+            "           partial_query:\"uniqueBlock({!v=cat_s:fantasy})\" ," + // first doc hit only, never count afterwards
+            "           query_no_match:\"uniqueBlock({!v=cat_s:horor})\" }" +
             "  }" +
-        "}" )
+            "}" )
 
         , "response=={numFound:2,start:0,docs:[]}"
         , "facets=={ count:2," +
             "types:{" +
-            "    buckets:[ {val:review,    count:5, in_books:2} ]}" +
+            "    buckets:[ {val:review, count:5, in_books1:2, in_books2:2, "
+            + "                                  via_query1:2, via_query2:2, "
+            + "                                  partial_query:1, query_no_match:0} ]}" +
             "}"
     );
   }
diff --git a/solr/solr-ref-guide/src/json-facet-api.adoc b/solr/solr-ref-guide/src/json-facet-api.adoc
index 9c944fd..1d2bc8a 100644
--- a/solr/solr-ref-guide/src/json-facet-api.adoc
+++ b/solr/solr-ref-guide/src/json-facet-api.adoc
@@ -572,7 +572,8 @@ Unlike all the facets discussed so far, Aggregation functions (also called *face
 |missing |`missing(author)` |number of documents which do not have value for given field or function
 |countvals |`countvals(author)` |number of values for a given field or function
 |unique |`unique(author)` |number of unique values of the given field. Beyond 100 values it yields not exact estimate
-|uniqueBlock |`uniqueBlock(\_root_)` |same as above with smaller footprint strictly for <<json-faceting-domain-changes.adoc#block-join-domain-changes,counting the number of Block Join blocks>>. The given field must be unique across blocks, and only singlevalued string fields are supported, docValues are recommended.
+|uniqueBlock |`uniqueBlock(\_root_)` or `uniqueBlock($fldref)` where `fldref=_root_` |same as above with smaller footprint strictly for <<json-faceting-domain-changes.adoc#block-join-domain-changes,counting the number of Block Join blocks>>. The given field must be unique across blocks, and only single-valued string fields are supported; docValues are recommended.
+| |`uniqueBlock({!v=type:parent})` or `uniqueBlock({!v=$qryref})` where `qryref=type:parent` |same as above, but uses the bitset of the given parent query, instead of a field, to aggregate hits.
 |hll |`hll(author)` |distributed cardinality estimate via hyper-log-log algorithm
 |percentile |`percentile(salary,50,75,99,99.9)` |Percentile estimates via t-digest algorithm. When sorting by this metric, the first percentile listed is used as the sort value.
 |sumsq |`sumsq(rent)` |sum of squares of field or function
@@ -875,6 +876,7 @@ color: {
   limit: -1,
   facet: {
     productsCount: "uniqueBlock(_root_)"
+      // or "uniqueBlock({!v=type:product})"
   }
 }
 ----