You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by "imply-cheddar (via GitHub)" <gi...@apache.org> on 2023/05/19 08:39:18 UTC

[GitHub] [druid] imply-cheddar commented on a diff in pull request #14312: Hll Sketch estimate can now be used as an expression

imply-cheddar commented on code in PR #14312:
URL: https://github.com/apache/druid/pull/14312#discussion_r1198678103


##########
extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllPostAggExprMacros.java:
##########
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.hll.sql;
+
+import org.apache.druid.math.expr.Expr;
+import org.apache.druid.math.expr.ExprEval;
+import org.apache.druid.math.expr.ExprMacroTable;
+import org.apache.druid.math.expr.ExprType;
+import org.apache.druid.query.aggregation.datasketches.hll.HllSketchHolder;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class HllPostAggExprMacros
+{
+  // make same name as SQL binding

Review Comment:
   I think this comment was something you did for yourself?  Or is this intended for future readers of the code?



##########
extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchEstimateOperatorConversion.java:
##########
@@ -43,7 +43,7 @@
 
 public class HllSketchEstimateOperatorConversion implements SqlOperatorConversion
 {
-  private static final String FUNCTION_NAME = "HLL_SKETCH_ESTIMATE";
+  private static final String FUNCTION_NAME = "hll_sketch_estimate";

Review Comment:
   You made this lower case, but the ExprMacros still has it upper case, I'm actually not sure if the case matters, but why the inconsistency?  Did you explicitly do this because something was broken?



##########
extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllPostAggExprMacros.java:
##########
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.hll.sql;
+
+import org.apache.druid.math.expr.Expr;
+import org.apache.druid.math.expr.ExprEval;
+import org.apache.druid.math.expr.ExprMacroTable;
+import org.apache.druid.math.expr.ExprType;
+import org.apache.druid.query.aggregation.datasketches.hll.HllSketchHolder;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class HllPostAggExprMacros
+{
+  // make same name as SQL binding
+  public static final String HLL_SKETCH_ESTIMATE = "HLL_SKETCH_ESTIMATE";
+
+  public static class HLLSketchEstimateExprMacro implements ExprMacroTable.ExprMacro
+  {
+
+    @Override
+    public Expr apply(List<Expr> args)
+    {
+      class HllSketchEstimateExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
+      {
+
+        public HllSketchEstimateExpr(List<Expr> args)
+        {
+          super(HLL_SKETCH_ESTIMATE, args);
+        }
+
+        @Override
+        public ExprEval eval(ObjectBinding bindings)
+        {
+          boolean round = false;
+          Expr arg = args.get(0);
+          ExprEval eval = arg.eval(bindings);
+          if (args.size() == 2) {
+            round = args.get(1).eval(bindings).asBoolean();
+          }
+
+          final Object valObj = eval.value();
+          if (valObj == null) {
+            return ExprEval.of(null);
+          }
+          if (eval.type().is(ExprType.COMPLEX) && valObj instanceof HllSketchHolder) {
+            HllSketchHolder h = HllSketchHolder.fromObj(valObj);

Review Comment:
   We already did an `instanceof` check for `HllSketchHolder` can't we just cast it?  Or, you could alternatively just pass in the `eval.value()` (assuming it's non-null) and just assume it's a type that this can deal with.



##########
extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllPostAggExprMacros.java:
##########
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.hll.sql;
+
+import org.apache.druid.math.expr.Expr;
+import org.apache.druid.math.expr.ExprEval;
+import org.apache.druid.math.expr.ExprMacroTable;
+import org.apache.druid.math.expr.ExprType;
+import org.apache.druid.query.aggregation.datasketches.hll.HllSketchHolder;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class HllPostAggExprMacros
+{
+  // make same name as SQL binding
+  public static final String HLL_SKETCH_ESTIMATE = "HLL_SKETCH_ESTIMATE";
+
+  public static class HLLSketchEstimateExprMacro implements ExprMacroTable.ExprMacro
+  {
+
+    @Override
+    public Expr apply(List<Expr> args)
+    {
+      class HllSketchEstimateExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
+      {
+
+        public HllSketchEstimateExpr(List<Expr> args)
+        {
+          super(HLL_SKETCH_ESTIMATE, args);
+        }
+
+        @Override
+        public ExprEval eval(ObjectBinding bindings)
+        {
+          boolean round = false;
+          Expr arg = args.get(0);
+          ExprEval eval = arg.eval(bindings);
+          if (args.size() == 2) {
+            round = args.get(1).eval(bindings).asBoolean();
+          }
+
+          final Object valObj = eval.value();
+          if (valObj == null) {
+            return ExprEval.of(null);
+          }
+          if (eval.type().is(ExprType.COMPLEX) && valObj instanceof HllSketchHolder) {
+            HllSketchHolder h = HllSketchHolder.fromObj(valObj);
+            double estimate = h.getEstimate();
+            return round ? ExprEval.of(Math.round(estimate)) : ExprEval.of(estimate);
+          } else {
+            throw HLLSketchEstimateExprMacro.this.validationFailed("requires a HllSketch as the argument");

Review Comment:
   `validationFailed` is not correct.  This is running in `eval()` which is actually operating on top of the data.  If you are trying to do a validation, that should in the `ExprMacro`'s `apply()` method.  Once we are here, we better be dealing with an object type that we can actually do something with.  I think the best implementation here is to just check if the `valObj` was null and if it is not, pass it into `HllSketchHolder.fromObj()` and just hope for the best.



##########
extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java:
##########
@@ -888,4 +902,187 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches()
         ImmutableList.of(new Object[]{"a", 0L, "0"})
     );
   }
+
+  @Test
+  public void testHllEstimateAsVirtualColumn()
+  {
+    testQuery(
+        "SELECT"
+        + " HLL_SKETCH_ESTIMATE(hllsketch_dim1)"
+        + " FROM druid.foo",
+        ImmutableList.of(
+            newScanQueryBuilder()
+                .dataSource(CalciteTests.DATASOURCE1)
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .virtualColumns(new ExpressionVirtualColumn(
+                    "v0",
+                    "hll_sketch_estimate(\"hllsketch_dim1\")",
+                    ColumnType.DOUBLE,
+                    MACRO_TABLE
+                ))
+                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                .columns("v0")
+                .context(QUERY_CONTEXT_DEFAULT)
+                .build()
+        ),
+        ImmutableList.of(
+            new Object[]{0.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D}
+        )
+    );
+  }
+
+  @Test
+  public void testHllEstimateAsVirtualColumnWithRound()
+  {
+    testQuery(
+        "SELECT"
+        + " HLL_SKETCH_ESTIMATE(hllsketch_dim1, TRUE)"
+        + " FROM druid.foo",
+        ImmutableList.of(
+            newScanQueryBuilder()
+                .dataSource(CalciteTests.DATASOURCE1)
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .virtualColumns(new ExpressionVirtualColumn(
+                    "v0",
+                    "hll_sketch_estimate(\"hllsketch_dim1\",1)",
+                    ColumnType.DOUBLE,
+                    MACRO_TABLE
+                ))
+                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                .columns("v0")
+                .context(QUERY_CONTEXT_DEFAULT)
+                .build()
+        ),
+        ImmutableList.of(
+            new Object[]{0.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D}
+        )
+    );
+  }
+
+  @Test
+  public void testHllEstimateAsVirtualColumnOnNonHllCol()
+  {
+    try {
+      testQuery(
+          "SELECT"
+          + " HLL_SKETCH_ESTIMATE(dim2)"
+          + " FROM druid.foo",
+          ImmutableList.of(
+              newScanQueryBuilder()
+                  .dataSource(CalciteTests.DATASOURCE1)
+                  .intervals(querySegmentSpec(Filtration.eternity()))
+                  .virtualColumns(new ExpressionVirtualColumn(
+                      "v0",
+                      "hll_sketch_estimate(\"dim2\")",
+                      ColumnType.DOUBLE,
+                      MACRO_TABLE
+                  ))
+                  .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                  .columns("v0")
+                  .context(QUERY_CONTEXT_DEFAULT)
+                  .build()
+          ),
+          ImmutableList.of()
+      );
+    }
+    catch (ExpressionValidationException e) {
+      Assert.assertTrue(
+          e.getMessage().contains("Function[HLL_SKETCH_ESTIMATE] requires a HllSketch as the argument")
+      );
+    }
+  }
+
+  @Test
+  public void testHllEstimateAsVirtualColumnWithGroupByOrderBy()
+  {
+    skipVectorize();
+    cannotVectorize();
+    testQuery(
+        "SELECT"
+        + " HLL_SKETCH_ESTIMATE(hllsketch_dim1), count(*)"
+        + " FROM druid.foo"
+        + " GROUP BY 1"
+        + " ORDER BY 2 DESC",
+        ImmutableList.of(
+            GroupByQuery.builder()
+                        .setInterval(querySegmentSpec(Filtration.eternity()))
+                        .setDataSource(CalciteTests.DATASOURCE1)
+                        .setGranularity(Granularities.ALL)
+                        .setVirtualColumns(new ExpressionVirtualColumn(
+                            "v0",
+                            "hll_sketch_estimate(\"hllsketch_dim1\")",
+                            ColumnType.DOUBLE,
+                            MACRO_TABLE
+                        ))
+                        .setDimensions(
+                            new DefaultDimensionSpec("v0", "d0", ColumnType.DOUBLE))
+                        .setAggregatorSpecs(
+                            aggregators(
+                                new CountAggregatorFactory("a0")
+                            )
+                        )
+                        .setLimitSpec(
+                            DefaultLimitSpec
+                                .builder()
+                                .orderBy(
+                                    new OrderByColumnSpec(
+                                        "a0",
+                                        OrderByColumnSpec.Direction.DESCENDING,
+                                        StringComparators.NUMERIC
+                                    )
+                                )
+                                .build()
+                        )
+                        .setContext(QUERY_CONTEXT_DEFAULT)
+                        .build()
+        ),
+        ImmutableList.of(
+            new Object[]{1.0D, 5L},
+            new Object[]{0.0D, 1L}
+        )
+    );
+  }
+
+  @Test
+  public void testHllEstimateAsVirtualColumnWithTopN()
+  {
+    testQuery(
+        "SELECT"
+        + " HLL_SKETCH_ESTIMATE(hllsketch_dim1)"
+        + " FROM druid.foo"
+        + " GROUP BY 1 ORDER BY 1"
+        + " LIMIT 2",

Review Comment:
   Maybe throw a `COUNT(*)` on this and order by that instead?  Just to make it seem more "topN" like.



##########
extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java:
##########
@@ -888,4 +902,187 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches()
         ImmutableList.of(new Object[]{"a", 0L, "0"})
     );
   }
+
+  @Test
+  public void testHllEstimateAsVirtualColumn()
+  {
+    testQuery(
+        "SELECT"
+        + " HLL_SKETCH_ESTIMATE(hllsketch_dim1)"
+        + " FROM druid.foo",
+        ImmutableList.of(
+            newScanQueryBuilder()
+                .dataSource(CalciteTests.DATASOURCE1)
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .virtualColumns(new ExpressionVirtualColumn(
+                    "v0",
+                    "hll_sketch_estimate(\"hllsketch_dim1\")",
+                    ColumnType.DOUBLE,
+                    MACRO_TABLE
+                ))
+                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                .columns("v0")
+                .context(QUERY_CONTEXT_DEFAULT)
+                .build()
+        ),
+        ImmutableList.of(
+            new Object[]{0.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D}
+        )
+    );
+  }
+
+  @Test
+  public void testHllEstimateAsVirtualColumnWithRound()
+  {
+    testQuery(
+        "SELECT"
+        + " HLL_SKETCH_ESTIMATE(hllsketch_dim1, TRUE)"
+        + " FROM druid.foo",
+        ImmutableList.of(
+            newScanQueryBuilder()
+                .dataSource(CalciteTests.DATASOURCE1)
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .virtualColumns(new ExpressionVirtualColumn(
+                    "v0",
+                    "hll_sketch_estimate(\"hllsketch_dim1\",1)",
+                    ColumnType.DOUBLE,
+                    MACRO_TABLE
+                ))
+                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                .columns("v0")
+                .context(QUERY_CONTEXT_DEFAULT)
+                .build()
+        ),
+        ImmutableList.of(
+            new Object[]{0.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D}
+        )
+    );
+  }
+
+  @Test
+  public void testHllEstimateAsVirtualColumnOnNonHllCol()
+  {
+    try {
+      testQuery(
+          "SELECT"
+          + " HLL_SKETCH_ESTIMATE(dim2)"
+          + " FROM druid.foo",
+          ImmutableList.of(
+              newScanQueryBuilder()
+                  .dataSource(CalciteTests.DATASOURCE1)
+                  .intervals(querySegmentSpec(Filtration.eternity()))
+                  .virtualColumns(new ExpressionVirtualColumn(
+                      "v0",
+                      "hll_sketch_estimate(\"dim2\")",
+                      ColumnType.DOUBLE,
+                      MACRO_TABLE
+                  ))
+                  .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                  .columns("v0")
+                  .context(QUERY_CONTEXT_DEFAULT)
+                  .build()
+          ),
+          ImmutableList.of()
+      );
+    }
+    catch (ExpressionValidationException e) {
+      Assert.assertTrue(
+          e.getMessage().contains("Function[HLL_SKETCH_ESTIMATE] requires a HllSketch as the argument")
+      );
+    }

Review Comment:
   Once you adjust the expression as requested, I expect this test to change some.  If you do the "pass to HllSketchHolder and hope" route, I expect that you will get an IllegalArgumentException saying that the values of dim2 are not legal.  But it might be something else too.



##########
extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllPostAggExprMacros.java:
##########
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.hll.sql;
+
+import org.apache.druid.math.expr.Expr;
+import org.apache.druid.math.expr.ExprEval;
+import org.apache.druid.math.expr.ExprMacroTable;
+import org.apache.druid.math.expr.ExprType;
+import org.apache.druid.query.aggregation.datasketches.hll.HllSketchHolder;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class HllPostAggExprMacros
+{
+  // make same name as SQL binding
+  public static final String HLL_SKETCH_ESTIMATE = "HLL_SKETCH_ESTIMATE";
+
+  public static class HLLSketchEstimateExprMacro implements ExprMacroTable.ExprMacro
+  {
+
+    @Override
+    public Expr apply(List<Expr> args)
+    {
+      class HllSketchEstimateExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
+      {
+
+        public HllSketchEstimateExpr(List<Expr> args)
+        {
+          super(HLL_SKETCH_ESTIMATE, args);
+        }
+
+        @Override
+        public ExprEval eval(ObjectBinding bindings)
+        {
+          boolean round = false;
+          Expr arg = args.get(0);
+          ExprEval eval = arg.eval(bindings);
+          if (args.size() == 2) {
+            round = args.get(1).eval(bindings).asBoolean();
+          }

Review Comment:
   Also, no reason to check this on every single call.  `args` is gonna be the same for all of them.



##########
extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllPostAggExprMacros.java:
##########
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.hll.sql;
+
+import org.apache.druid.math.expr.Expr;
+import org.apache.druid.math.expr.ExprEval;
+import org.apache.druid.math.expr.ExprMacroTable;
+import org.apache.druid.math.expr.ExprType;
+import org.apache.druid.query.aggregation.datasketches.hll.HllSketchHolder;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class HllPostAggExprMacros
+{
+  // make same name as SQL binding
+  public static final String HLL_SKETCH_ESTIMATE = "HLL_SKETCH_ESTIMATE";
+
+  public static class HLLSketchEstimateExprMacro implements ExprMacroTable.ExprMacro
+  {
+
+    @Override
+    public Expr apply(List<Expr> args)
+    {
+      class HllSketchEstimateExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
+      {
+
+        public HllSketchEstimateExpr(List<Expr> args)
+        {
+          super(HLL_SKETCH_ESTIMATE, args);
+        }
+
+        @Override
+        public ExprEval eval(ObjectBinding bindings)
+        {
+          boolean round = false;
+          Expr arg = args.get(0);
+          ExprEval eval = arg.eval(bindings);
+          if (args.size() == 2) {
+            round = args.get(1).eval(bindings).asBoolean();
+          }
+
+          final Object valObj = eval.value();
+          if (valObj == null) {
+            return ExprEval.of(null);
+          }
+          if (eval.type().is(ExprType.COMPLEX) && valObj instanceof HllSketchHolder) {

Review Comment:
   Do you have a test that shows that `eval.type().is()` is necessary?  As long as it got an `HllSketchHolder`, I'm nto sure the type of teh `eval` really matters.



##########
extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllPostAggExprMacros.java:
##########
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.hll.sql;
+
+import org.apache.druid.math.expr.Expr;
+import org.apache.druid.math.expr.ExprEval;
+import org.apache.druid.math.expr.ExprMacroTable;
+import org.apache.druid.math.expr.ExprType;
+import org.apache.druid.query.aggregation.datasketches.hll.HllSketchHolder;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class HllPostAggExprMacros
+{
+  // make same name as SQL binding
+  public static final String HLL_SKETCH_ESTIMATE = "HLL_SKETCH_ESTIMATE";
+
+  public static class HLLSketchEstimateExprMacro implements ExprMacroTable.ExprMacro
+  {
+
+    @Override
+    public Expr apply(List<Expr> args)
+    {
+      class HllSketchEstimateExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
+      {
+
+        public HllSketchEstimateExpr(List<Expr> args)
+        {
+          super(HLL_SKETCH_ESTIMATE, args);
+        }
+
+        @Override
+        public ExprEval eval(ObjectBinding bindings)
+        {
+          boolean round = false;
+          Expr arg = args.get(0);

Review Comment:
   No need to `.get(0)` on every single call here.  Do it once and reuse the reference.



##########
extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllPostAggExprMacros.java:
##########
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.hll.sql;
+
+import org.apache.druid.math.expr.Expr;
+import org.apache.druid.math.expr.ExprEval;
+import org.apache.druid.math.expr.ExprMacroTable;
+import org.apache.druid.math.expr.ExprType;
+import org.apache.druid.query.aggregation.datasketches.hll.HllSketchHolder;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class HllPostAggExprMacros
+{
+  // make same name as SQL binding
+  public static final String HLL_SKETCH_ESTIMATE = "HLL_SKETCH_ESTIMATE";
+
+  public static class HLLSketchEstimateExprMacro implements ExprMacroTable.ExprMacro
+  {
+
+    @Override
+    public Expr apply(List<Expr> args)
+    {
+      class HllSketchEstimateExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr

Review Comment:
   What is the importance of doing this as an inner class like this?  I don't know that it adds anything and I think it just makes things more confusing.  I realize you were perhaps just copying a pattern that was already in place.  I looked and it looks like I copied there pattern here too 
   
   https://github.com/apache/druid/pull/14195/files#diff-5c5c183e6c105fdbb9abfb3e63cb0e75e19b932c1767d1d4c8b7f6131aa3f7d5R52-R57
   
   But, like, the lines highlighted there really show why this pattern doesn't make sense.  I'm calling `args.get(0)` even though I've already gotten the arg out and am additionally passing it to the constructor.  So there's just a ton of referencing the same thing through all sorts of different means, which is confusing.  Let's not copy this pattern around anymore.  Either make this a static class and have it store the arg that it needs.  Or just return it as an anonymous class.



##########
extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaPostAggMacros.java:
##########
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.datasketches.theta.sql;
+
+import org.apache.druid.math.expr.Expr;
+import org.apache.druid.math.expr.ExprEval;
+import org.apache.druid.math.expr.ExprMacroTable;
+import org.apache.druid.math.expr.ExprType;
+import org.apache.druid.query.aggregation.datasketches.theta.SketchHolder;
+
+import java.util.List;
+
+public class ThetaPostAggMacros

Review Comment:
   I had various commentary in the HLL version.  Please make sure to apply it here as well.



##########
extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java:
##########
@@ -888,4 +902,187 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches()
         ImmutableList.of(new Object[]{"a", 0L, "0"})
     );
   }
+
+  @Test
+  public void testHllEstimateAsVirtualColumn()
+  {
+    testQuery(
+        "SELECT"
+        + " HLL_SKETCH_ESTIMATE(hllsketch_dim1)"
+        + " FROM druid.foo",
+        ImmutableList.of(
+            newScanQueryBuilder()
+                .dataSource(CalciteTests.DATASOURCE1)
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .virtualColumns(new ExpressionVirtualColumn(
+                    "v0",
+                    "hll_sketch_estimate(\"hllsketch_dim1\")",
+                    ColumnType.DOUBLE,
+                    MACRO_TABLE
+                ))
+                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                .columns("v0")
+                .context(QUERY_CONTEXT_DEFAULT)
+                .build()
+        ),
+        ImmutableList.of(
+            new Object[]{0.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D}
+        )
+    );
+  }
+
+  @Test
+  public void testHllEstimateAsVirtualColumnWithRound()
+  {
+    testQuery(
+        "SELECT"
+        + " HLL_SKETCH_ESTIMATE(hllsketch_dim1, TRUE)"
+        + " FROM druid.foo",
+        ImmutableList.of(
+            newScanQueryBuilder()
+                .dataSource(CalciteTests.DATASOURCE1)
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .virtualColumns(new ExpressionVirtualColumn(
+                    "v0",
+                    "hll_sketch_estimate(\"hllsketch_dim1\",1)",
+                    ColumnType.DOUBLE,
+                    MACRO_TABLE
+                ))
+                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                .columns("v0")
+                .context(QUERY_CONTEXT_DEFAULT)
+                .build()
+        ),
+        ImmutableList.of(
+            new Object[]{0.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D}

Review Comment:
   I don't believe that any of the results actually changed once you turned on rounding...  It would be great to have this test run against some data set that actually makes a difference rounding and not rounding.
   
   For that matter, once you do that, you can use the same expression twice in the same query, once that does the default, once that rounds and once that doesn't round and then validate the results (and show that they are different)



##########
extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchEstimateOperatorConversion.java:
##########
@@ -40,7 +40,7 @@
 
 public class ThetaSketchEstimateOperatorConversion implements SqlOperatorConversion
 {
-  private static final String FUNCTION_NAME = "THETA_SKETCH_ESTIMATE";
+  private static final String FUNCTION_NAME = "theta_sketch_estimate";

Review Comment:
   once again we have this casing change...  Is that important?



##########
extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java:
##########
@@ -1018,8 +1034,160 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches()
   @Test
   public void testThetaSketchIntersectOnScalarExpression()
   {
-    assertQueryIsUnplannable("SELECT THETA_SKETCH_INTERSECT(NULL, NULL) FROM foo",
+    assertQueryIsUnplannable(
+        "SELECT THETA_SKETCH_INTERSECT(NULL, NULL) FROM foo",
         "Possible error: THETA_SKETCH_INTERSECT can only be used on aggregates. " +
-            "It cannot be used directly on a column or on a scalar expression.");
+        "It cannot be used directly on a column or on a scalar expression."
+    );
+  }
+
+  @Test
+  public void testThetaSketchEstimateAsVirtualColumn()
+  {
+    testQuery(
+        "SELECT"
+        + " THETA_SKETCH_ESTIMATE(thetasketch_dim1)"
+        + " FROM foo",
+        ImmutableList.of(
+            newScanQueryBuilder()
+                .dataSource(CalciteTests.DATASOURCE1)
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .virtualColumns(new ExpressionVirtualColumn(
+                    "v0",
+                    "theta_sketch_estimate(\"thetasketch_dim1\")",
+                    ColumnType.DOUBLE,
+                    MACRO_TABLE
+                ))
+                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                .columns("v0")
+                .context(QUERY_CONTEXT_DEFAULT)
+                .build()
+        ),
+        ImmutableList.of(
+            NullHandling.replaceWithDefault() ? new Object[]{null} : new Object[]{0.0D},

Review Comment:
   What causes the difference here?  It would appear that this and HLL are not consistent with each other and I'm curious why.



##########
extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java:
##########
@@ -888,4 +902,187 @@ public void testGroupByAggregatorDefaultValuesFinalizeOuterSketches()
         ImmutableList.of(new Object[]{"a", 0L, "0"})
     );
   }
+
+  @Test
+  public void testHllEstimateAsVirtualColumn()
+  {
+    testQuery(
+        "SELECT"
+        + " HLL_SKETCH_ESTIMATE(hllsketch_dim1)"
+        + " FROM druid.foo",
+        ImmutableList.of(
+            newScanQueryBuilder()
+                .dataSource(CalciteTests.DATASOURCE1)
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .virtualColumns(new ExpressionVirtualColumn(
+                    "v0",
+                    "hll_sketch_estimate(\"hllsketch_dim1\")",
+                    ColumnType.DOUBLE,
+                    MACRO_TABLE
+                ))
+                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                .columns("v0")
+                .context(QUERY_CONTEXT_DEFAULT)
+                .build()
+        ),
+        ImmutableList.of(
+            new Object[]{0.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D}
+        )
+    );
+  }
+
+  @Test
+  public void testHllEstimateAsVirtualColumnWithRound()
+  {
+    testQuery(
+        "SELECT"
+        + " HLL_SKETCH_ESTIMATE(hllsketch_dim1, TRUE)"
+        + " FROM druid.foo",
+        ImmutableList.of(
+            newScanQueryBuilder()
+                .dataSource(CalciteTests.DATASOURCE1)
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .virtualColumns(new ExpressionVirtualColumn(
+                    "v0",
+                    "hll_sketch_estimate(\"hllsketch_dim1\",1)",
+                    ColumnType.DOUBLE,
+                    MACRO_TABLE
+                ))
+                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                .columns("v0")
+                .context(QUERY_CONTEXT_DEFAULT)
+                .build()
+        ),
+        ImmutableList.of(
+            new Object[]{0.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D},
+            new Object[]{1.0D}
+        )
+    );
+  }
+
+  @Test
+  public void testHllEstimateAsVirtualColumnOnNonHllCol()
+  {
+    try {
+      testQuery(
+          "SELECT"
+          + " HLL_SKETCH_ESTIMATE(dim2)"
+          + " FROM druid.foo",
+          ImmutableList.of(
+              newScanQueryBuilder()
+                  .dataSource(CalciteTests.DATASOURCE1)
+                  .intervals(querySegmentSpec(Filtration.eternity()))
+                  .virtualColumns(new ExpressionVirtualColumn(
+                      "v0",
+                      "hll_sketch_estimate(\"dim2\")",
+                      ColumnType.DOUBLE,
+                      MACRO_TABLE
+                  ))
+                  .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                  .columns("v0")
+                  .context(QUERY_CONTEXT_DEFAULT)
+                  .build()
+          ),
+          ImmutableList.of()
+      );
+    }
+    catch (ExpressionValidationException e) {
+      Assert.assertTrue(
+          e.getMessage().contains("Function[HLL_SKETCH_ESTIMATE] requires a HllSketch as the argument")
+      );
+    }
+  }
+
+  @Test
+  public void testHllEstimateAsVirtualColumnWithGroupByOrderBy()
+  {
+    skipVectorize();
+    cannotVectorize();

Review Comment:
   Do you know what makes this not vectorizable?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org