You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2020/04/04 19:05:29 UTC
[hive] 01/02: HIVE-23030: Enable sketch union-s to be rolled up
(Zoltan Haindrich reviewed by Jesus Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository.
kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
commit 216e73a7ddd58974e6b1151d1b8d0e26f5f69239
Author: Zoltan Haindrich <ki...@rxd.hu>
AuthorDate: Sat Apr 4 18:51:39 2020 +0000
HIVE-23030: Enable sketch union-s to be rolled up (Zoltan Haindrich reviewed by Jesus Camacho Rodriguez)
Signed-off-by: Zoltan Haindrich <zh...@cloudera.com>
---
.../test/resources/testconfiguration.properties | 1 +
.../hadoop/hive/ql/exec/DataSketchesFunctions.java | 397 ++++++++++++++-------
.../hadoop/hive/ql/exec/FunctionRegistry.java | 2 +-
.../org/apache/hadoop/hive/ql/exec/Registry.java | 26 ++
.../hive/ql/optimizer/calcite/HiveRelBuilder.java | 5 +
.../calcite/functions/HiveMergeableAggregate.java | 66 ++++
.../calcite/functions/HiveSqlSumAggFunction.java | 2 -
.../calcite/translator/SqlFunctionConverter.java | 21 +-
.../org/apache/hive/plugin/api/HiveUDFPlugin.java | 35 ++
.../sketches_materialized_view_rollup.q | 32 ++
.../llap/sketches_materialized_view_rollup.q.out | 187 ++++++++++
11 files changed, 634 insertions(+), 140 deletions(-)
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index f54c96e..d2c9127 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -824,6 +824,7 @@ minillaplocal.query.files=\
schq_ingest.q,\
sketches_hll.q,\
sketches_theta.q,\
+ sketches_materialized_view_rollup.q,\
table_access_keys_stats.q,\
temp_table_llap_partitioned.q,\
tez_bmj_schema_evolution.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java
index b9d265f..eec90c6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java
@@ -18,15 +18,35 @@
package org.apache.hadoop.hive.ql.exec;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import org.apache.calcite.rel.type.RelDataTypeImpl;
+import org.apache.calcite.rel.type.RelProtoDataType;
+import org.apache.calcite.sql.SqlFunction;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.type.InferTypes;
+import org.apache.calcite.sql.type.OperandTypes;
+import org.apache.calcite.sql.type.ReturnTypes;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveMergeableAggregate;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver2;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hive.plugin.api.HiveUDFPlugin;
/**
* Registers functions from the DataSketches library as builtin functions.
*
* In an effort to show a more consistent
*/
-public class DataSketchesFunctions {
+public final class DataSketchesFunctions implements HiveUDFPlugin {
+
+ public static final DataSketchesFunctions INSTANCE = new DataSketchesFunctions();
+
+ private static final String DATASKETCHES_PREFIX = "ds";
private static final String DATA_TO_SKETCH = "sketch";
private static final String SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS = "estimate_bounds";
@@ -53,169 +73,276 @@ public class DataSketchesFunctions {
private static final String SKETCH_TO_VARIANCES = "variances";
private static final String SKETCH_TO_PERCENTILE = "percentile";
- private final Registry system;
+ private final List<SketchDescriptor> sketchClasses;
+ private final ArrayList<UDFDescriptor> descriptors;
+
+ private DataSketchesFunctions() {
+ this.sketchClasses = new ArrayList<SketchDescriptor>();
+ this.descriptors = new ArrayList<HiveUDFPlugin.UDFDescriptor>();
+ registerHll();
+ registerCpc();
+ registerKll();
+ registerTheta();
+ registerTuple();
+ registerQuantiles();
+ registerFrequencies();
+
+ buildCalciteFns();
+ buildDescritors();
+ }
+
+ @Override
+ public Iterable<UDFDescriptor> getDescriptors() {
+ return descriptors;
+ }
+
+ private void buildDescritors() {
+ for (SketchDescriptor sketchDescriptor : sketchClasses) {
+ descriptors.addAll(sketchDescriptor.fnMap.values());
+ }
+ }
+
+ private void buildCalciteFns() {
+ for (SketchDescriptor sd : sketchClasses) {
+ // Mergability is exposed to Calcite; which enables to use it during rollup.
+ RelProtoDataType sketchType = RelDataTypeImpl.proto(SqlTypeName.BINARY, true);
+
+ SketchFunctionDescriptor sketchSFD = sd.fnMap.get(DATA_TO_SKETCH);
+ SketchFunctionDescriptor unionSFD = sd.fnMap.get(UNION_SKETCH);
+
+ if (sketchSFD == null || unionSFD == null) {
+ continue;
+ }
+
+ HiveMergeableAggregate unionFn = new HiveMergeableAggregate(unionSFD.name,
+ SqlKind.OTHER_FUNCTION,
+ ReturnTypes.explicit(sketchType),
+ InferTypes.ANY_NULLABLE,
+ OperandTypes.family(),
+ null);
+
+ HiveMergeableAggregate sketchFn = new HiveMergeableAggregate(sketchSFD.name,
+ SqlKind.OTHER_FUNCTION,
+ ReturnTypes.explicit(sketchType),
+ InferTypes.ANY_NULLABLE,
+ OperandTypes.family(),
+ unionFn);
+
+ unionSFD.setCalciteFunction(unionFn);
+ sketchSFD.setCalciteFunction(sketchFn);
+ }
+ }
+
+
+ private void registerHiveFunctionsInternal(Registry system) {
+ for (SketchDescriptor sketchDescriptor : sketchClasses) {
+ Collection<SketchFunctionDescriptor> functions = sketchDescriptor.fnMap.values();
+ for (SketchFunctionDescriptor fn : functions) {
+ if (UDF.class.isAssignableFrom(fn.udfClass)) {
+ system.registerUDF(fn.name, (Class<? extends UDF>) fn.udfClass, false);
+ continue;
+ }
+ if (GenericUDAFResolver2.class.isAssignableFrom(fn.udfClass)) {
+ String name = fn.name;
+ try {
+ system.registerGenericUDAF(name, ((Class<? extends GenericUDAFResolver2>) fn.udfClass).newInstance());
+ } catch (InstantiationException | IllegalAccessException e) {
+ throw new RuntimeException("Unable to register: " + name, e);
+ }
+ continue;
+ }
+ if (GenericUDTF.class.isAssignableFrom(fn.udfClass)) {
+ system.registerGenericUDTF(fn.name, (Class<? extends GenericUDTF>) fn.udfClass);
+ continue;
+ }
+ throw new RuntimeException("Don't know how to register: " + fn.name);
+ }
+ }
+
+ }
+
+ private static class SketchFunctionDescriptor implements HiveUDFPlugin.UDFDescriptor {
+ String name;
+ Class<?> udfClass;
+ private SqlFunction calciteFunction;
+
+ public SketchFunctionDescriptor(String name, Class<?> udfClass) {
+ this.name = name;
+ this.udfClass = udfClass;
+ }
+
+ @Override
+ public Class<?> getUDFClass() {
+ return udfClass;
+ }
+
+ @Override
+ public String getFunctionName() {
+ return name;
+ }
- public DataSketchesFunctions(Registry system) {
- this.system = system;
+ @Override
+ public Optional<SqlFunction> getCalciteFunction() {
+ return Optional.ofNullable(calciteFunction);
+ }
+
+ public void setCalciteFunction(SqlFunction calciteFunction) {
+ this.calciteFunction = calciteFunction;
+ }
}
- public static void register(Registry system) {
- DataSketchesFunctions dsf = new DataSketchesFunctions(system);
- String prefix = "ds";
- dsf.registerHll(prefix);
- dsf.registerCpc(prefix);
- dsf.registerKll(prefix);
- dsf.registerTheta(prefix);
- dsf.registerTuple(prefix);
- dsf.registerQuantiles(prefix);
- dsf.registerFrequencies(prefix);
+ private static class SketchDescriptor {
+ Map<String, SketchFunctionDescriptor> fnMap;
+ private String functionPrefix;
+
+ public SketchDescriptor(String string) {
+ fnMap = new HashMap<String, SketchFunctionDescriptor>();
+ functionPrefix = DATASKETCHES_PREFIX + "_" + string + "_";
+ }
+
+ private void register(String name, Class<?> clazz) {
+ fnMap.put(name, new SketchFunctionDescriptor(functionPrefix + name, clazz));
+ }
}
- private void registerHll(String prefix) {
- String p = prefix + "_hll_";
- registerUDAF(org.apache.datasketches.hive.hll.DataToSketchUDAF.class, p + DATA_TO_SKETCH);
- registerUDF(org.apache.datasketches.hive.hll.SketchToEstimateAndErrorBoundsUDF.class,
- p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS);
- registerUDF(org.apache.datasketches.hive.hll.SketchToEstimateUDF.class, p + SKETCH_TO_ESTIMATE);
- registerUDF(org.apache.datasketches.hive.hll.SketchToStringUDF.class, p + SKETCH_TO_STRING);
- registerUDF(org.apache.datasketches.hive.hll.UnionSketchUDF.class, p + UNION_SKETCH1);
- registerUDAF(org.apache.datasketches.hive.hll.UnionSketchUDAF.class, p + UNION_SKETCH);
+ private void registerHll() {
+ SketchDescriptor sd = new SketchDescriptor("hll");
+ sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.hll.DataToSketchUDAF.class);
+ sd.register(SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS,
+ org.apache.datasketches.hive.hll.SketchToEstimateAndErrorBoundsUDF.class);
+ sd.register(SKETCH_TO_ESTIMATE, org.apache.datasketches.hive.hll.SketchToEstimateUDF.class);
+ sd.register(SKETCH_TO_STRING, org.apache.datasketches.hive.hll.SketchToStringUDF.class);
+ sd.register(UNION_SKETCH1, org.apache.datasketches.hive.hll.UnionSketchUDF.class);
+ sd.register(UNION_SKETCH, org.apache.datasketches.hive.hll.UnionSketchUDAF.class);
+ sketchClasses.add(sd);
}
- private void registerCpc(String prefix) {
- String p = prefix + "_cpc_";
- registerUDAF(org.apache.datasketches.hive.cpc.DataToSketchUDAF.class, p + DATA_TO_SKETCH);
+ private void registerCpc() {
+ SketchDescriptor sd = new SketchDescriptor("cpc");
+ sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.cpc.DataToSketchUDAF.class);
// FIXME: normalize GetEstimateAndErrorBoundsUDF vs SketchToEstimateAndErrorBoundsUDF
- registerUDF(org.apache.datasketches.hive.cpc.GetEstimateAndErrorBoundsUDF.class,
- p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS);
+ sd.register(SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS,
+ org.apache.datasketches.hive.cpc.GetEstimateAndErrorBoundsUDF.class);
// FIXME: normalize GetEstimateUDF vs SketchToEstimateUDF
- registerUDF(org.apache.datasketches.hive.cpc.GetEstimateUDF.class, p + SKETCH_TO_ESTIMATE);
- registerUDF(org.apache.datasketches.hive.cpc.SketchToStringUDF.class, p + SKETCH_TO_STRING);
- registerUDF(org.apache.datasketches.hive.cpc.UnionSketchUDF.class, p + UNION_SKETCH1);
- registerUDAF(org.apache.datasketches.hive.cpc.UnionSketchUDAF.class, p + UNION_SKETCH);
- }
-
- private void registerKll(String prefix) {
- String p = prefix + "_kll_";
- registerUDAF(org.apache.datasketches.hive.kll.DataToSketchUDAF.class, p + DATA_TO_SKETCH);
- registerUDF(org.apache.datasketches.hive.kll.SketchToStringUDF.class, p + SKETCH_TO_STRING);
- // registerUDF(org.apache.datasketches.hive.kll.UnionSketchUDF.class, p + UNION_SKETCH);
- registerUDAF(org.apache.datasketches.hive.kll.UnionSketchUDAF.class, p + UNION_SKETCH);
-
- registerUDF(org.apache.datasketches.hive.kll.GetNUDF.class, p + GET_N);
- registerUDF(org.apache.datasketches.hive.kll.GetCdfUDF.class, p + GET_CDF);
- registerUDF(org.apache.datasketches.hive.kll.GetPmfUDF.class, p + GET_PMF);
- registerUDF(org.apache.datasketches.hive.kll.GetQuantilesUDF.class, p + GET_QUANTILES);
- registerUDF(org.apache.datasketches.hive.kll.GetQuantileUDF.class, p + GET_QUANTILE);
- registerUDF(org.apache.datasketches.hive.kll.GetRankUDF.class, p + GET_RANK);
- }
-
- private void registerTheta(String prefix) {
- String p = prefix + "_theta_";
- registerUDAF(org.apache.datasketches.hive.theta.DataToSketchUDAF.class, p + DATA_TO_SKETCH);
+ sd.register(SKETCH_TO_ESTIMATE, org.apache.datasketches.hive.cpc.GetEstimateUDF.class);
+ sd.register(SKETCH_TO_STRING, org.apache.datasketches.hive.cpc.SketchToStringUDF.class);
+ sd.register(UNION_SKETCH1, org.apache.datasketches.hive.cpc.UnionSketchUDF.class);
+ sd.register(UNION_SKETCH, org.apache.datasketches.hive.cpc.UnionSketchUDAF.class);
+ sketchClasses.add(sd);
+ }
+
+ private void registerKll() {
+ SketchDescriptor sd = new SketchDescriptor("kll");
+ sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.kll.DataToSketchUDAF.class);
+ sd.register(SKETCH_TO_STRING, org.apache.datasketches.hive.kll.SketchToStringUDF.class);
+ // registerUDF(org.apache.datasketches.hive.kll.UnionSketchUDF.class, p , UNION_SKETCH);
+ sd.register(UNION_SKETCH, org.apache.datasketches.hive.kll.UnionSketchUDAF.class);
+
+ sd.register(GET_N, org.apache.datasketches.hive.kll.GetNUDF.class);
+ sd.register(GET_CDF, org.apache.datasketches.hive.kll.GetCdfUDF.class);
+ sd.register(GET_PMF, org.apache.datasketches.hive.kll.GetPmfUDF.class);
+ sd.register(GET_QUANTILES, org.apache.datasketches.hive.kll.GetQuantilesUDF.class);
+ sd.register(GET_QUANTILE, org.apache.datasketches.hive.kll.GetQuantileUDF.class);
+ sd.register(GET_RANK, org.apache.datasketches.hive.kll.GetRankUDF.class);
+ sketchClasses.add(sd);
+ }
+
+ private void registerTheta() {
+ SketchDescriptor sd = new SketchDescriptor("theta");
+ sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.theta.DataToSketchUDAF.class);
// FIXME: missing?
//registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING);
- registerUDF(org.apache.datasketches.hive.theta.UnionSketchUDF.class, p + UNION_SKETCH1);
- registerUDAF(org.apache.datasketches.hive.theta.UnionSketchUDAF.class, p + UNION_SKETCH);
- registerUDF(org.apache.datasketches.hive.theta.IntersectSketchUDF.class, p + INTERSECT_SKETCH1);
- registerUDAF(org.apache.datasketches.hive.theta.IntersectSketchUDAF.class, p + INTERSECT_SKETCH);
- registerUDF(org.apache.datasketches.hive.theta.EstimateSketchUDF.class, p + SKETCH_TO_ESTIMATE);
- registerUDF(org.apache.datasketches.hive.theta.ExcludeSketchUDF.class, p + EXCLUDE_SKETCH);
+ sd.register(UNION_SKETCH1, org.apache.datasketches.hive.theta.UnionSketchUDF.class);
+ sd.register(UNION_SKETCH, org.apache.datasketches.hive.theta.UnionSketchUDAF.class);
+ sd.register(INTERSECT_SKETCH1, org.apache.datasketches.hive.theta.IntersectSketchUDF.class);
+ sd.register(INTERSECT_SKETCH, org.apache.datasketches.hive.theta.IntersectSketchUDAF.class);
+ sd.register(SKETCH_TO_ESTIMATE, org.apache.datasketches.hive.theta.EstimateSketchUDF.class);
+ sd.register(EXCLUDE_SKETCH, org.apache.datasketches.hive.theta.ExcludeSketchUDF.class);
+ sketchClasses.add(sd);
}
- private void registerTuple(String prefix) {
- registerTupleArrayOfDoubles(prefix + "_tuple_arrayofdouble");
- registerTupleDoubleSummary(prefix + "_tuple_doublesummary");
+ private void registerTuple() {
+ registerTupleArrayOfDoubles();
+ registerTupleDoubleSummary();
}
- private void registerTupleArrayOfDoubles(String string) {
- String p = string + "_";
- registerUDAF(org.apache.datasketches.hive.tuple.DataToArrayOfDoublesSketchUDAF.class, p + DATA_TO_SKETCH);
- // FIXME: missing?
- //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING);
- registerUDAF(org.apache.datasketches.hive.tuple.UnionArrayOfDoublesSketchUDAF.class, p + UNION_SKETCH);
- registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchesTTestUDF.class, p + T_TEST);
- registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimatesUDF.class, p + SKETCH_TO_ESTIMATE);
- registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimateAndErrorBoundsUDF.class,
- p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS);
- registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToMeansUDF.class, p + SKETCH_TO_MEANS);
- registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToNumberOfRetainedEntriesUDF.class,
- p + SKETCH_TO_NUMBER_OF_RETAINED_ENTRIES);
- registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToQuantilesSketchUDF.class,
- p + SKETCH_TO_QUANTILES_SKETCH);
- registerUDTF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToValuesUDTF.class, p + SKETCH_TO_VALUES);
- registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToVariancesUDF.class, p + SKETCH_TO_VARIANCES);
- }
-
- private void registerTupleDoubleSummary(String string) {
- String p = string + "_";
- registerUDAF(org.apache.datasketches.hive.tuple.DataToDoubleSummarySketchUDAF.class, p + DATA_TO_SKETCH);
+ private void registerTupleArrayOfDoubles() {
+ SketchDescriptor sd = new SketchDescriptor("tuple_arrayofdouble");
+ sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.tuple.DataToArrayOfDoublesSketchUDAF.class);
// FIXME: missing?
+ //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p , SKETCH_TO_STRING);
+ sd.register(UNION_SKETCH, org.apache.datasketches.hive.tuple.UnionArrayOfDoublesSketchUDAF.class);
+ sd.register(T_TEST, org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchesTTestUDF.class);
+ sd.register(SKETCH_TO_ESTIMATE, org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimatesUDF.class);
+ sd.register(SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS,
+ org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimateAndErrorBoundsUDF.class);
+ sd.register(SKETCH_TO_MEANS, org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToMeansUDF.class);
+ sd.register(SKETCH_TO_NUMBER_OF_RETAINED_ENTRIES,
+ org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToNumberOfRetainedEntriesUDF.class);
+ sd.register(SKETCH_TO_QUANTILES_SKETCH,
+ org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToQuantilesSketchUDF.class);
+ sd.register(SKETCH_TO_VALUES, org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToValuesUDTF.class);
+ sd.register(SKETCH_TO_VARIANCES, org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToVariancesUDF.class);
+ sketchClasses.add(sd);
+ }
+
+ private void registerTupleDoubleSummary() {
+ SketchDescriptor sd = new SketchDescriptor("tuple_doublesummary");
+ sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.tuple.DataToDoubleSummarySketchUDAF.class);
//registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING);
- registerUDAF(org.apache.datasketches.hive.tuple.UnionDoubleSummarySketchUDAF.class, p + UNION_SKETCH);
- registerUDF(org.apache.datasketches.hive.tuple.DoubleSummarySketchToEstimatesUDF.class, p + SKETCH_TO_ESTIMATE);
- registerUDF(org.apache.datasketches.hive.tuple.DoubleSummarySketchToPercentileUDF.class, p + SKETCH_TO_PERCENTILE);
+ sd.register(UNION_SKETCH, org.apache.datasketches.hive.tuple.UnionDoubleSummarySketchUDAF.class);
+ sd.register(SKETCH_TO_ESTIMATE, org.apache.datasketches.hive.tuple.DoubleSummarySketchToEstimatesUDF.class);
+ sd.register(SKETCH_TO_PERCENTILE, org.apache.datasketches.hive.tuple.DoubleSummarySketchToPercentileUDF.class);
+ sketchClasses.add(sd);
}
- private void registerQuantiles(String prefix) {
- registerQuantilesString(prefix + "_quantile");
- registerQuantilesDoubles(prefix + "_quantile");
+ private void registerQuantiles() {
+ registerQuantilesString();
+ registerQuantilesDoubles();
}
- private void registerFrequencies(String prefix) {
- String p = prefix + "_freq_";
- registerUDAF(org.apache.datasketches.hive.frequencies.DataToStringsSketchUDAF.class, p + DATA_TO_SKETCH);
+ private void registerFrequencies() {
+ SketchDescriptor sd = new SketchDescriptor("freq");
+
+ sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.frequencies.DataToStringsSketchUDAF.class);
// FIXME: missing?
//registerUDF(org.apache.datasketches.hive.frequencies.DoublesSketchToStringUDF.class, p + SKETCH_TO_STRING);
//registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p + UNION_SKETCH);
- registerUDAF(org.apache.datasketches.hive.frequencies.UnionStringsSketchUDAF.class, p + UNION_SKETCH);
- registerUDTF(org.apache.datasketches.hive.frequencies.GetFrequentItemsFromStringsSketchUDTF.class,
- p + GET_FREQUENT_ITEMS);
+ sd.register(UNION_SKETCH, org.apache.datasketches.hive.frequencies.UnionStringsSketchUDAF.class);
+ sd.register(GET_FREQUENT_ITEMS,
+ org.apache.datasketches.hive.frequencies.GetFrequentItemsFromStringsSketchUDTF.class);
+ sketchClasses.add(sd);
}
- private void registerQuantilesString(String prefix) {
- String p = prefix + "_strings_";
- registerUDAF(org.apache.datasketches.hive.quantiles.DataToStringsSketchUDAF.class, p + DATA_TO_SKETCH);
- registerUDF(org.apache.datasketches.hive.quantiles.StringsSketchToStringUDF.class, p + SKETCH_TO_STRING);
- //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p + UNION_SKETCH);
- registerUDAF(org.apache.datasketches.hive.quantiles.UnionStringsSketchUDAF.class, p + UNION_SKETCH);
- registerUDF(org.apache.datasketches.hive.quantiles.GetNFromStringsSketchUDF.class, p + GET_N);
- registerUDF(org.apache.datasketches.hive.quantiles.GetKFromStringsSketchUDF.class, p + GET_K);
- registerUDF(org.apache.datasketches.hive.quantiles.GetCdfFromStringsSketchUDF.class, p + GET_CDF);
- registerUDF(org.apache.datasketches.hive.quantiles.GetPmfFromStringsSketchUDF.class, p + GET_PMF);
- registerUDF(org.apache.datasketches.hive.quantiles.GetQuantileFromStringsSketchUDF.class, p + GET_QUANTILE);
- registerUDF(org.apache.datasketches.hive.quantiles.GetQuantilesFromStringsSketchUDF.class, p + GET_QUANTILES);
- }
-
- private void registerQuantilesDoubles(String prefix) {
- String p = prefix + "_doubles_";
- registerUDAF(org.apache.datasketches.hive.quantiles.DataToDoublesSketchUDAF.class, p + DATA_TO_SKETCH);
- registerUDF(org.apache.datasketches.hive.quantiles.DoublesSketchToStringUDF.class, p + SKETCH_TO_STRING);
- //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p + UNION_SKETCH);
- registerUDAF(org.apache.datasketches.hive.quantiles.UnionDoublesSketchUDAF.class, p + UNION_SKETCH);
- registerUDF(org.apache.datasketches.hive.quantiles.GetNFromDoublesSketchUDF.class, p + GET_N);
- registerUDF(org.apache.datasketches.hive.quantiles.GetKFromDoublesSketchUDF.class, p + GET_K);
- registerUDF(org.apache.datasketches.hive.quantiles.GetCdfFromDoublesSketchUDF.class, p + GET_CDF);
- registerUDF(org.apache.datasketches.hive.quantiles.GetPmfFromDoublesSketchUDF.class, p + GET_PMF);
- registerUDF(org.apache.datasketches.hive.quantiles.GetQuantileFromDoublesSketchUDF.class, p + GET_QUANTILE);
- registerUDF(org.apache.datasketches.hive.quantiles.GetQuantilesFromDoublesSketchUDF.class, p + GET_QUANTILES);
- }
-
- private void registerUDF(Class<? extends UDF> udfClass, String name) {
- system.registerUDF(name, udfClass, false);
- }
-
- private void registerUDAF(Class<? extends GenericUDAFResolver2> udafClass, String name) {
- try {
- system.registerGenericUDAF(name, udafClass.newInstance());
- } catch (InstantiationException | IllegalAccessException e) {
- throw new RuntimeException("Unable to register: " + name, e);
- }
+ private void registerQuantilesString() {
+ SketchDescriptor sd = new SketchDescriptor("quantile_strings");
+ sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.quantiles.DataToStringsSketchUDAF.class);
+ sd.register(SKETCH_TO_STRING, org.apache.datasketches.hive.quantiles.StringsSketchToStringUDF.class);
+ //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p , UNION_SKETCH);
+ sd.register(UNION_SKETCH, org.apache.datasketches.hive.quantiles.UnionStringsSketchUDAF.class);
+ sd.register(GET_N, org.apache.datasketches.hive.quantiles.GetNFromStringsSketchUDF.class);
+ sd.register(GET_K, org.apache.datasketches.hive.quantiles.GetKFromStringsSketchUDF.class);
+ sd.register(GET_CDF, org.apache.datasketches.hive.quantiles.GetCdfFromStringsSketchUDF.class);
+ sd.register(GET_PMF, org.apache.datasketches.hive.quantiles.GetPmfFromStringsSketchUDF.class);
+ sd.register(GET_QUANTILE, org.apache.datasketches.hive.quantiles.GetQuantileFromStringsSketchUDF.class);
+ sd.register(GET_QUANTILES, org.apache.datasketches.hive.quantiles.GetQuantilesFromStringsSketchUDF.class);
+ sketchClasses.add(sd);
}
- private void registerUDTF(Class<? extends GenericUDTF> udtfClass, String name) {
- system.registerGenericUDTF(name, udtfClass);
+ private void registerQuantilesDoubles() {
+ SketchDescriptor sd = new SketchDescriptor("quantile_doubles");
+ sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.quantiles.DataToDoublesSketchUDAF.class);
+ sd.register(SKETCH_TO_STRING, org.apache.datasketches.hive.quantiles.DoublesSketchToStringUDF.class);
+ //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p , UNION_SKETCH);
+ sd.register(UNION_SKETCH, org.apache.datasketches.hive.quantiles.UnionDoublesSketchUDAF.class);
+ sd.register(GET_N, org.apache.datasketches.hive.quantiles.GetNFromDoublesSketchUDF.class);
+ sd.register(GET_K, org.apache.datasketches.hive.quantiles.GetKFromDoublesSketchUDF.class);
+ sd.register(GET_CDF, org.apache.datasketches.hive.quantiles.GetCdfFromDoublesSketchUDF.class);
+ sd.register(GET_PMF, org.apache.datasketches.hive.quantiles.GetPmfFromDoublesSketchUDF.class);
+ sd.register(GET_QUANTILE, org.apache.datasketches.hive.quantiles.GetQuantileFromDoublesSketchUDF.class);
+ sd.register(GET_QUANTILES, org.apache.datasketches.hive.quantiles.GetQuantilesFromDoublesSketchUDF.class);
+ sketchClasses.add(sd);
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index dc3781a..b0c5862 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -473,7 +473,7 @@ public final class FunctionRegistry {
system.registerGenericUDAF("percentile_cont", new GenericUDAFPercentileCont());
system.registerGenericUDAF("percentile_disc", new GenericUDAFPercentileDisc());
- DataSketchesFunctions.register(system);
+ system.registerUDFPlugin(DataSketchesFunctions.INSTANCE);
// Generic UDFs
system.registerGenericUDF("reflect", GenericUDFReflect.class);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java
index 76dd66e..40e9e97 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java
@@ -50,6 +50,8 @@ import org.apache.hadoop.hive.ql.udf.ptf.TableFunctionResolver;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hive.common.util.ReflectionUtil;
+import org.apache.hive.plugin.api.HiveUDFPlugin;
+import org.apache.hive.plugin.api.HiveUDFPlugin.UDFDescriptor;
import java.io.IOException;
import java.util.Collections;
@@ -812,4 +814,28 @@ public class Registry {
}
return null;
}
+
+ public void registerUDFPlugin(HiveUDFPlugin instance) {
+ Iterable<UDFDescriptor> x = instance.getDescriptors();
+ for (UDFDescriptor fn : x) {
+ if (UDF.class.isAssignableFrom(fn.getUDFClass())) {
+ registerUDF(fn.getFunctionName(), (Class<? extends UDF>) fn.getUDFClass(), false);
+ continue;
+ }
+ if (GenericUDAFResolver2.class.isAssignableFrom(fn.getUDFClass())) {
+ String name = fn.getFunctionName();
+ try {
+ registerGenericUDAF(name, ((Class<? extends GenericUDAFResolver2>) fn.getUDFClass()).newInstance());
+ } catch (InstantiationException | IllegalAccessException e) {
+ throw new RuntimeException("Unable to register: " + name, e);
+ }
+ continue;
+ }
+ if (GenericUDTF.class.isAssignableFrom(fn.getUDFClass())) {
+ registerGenericUDTF(fn.getFunctionName(), (Class<? extends GenericUDTF>) fn.getUDFClass());
+ continue;
+ }
+ throw new RuntimeException("Don't know how to register: " + fn.getFunctionName());
+ }
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java
index f50779d..184a026 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java
@@ -35,6 +35,7 @@ import org.apache.calcite.tools.FrameworkConfig;
import org.apache.calcite.tools.Frameworks;
import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.tools.RelBuilderFactory;
+import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveMergeableAggregate;
import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunction;
import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlMinMaxAggFunction;
import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction;
@@ -139,6 +140,10 @@ public class HiveRelBuilder extends RelBuilder {
}
public static SqlAggFunction getRollup(SqlAggFunction aggregation) {
+ if (aggregation instanceof HiveMergeableAggregate) {
+ HiveMergeableAggregate mAgg = (HiveMergeableAggregate) aggregation;
+ return mAgg.getMergeAggFunction();
+ }
if (aggregation instanceof HiveSqlSumAggFunction
|| aggregation instanceof HiveSqlMinMaxAggFunction
|| aggregation instanceof HiveSqlSumEmptyIsZeroAggFunction) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveMergeableAggregate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveMergeableAggregate.java
new file mode 100644
index 0000000..041345a
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveMergeableAggregate.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.functions;
+
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlFunctionCategory;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.type.SqlOperandTypeChecker;
+import org.apache.calcite.sql.type.SqlOperandTypeInference;
+import org.apache.calcite.sql.type.SqlReturnTypeInference;
+
+/**
+ * Mergeable aggregate.
+ *
+ * A mergeable aggregate is:
+ * - accepts the same kind as inputs as the output (an X^n -> X function)
+ *
+ * Example: the SUM function is a great example; since SUM of SUM -s is the overall sum.
+ */
+public class HiveMergeableAggregate extends SqlAggFunction {
+
+ private SqlAggFunction mergeAgg;
+
+ public HiveMergeableAggregate(String string, SqlKind kind, SqlReturnTypeInference returnTypeInference,
+ SqlOperandTypeInference operandTypeInference,
+ SqlOperandTypeChecker operandTypeChecker) {
+ this(string, kind, returnTypeInference, operandTypeInference, operandTypeChecker, null);
+ }
+
+ public HiveMergeableAggregate(String string, SqlKind kind, SqlReturnTypeInference returnTypeInference,
+ SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker,
+ SqlAggFunction unionFn) {
+ super(
+ string, kind,
+ returnTypeInference,
+ operandTypeInference,
+ operandTypeChecker,
+ SqlFunctionCategory.NUMERIC);
+ if (unionFn == null) {
+ this.mergeAgg = this;
+ } else {
+ this.mergeAgg = unionFn;
+ }
+
+ }
+
+ public SqlAggFunction getMergeAggFunction() {
+ return mergeAgg;
+ }
+
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
index 468e6f8..974dab1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
@@ -125,5 +125,3 @@ public class HiveSqlSumAggFunction extends SqlAggFunction implements CanAggregat
}
}
}
-
-// End SqlSumAggFunction.java
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index a555749..07ca87f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -20,9 +20,10 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
import java.lang.annotation.Annotation;
import java.util.List;
import java.util.Map;
-
+import java.util.Optional;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.SqlFunctionCategory;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlOperator;
@@ -37,6 +38,7 @@ import org.apache.calcite.sql.type.SqlReturnTypeInference;
import org.apache.calcite.sql.type.SqlTypeFamily;
import org.apache.calcite.util.Util;
import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.hive.ql.exec.DataSketchesFunctions;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
@@ -78,6 +80,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import org.apache.hive.plugin.api.HiveUDFPlugin;
+import org.apache.hive.plugin.api.HiveUDFPlugin.UDFDescriptor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -468,6 +472,19 @@ public class SqlFunctionConverter {
);
registerFunction("date_add", HiveDateAddSqlOperator.INSTANCE, hToken(HiveParser.Identifier, "date_add"));
registerFunction("date_sub", HiveDateSubSqlOperator.INSTANCE, hToken(HiveParser.Identifier, "date_sub"));
+
+ registerPlugin(DataSketchesFunctions.INSTANCE);
+ }
+
+ private void registerPlugin(HiveUDFPlugin plugin) {
+ for (UDFDescriptor udfDesc : plugin.getDescriptors()) {
+ Optional<SqlFunction> calciteFunction = udfDesc.getCalciteFunction();
+ if (calciteFunction.isPresent()) {
+ registerDuplicateFunction(udfDesc.getFunctionName(), calciteFunction.get(),
+ hToken(HiveParser.Identifier, udfDesc.getFunctionName()));
+ }
+ }
+
}
private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) {
@@ -525,7 +542,7 @@ public class SqlFunctionConverter {
}
private static CalciteUDFInfo getUDFInfo(String hiveUdfName,
- ImmutableList<RelDataType> calciteArgTypes, RelDataType calciteRetType) {
+ List<RelDataType> calciteArgTypes, RelDataType calciteRetType) {
CalciteUDFInfo udfInfo = new CalciteUDFInfo();
udfInfo.udfName = hiveUdfName;
udfInfo.returnTypeInference = ReturnTypes.explicit(calciteRetType);
diff --git a/ql/src/java/org/apache/hive/plugin/api/HiveUDFPlugin.java b/ql/src/java/org/apache/hive/plugin/api/HiveUDFPlugin.java
new file mode 100644
index 0000000..41c198c
--- /dev/null
+++ b/ql/src/java/org/apache/hive/plugin/api/HiveUDFPlugin.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.plugin.api;
+
+import java.util.Optional;
+
+import org.apache.calcite.sql.SqlFunction;
+
+/**
+ * Extension point through which a bundle of UDFs can be registered with Hive.
+ */
+public interface HiveUDFPlugin {
+
+  /** Describes a single function supplied by the plugin. */
+  interface UDFDescriptor {
+    /** The implementing UDF/UDAF class. */
+    Class<?> getUDFClass();
+    /** The SQL-level name under which the function is registered. */
+    String getFunctionName();
+    /** Calcite representation of the function, when it participates in CBO; empty otherwise. */
+    Optional<SqlFunction> getCalciteFunction();
+  }
+
+  /** @return descriptors of all functions supplied by this plugin */
+  Iterable<UDFDescriptor> getDescriptors();
+
+}
diff --git a/ql/src/test/queries/clientpositive/sketches_materialized_view_rollup.q b/ql/src/test/queries/clientpositive/sketches_materialized_view_rollup.q
new file mode 100644
index 0000000..e1a2054
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/sketches_materialized_view_rollup.q
@@ -0,0 +1,32 @@
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.strict.checks.cartesian.product=false;
+set hive.stats.fetch.column.stats=true;
+set hive.materializedview.rewriting=true;
+set hive.fetch.task.conversion=none;
+
+create table sketch_input (id int, category char(1))
+STORED AS ORC
+TBLPROPERTIES ('transactional'='true');
+
+insert into table sketch_input values
+ (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+ (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+;
+
+-- create an mv for the intermediate results
+create materialized view mv_1 as
+ select category, ds_hll_sketch(id),count(id) from sketch_input group by category;
+
+-- see if we use the mv
+explain
+select category, round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input group by category;
+select category, round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input group by category;
+
+-- the mv should be used - the rollup should be possible
+explain
+select round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input;
+select round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input;
+
+drop materialized view mv_1;
diff --git a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup.q.out b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup.q.out
new file mode 100644
index 0000000..ee656b0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup.q.out
@@ -0,0 +1,187 @@
+PREHOOK: query: create table sketch_input (id int, category char(1))
+STORED AS ORC
+TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: create table sketch_input (id int, category char(1))
+STORED AS ORC
+TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sketch_input
+PREHOOK: query: insert into table sketch_input values
+ (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+ (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: insert into table sketch_input values
+ (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+ (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@sketch_input
+POSTHOOK: Lineage: sketch_input.category SCRIPT []
+POSTHOOK: Lineage: sketch_input.id SCRIPT []
+PREHOOK: query: create materialized view mv_1 as
+ select category, ds_hll_sketch(id),count(id) from sketch_input group by category
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@sketch_input
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mv_1
+POSTHOOK: query: create materialized view mv_1 as
+ select category, ds_hll_sketch(id),count(id) from sketch_input group by category
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@sketch_input
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mv_1
+PREHOOK: query: explain
+select category, round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mv_1
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select category, round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mv_1
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: default.mv_1
+ Statistics: Num rows: 2 Data size: 362 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: category (type: char(1)), round(ds_hll_estimate(_c1)) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select category, round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mv_1
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: select category, round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mv_1
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+a 10.0
+b 10.0
+PREHOOK: query: explain
+select round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mv_1
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mv_1
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: default.mv_1
+ Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _c1 (type: binary)
+ outputColumnNames: _c1
+ Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: ds_hll_union(_c1)
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct<lgk:int,type:string,sketch:binary>)
+ Execution mode: llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: ds_hll_union(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: round(ds_hll_estimate(_col0)) (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mv_1
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: select round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mv_1
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+15.0
+PREHOOK: query: drop materialized view mv_1
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: Input: default@mv_1
+PREHOOK: Output: default@mv_1
+POSTHOOK: query: drop materialized view mv_1
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: Input: default@mv_1
+POSTHOOK: Output: default@mv_1