You are viewing a plain text version of this content; the canonical version is commit 216e73a7ddd58974e6b1151d1b8d0e26f5f69239 in the repository at https://gitbox.apache.org/repos/asf/hive.git (the original hyperlink was lost in this plain-text rendering).
Posted to commits@hive.apache.org by kg...@apache.org on 2020/04/04 19:05:29 UTC

[hive] 01/02: HIVE-23030: Enable sketch union-s to be rolled up (Zoltan Haindrich reviewed by Jesus Camacho Rodriguez)

This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit 216e73a7ddd58974e6b1151d1b8d0e26f5f69239
Author: Zoltan Haindrich <ki...@rxd.hu>
AuthorDate: Sat Apr 4 18:51:39 2020 +0000

    HIVE-23030: Enable sketch union-s to be rolled up (Zoltan Haindrich reviewed by Jesus Camacho Rodriguez)
    
    Signed-off-by: Zoltan Haindrich <zh...@cloudera.com>
---
 .../test/resources/testconfiguration.properties    |   1 +
 .../hadoop/hive/ql/exec/DataSketchesFunctions.java | 397 ++++++++++++++-------
 .../hadoop/hive/ql/exec/FunctionRegistry.java      |   2 +-
 .../org/apache/hadoop/hive/ql/exec/Registry.java   |  26 ++
 .../hive/ql/optimizer/calcite/HiveRelBuilder.java  |   5 +
 .../calcite/functions/HiveMergeableAggregate.java  |  66 ++++
 .../calcite/functions/HiveSqlSumAggFunction.java   |   2 -
 .../calcite/translator/SqlFunctionConverter.java   |  21 +-
 .../org/apache/hive/plugin/api/HiveUDFPlugin.java  |  35 ++
 .../sketches_materialized_view_rollup.q            |  32 ++
 .../llap/sketches_materialized_view_rollup.q.out   | 187 ++++++++++
 11 files changed, 634 insertions(+), 140 deletions(-)

diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index f54c96e..d2c9127 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -824,6 +824,7 @@ minillaplocal.query.files=\
   schq_ingest.q,\
   sketches_hll.q,\
   sketches_theta.q,\
+  sketches_materialized_view_rollup.q,\
   table_access_keys_stats.q,\
   temp_table_llap_partitioned.q,\
   tez_bmj_schema_evolution.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java
index b9d265f..eec90c6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java
@@ -18,15 +18,35 @@
 
 package org.apache.hadoop.hive.ql.exec;
 
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import org.apache.calcite.rel.type.RelDataTypeImpl;
+import org.apache.calcite.rel.type.RelProtoDataType;
+import org.apache.calcite.sql.SqlFunction;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.type.InferTypes;
+import org.apache.calcite.sql.type.OperandTypes;
+import org.apache.calcite.sql.type.ReturnTypes;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveMergeableAggregate;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver2;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hive.plugin.api.HiveUDFPlugin;
 
 /**
  * Registers functions from the DataSketches library as builtin functions.
  *
  * In an effort to show a more consistent
  */
-public class DataSketchesFunctions {
+public final class DataSketchesFunctions implements HiveUDFPlugin {
+
+  public static final DataSketchesFunctions INSTANCE = new DataSketchesFunctions();
+
+  private static final String DATASKETCHES_PREFIX = "ds";
 
   private static final String DATA_TO_SKETCH = "sketch";
   private static final String SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS = "estimate_bounds";
@@ -53,169 +73,276 @@ public class DataSketchesFunctions {
   private static final String SKETCH_TO_VARIANCES = "variances";
   private static final String SKETCH_TO_PERCENTILE = "percentile";
 
-  private final Registry system;
+  private final List<SketchDescriptor> sketchClasses;
+  private final ArrayList<UDFDescriptor> descriptors;
+
+  private DataSketchesFunctions() {
+    this.sketchClasses = new ArrayList<SketchDescriptor>();
+    this.descriptors = new ArrayList<HiveUDFPlugin.UDFDescriptor>();
+    registerHll();
+    registerCpc();
+    registerKll();
+    registerTheta();
+    registerTuple();
+    registerQuantiles();
+    registerFrequencies();
+
+    buildCalciteFns();
+    buildDescritors();
+  }
+
+  @Override
+  public Iterable<UDFDescriptor> getDescriptors() {
+    return descriptors;
+  }
+
+  private void buildDescritors() {
+    for (SketchDescriptor sketchDescriptor : sketchClasses) {
+      descriptors.addAll(sketchDescriptor.fnMap.values());
+    }
+  }
+
+  private void buildCalciteFns() {
+    for (SketchDescriptor sd : sketchClasses) {
+      // Mergability is exposed to Calcite; which enables to use it during rollup.
+      RelProtoDataType sketchType = RelDataTypeImpl.proto(SqlTypeName.BINARY, true);
+
+      SketchFunctionDescriptor sketchSFD = sd.fnMap.get(DATA_TO_SKETCH);
+      SketchFunctionDescriptor unionSFD = sd.fnMap.get(UNION_SKETCH);
+
+      if (sketchSFD == null || unionSFD == null) {
+        continue;
+      }
+
+      HiveMergeableAggregate unionFn = new HiveMergeableAggregate(unionSFD.name,
+          SqlKind.OTHER_FUNCTION,
+          ReturnTypes.explicit(sketchType),
+          InferTypes.ANY_NULLABLE,
+          OperandTypes.family(),
+          null);
+
+      HiveMergeableAggregate sketchFn = new HiveMergeableAggregate(sketchSFD.name,
+          SqlKind.OTHER_FUNCTION,
+          ReturnTypes.explicit(sketchType),
+          InferTypes.ANY_NULLABLE,
+          OperandTypes.family(),
+          unionFn);
+
+      unionSFD.setCalciteFunction(unionFn);
+      sketchSFD.setCalciteFunction(sketchFn);
+    }
+  }
+
+
+  private void registerHiveFunctionsInternal(Registry system) {
+    for (SketchDescriptor sketchDescriptor : sketchClasses) {
+      Collection<SketchFunctionDescriptor> functions = sketchDescriptor.fnMap.values();
+      for (SketchFunctionDescriptor fn : functions) {
+        if (UDF.class.isAssignableFrom(fn.udfClass)) {
+          system.registerUDF(fn.name, (Class<? extends UDF>) fn.udfClass, false);
+          continue;
+        }
+        if (GenericUDAFResolver2.class.isAssignableFrom(fn.udfClass)) {
+          String name = fn.name;
+          try {
+            system.registerGenericUDAF(name, ((Class<? extends GenericUDAFResolver2>) fn.udfClass).newInstance());
+          } catch (InstantiationException | IllegalAccessException e) {
+            throw new RuntimeException("Unable to register: " + name, e);
+          }
+          continue;
+        }
+        if (GenericUDTF.class.isAssignableFrom(fn.udfClass)) {
+          system.registerGenericUDTF(fn.name, (Class<? extends GenericUDTF>) fn.udfClass);
+          continue;
+        }
+        throw new RuntimeException("Don't know how to register: " + fn.name);
+      }
+    }
+
+  }
+
+  private static class SketchFunctionDescriptor implements HiveUDFPlugin.UDFDescriptor {
+    String name;
+    Class<?> udfClass;
+    private SqlFunction calciteFunction;
+
+    public SketchFunctionDescriptor(String name, Class<?> udfClass) {
+      this.name = name;
+      this.udfClass = udfClass;
+    }
+
+    @Override
+    public Class<?> getUDFClass() {
+      return udfClass;
+    }
+
+    @Override
+    public String getFunctionName() {
+      return name;
+    }
 
-  public DataSketchesFunctions(Registry system) {
-    this.system = system;
+    @Override
+    public Optional<SqlFunction> getCalciteFunction() {
+      return Optional.ofNullable(calciteFunction);
+    }
+
+    public void setCalciteFunction(SqlFunction calciteFunction) {
+      this.calciteFunction = calciteFunction;
+    }
   }
 
-  public static void register(Registry system) {
-    DataSketchesFunctions dsf = new DataSketchesFunctions(system);
-    String prefix = "ds";
-    dsf.registerHll(prefix);
-    dsf.registerCpc(prefix);
-    dsf.registerKll(prefix);
-    dsf.registerTheta(prefix);
-    dsf.registerTuple(prefix);
-    dsf.registerQuantiles(prefix);
-    dsf.registerFrequencies(prefix);
+  private static class SketchDescriptor {
+    Map<String, SketchFunctionDescriptor> fnMap;
+    private String functionPrefix;
+
+    public SketchDescriptor(String string) {
+      fnMap = new HashMap<String, SketchFunctionDescriptor>();
+      functionPrefix = DATASKETCHES_PREFIX + "_" + string + "_";
+    }
+
+    private void register(String name, Class<?> clazz) {
+      fnMap.put(name, new SketchFunctionDescriptor(functionPrefix + name, clazz));
+    }
   }
 
-  private void registerHll(String prefix) {
-    String p = prefix + "_hll_";
-    registerUDAF(org.apache.datasketches.hive.hll.DataToSketchUDAF.class, p + DATA_TO_SKETCH);
-    registerUDF(org.apache.datasketches.hive.hll.SketchToEstimateAndErrorBoundsUDF.class,
-        p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS);
-    registerUDF(org.apache.datasketches.hive.hll.SketchToEstimateUDF.class, p + SKETCH_TO_ESTIMATE);
-    registerUDF(org.apache.datasketches.hive.hll.SketchToStringUDF.class, p + SKETCH_TO_STRING);
-    registerUDF(org.apache.datasketches.hive.hll.UnionSketchUDF.class, p + UNION_SKETCH1);
-    registerUDAF(org.apache.datasketches.hive.hll.UnionSketchUDAF.class, p + UNION_SKETCH);
+  private void registerHll() {
+    SketchDescriptor sd = new SketchDescriptor("hll");
+    sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.hll.DataToSketchUDAF.class);
+    sd.register(SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS,
+        org.apache.datasketches.hive.hll.SketchToEstimateAndErrorBoundsUDF.class);
+    sd.register(SKETCH_TO_ESTIMATE, org.apache.datasketches.hive.hll.SketchToEstimateUDF.class);
+    sd.register(SKETCH_TO_STRING, org.apache.datasketches.hive.hll.SketchToStringUDF.class);
+    sd.register(UNION_SKETCH1, org.apache.datasketches.hive.hll.UnionSketchUDF.class);
+    sd.register(UNION_SKETCH, org.apache.datasketches.hive.hll.UnionSketchUDAF.class);
+    sketchClasses.add(sd);
   }
 
-  private void registerCpc(String prefix) {
-    String p = prefix + "_cpc_";
-    registerUDAF(org.apache.datasketches.hive.cpc.DataToSketchUDAF.class, p + DATA_TO_SKETCH);
+  private void registerCpc() {
+    SketchDescriptor sd = new SketchDescriptor("cpc");
+    sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.cpc.DataToSketchUDAF.class);
     // FIXME: normalize GetEstimateAndErrorBoundsUDF vs SketchToEstimateAndErrorBoundsUDF
-    registerUDF(org.apache.datasketches.hive.cpc.GetEstimateAndErrorBoundsUDF.class,
-        p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS);
+    sd.register(SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS,
+        org.apache.datasketches.hive.cpc.GetEstimateAndErrorBoundsUDF.class);
     // FIXME: normalize GetEstimateUDF vs SketchToEstimateUDF
-    registerUDF(org.apache.datasketches.hive.cpc.GetEstimateUDF.class, p + SKETCH_TO_ESTIMATE);
-    registerUDF(org.apache.datasketches.hive.cpc.SketchToStringUDF.class, p + SKETCH_TO_STRING);
-    registerUDF(org.apache.datasketches.hive.cpc.UnionSketchUDF.class, p + UNION_SKETCH1);
-    registerUDAF(org.apache.datasketches.hive.cpc.UnionSketchUDAF.class, p + UNION_SKETCH);
-  }
-
-  private void registerKll(String prefix) {
-    String p = prefix + "_kll_";
-    registerUDAF(org.apache.datasketches.hive.kll.DataToSketchUDAF.class, p + DATA_TO_SKETCH);
-    registerUDF(org.apache.datasketches.hive.kll.SketchToStringUDF.class, p + SKETCH_TO_STRING);
-    //    registerUDF(org.apache.datasketches.hive.kll.UnionSketchUDF.class, p + UNION_SKETCH);
-    registerUDAF(org.apache.datasketches.hive.kll.UnionSketchUDAF.class, p + UNION_SKETCH);
-
-    registerUDF(org.apache.datasketches.hive.kll.GetNUDF.class, p + GET_N);
-    registerUDF(org.apache.datasketches.hive.kll.GetCdfUDF.class, p + GET_CDF);
-    registerUDF(org.apache.datasketches.hive.kll.GetPmfUDF.class, p + GET_PMF);
-    registerUDF(org.apache.datasketches.hive.kll.GetQuantilesUDF.class, p + GET_QUANTILES);
-    registerUDF(org.apache.datasketches.hive.kll.GetQuantileUDF.class, p + GET_QUANTILE);
-    registerUDF(org.apache.datasketches.hive.kll.GetRankUDF.class, p + GET_RANK);
-  }
-
-  private void registerTheta(String prefix) {
-    String p = prefix + "_theta_";
-    registerUDAF(org.apache.datasketches.hive.theta.DataToSketchUDAF.class, p + DATA_TO_SKETCH);
+    sd.register(SKETCH_TO_ESTIMATE, org.apache.datasketches.hive.cpc.GetEstimateUDF.class);
+    sd.register(SKETCH_TO_STRING, org.apache.datasketches.hive.cpc.SketchToStringUDF.class);
+    sd.register(UNION_SKETCH1, org.apache.datasketches.hive.cpc.UnionSketchUDF.class);
+    sd.register(UNION_SKETCH, org.apache.datasketches.hive.cpc.UnionSketchUDAF.class);
+    sketchClasses.add(sd);
+  }
+
+  private void registerKll() {
+    SketchDescriptor sd = new SketchDescriptor("kll");
+    sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.kll.DataToSketchUDAF.class);
+    sd.register(SKETCH_TO_STRING, org.apache.datasketches.hive.kll.SketchToStringUDF.class);
+    //    registerUDF(org.apache.datasketches.hive.kll.UnionSketchUDF.class, p , UNION_SKETCH);
+    sd.register(UNION_SKETCH, org.apache.datasketches.hive.kll.UnionSketchUDAF.class);
+
+    sd.register(GET_N, org.apache.datasketches.hive.kll.GetNUDF.class);
+    sd.register(GET_CDF, org.apache.datasketches.hive.kll.GetCdfUDF.class);
+    sd.register(GET_PMF, org.apache.datasketches.hive.kll.GetPmfUDF.class);
+    sd.register(GET_QUANTILES, org.apache.datasketches.hive.kll.GetQuantilesUDF.class);
+    sd.register(GET_QUANTILE, org.apache.datasketches.hive.kll.GetQuantileUDF.class);
+    sd.register(GET_RANK, org.apache.datasketches.hive.kll.GetRankUDF.class);
+    sketchClasses.add(sd);
+  }
+
+  private void registerTheta() {
+    SketchDescriptor sd = new SketchDescriptor("theta");
+    sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.theta.DataToSketchUDAF.class);
     // FIXME: missing?
     //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING);
-    registerUDF(org.apache.datasketches.hive.theta.UnionSketchUDF.class, p + UNION_SKETCH1);
-    registerUDAF(org.apache.datasketches.hive.theta.UnionSketchUDAF.class, p + UNION_SKETCH);
-    registerUDF(org.apache.datasketches.hive.theta.IntersectSketchUDF.class, p + INTERSECT_SKETCH1);
-    registerUDAF(org.apache.datasketches.hive.theta.IntersectSketchUDAF.class, p + INTERSECT_SKETCH);
-    registerUDF(org.apache.datasketches.hive.theta.EstimateSketchUDF.class, p + SKETCH_TO_ESTIMATE);
-    registerUDF(org.apache.datasketches.hive.theta.ExcludeSketchUDF.class, p + EXCLUDE_SKETCH);
+    sd.register(UNION_SKETCH1, org.apache.datasketches.hive.theta.UnionSketchUDF.class);
+    sd.register(UNION_SKETCH, org.apache.datasketches.hive.theta.UnionSketchUDAF.class);
+    sd.register(INTERSECT_SKETCH1, org.apache.datasketches.hive.theta.IntersectSketchUDF.class);
+    sd.register(INTERSECT_SKETCH, org.apache.datasketches.hive.theta.IntersectSketchUDAF.class);
+    sd.register(SKETCH_TO_ESTIMATE, org.apache.datasketches.hive.theta.EstimateSketchUDF.class);
+    sd.register(EXCLUDE_SKETCH, org.apache.datasketches.hive.theta.ExcludeSketchUDF.class);
+    sketchClasses.add(sd);
 
   }
 
-  private void registerTuple(String prefix) {
-    registerTupleArrayOfDoubles(prefix + "_tuple_arrayofdouble");
-    registerTupleDoubleSummary(prefix + "_tuple_doublesummary");
+  private void registerTuple() {
+    registerTupleArrayOfDoubles();
+    registerTupleDoubleSummary();
   }
 
-  private void registerTupleArrayOfDoubles(String string) {
-    String p = string + "_";
-    registerUDAF(org.apache.datasketches.hive.tuple.DataToArrayOfDoublesSketchUDAF.class, p + DATA_TO_SKETCH);
-    // FIXME: missing?
-    //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING);
-    registerUDAF(org.apache.datasketches.hive.tuple.UnionArrayOfDoublesSketchUDAF.class, p + UNION_SKETCH);
-    registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchesTTestUDF.class, p + T_TEST);
-    registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimatesUDF.class, p + SKETCH_TO_ESTIMATE);
-    registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimateAndErrorBoundsUDF.class,
-        p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS);
-    registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToMeansUDF.class, p + SKETCH_TO_MEANS);
-    registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToNumberOfRetainedEntriesUDF.class,
-        p + SKETCH_TO_NUMBER_OF_RETAINED_ENTRIES);
-    registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToQuantilesSketchUDF.class,
-        p + SKETCH_TO_QUANTILES_SKETCH);
-    registerUDTF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToValuesUDTF.class, p + SKETCH_TO_VALUES);
-    registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToVariancesUDF.class, p + SKETCH_TO_VARIANCES);
-  }
-
-  private void registerTupleDoubleSummary(String string) {
-    String p = string + "_";
-    registerUDAF(org.apache.datasketches.hive.tuple.DataToDoubleSummarySketchUDAF.class, p + DATA_TO_SKETCH);
+  private void registerTupleArrayOfDoubles() {
+    SketchDescriptor sd = new SketchDescriptor("tuple_arrayofdouble");
+    sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.tuple.DataToArrayOfDoublesSketchUDAF.class);
     // FIXME: missing?
+    //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p , SKETCH_TO_STRING);
+    sd.register(UNION_SKETCH, org.apache.datasketches.hive.tuple.UnionArrayOfDoublesSketchUDAF.class);
+    sd.register(T_TEST, org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchesTTestUDF.class);
+    sd.register(SKETCH_TO_ESTIMATE, org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimatesUDF.class);
+    sd.register(SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS,
+        org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimateAndErrorBoundsUDF.class);
+    sd.register(SKETCH_TO_MEANS, org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToMeansUDF.class);
+    sd.register(SKETCH_TO_NUMBER_OF_RETAINED_ENTRIES,
+        org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToNumberOfRetainedEntriesUDF.class);
+    sd.register(SKETCH_TO_QUANTILES_SKETCH,
+        org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToQuantilesSketchUDF.class);
+    sd.register(SKETCH_TO_VALUES, org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToValuesUDTF.class);
+    sd.register(SKETCH_TO_VARIANCES, org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToVariancesUDF.class);
+    sketchClasses.add(sd);
+  }
+
+  private void registerTupleDoubleSummary() {
+    SketchDescriptor sd = new SketchDescriptor("tuple_doublesummary");
+    sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.tuple.DataToDoubleSummarySketchUDAF.class);
     //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING);
-    registerUDAF(org.apache.datasketches.hive.tuple.UnionDoubleSummarySketchUDAF.class, p + UNION_SKETCH);
-    registerUDF(org.apache.datasketches.hive.tuple.DoubleSummarySketchToEstimatesUDF.class, p + SKETCH_TO_ESTIMATE);
-    registerUDF(org.apache.datasketches.hive.tuple.DoubleSummarySketchToPercentileUDF.class, p + SKETCH_TO_PERCENTILE);
+    sd.register(UNION_SKETCH, org.apache.datasketches.hive.tuple.UnionDoubleSummarySketchUDAF.class);
+    sd.register(SKETCH_TO_ESTIMATE, org.apache.datasketches.hive.tuple.DoubleSummarySketchToEstimatesUDF.class);
+    sd.register(SKETCH_TO_PERCENTILE, org.apache.datasketches.hive.tuple.DoubleSummarySketchToPercentileUDF.class);
+    sketchClasses.add(sd);
   }
 
-  private void registerQuantiles(String prefix) {
-    registerQuantilesString(prefix + "_quantile");
-    registerQuantilesDoubles(prefix + "_quantile");
+  private void registerQuantiles() {
+    registerQuantilesString();
+    registerQuantilesDoubles();
   }
 
-  private void registerFrequencies(String prefix) {
-    String p = prefix + "_freq_";
-    registerUDAF(org.apache.datasketches.hive.frequencies.DataToStringsSketchUDAF.class, p + DATA_TO_SKETCH);
+  private void registerFrequencies() {
+    SketchDescriptor sd = new SketchDescriptor("freq");
+
+    sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.frequencies.DataToStringsSketchUDAF.class);
     // FIXME: missing?
     //registerUDF(org.apache.datasketches.hive.frequencies.DoublesSketchToStringUDF.class, p + SKETCH_TO_STRING);
     //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p + UNION_SKETCH);
-    registerUDAF(org.apache.datasketches.hive.frequencies.UnionStringsSketchUDAF.class, p + UNION_SKETCH);
-    registerUDTF(org.apache.datasketches.hive.frequencies.GetFrequentItemsFromStringsSketchUDTF.class,
-        p + GET_FREQUENT_ITEMS);
+    sd.register(UNION_SKETCH, org.apache.datasketches.hive.frequencies.UnionStringsSketchUDAF.class);
+    sd.register(GET_FREQUENT_ITEMS,
+        org.apache.datasketches.hive.frequencies.GetFrequentItemsFromStringsSketchUDTF.class);
+    sketchClasses.add(sd);
   }
 
-  private void registerQuantilesString(String prefix) {
-    String p = prefix + "_strings_";
-    registerUDAF(org.apache.datasketches.hive.quantiles.DataToStringsSketchUDAF.class, p + DATA_TO_SKETCH);
-    registerUDF(org.apache.datasketches.hive.quantiles.StringsSketchToStringUDF.class, p + SKETCH_TO_STRING);
-    //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p + UNION_SKETCH);
-    registerUDAF(org.apache.datasketches.hive.quantiles.UnionStringsSketchUDAF.class, p + UNION_SKETCH);
-    registerUDF(org.apache.datasketches.hive.quantiles.GetNFromStringsSketchUDF.class, p + GET_N);
-    registerUDF(org.apache.datasketches.hive.quantiles.GetKFromStringsSketchUDF.class, p + GET_K);
-    registerUDF(org.apache.datasketches.hive.quantiles.GetCdfFromStringsSketchUDF.class, p + GET_CDF);
-    registerUDF(org.apache.datasketches.hive.quantiles.GetPmfFromStringsSketchUDF.class, p + GET_PMF);
-    registerUDF(org.apache.datasketches.hive.quantiles.GetQuantileFromStringsSketchUDF.class, p + GET_QUANTILE);
-    registerUDF(org.apache.datasketches.hive.quantiles.GetQuantilesFromStringsSketchUDF.class, p + GET_QUANTILES);
-  }
-
-  private void registerQuantilesDoubles(String prefix) {
-    String p = prefix + "_doubles_";
-    registerUDAF(org.apache.datasketches.hive.quantiles.DataToDoublesSketchUDAF.class, p + DATA_TO_SKETCH);
-    registerUDF(org.apache.datasketches.hive.quantiles.DoublesSketchToStringUDF.class, p + SKETCH_TO_STRING);
-    //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p + UNION_SKETCH);
-    registerUDAF(org.apache.datasketches.hive.quantiles.UnionDoublesSketchUDAF.class, p + UNION_SKETCH);
-    registerUDF(org.apache.datasketches.hive.quantiles.GetNFromDoublesSketchUDF.class, p + GET_N);
-    registerUDF(org.apache.datasketches.hive.quantiles.GetKFromDoublesSketchUDF.class, p + GET_K);
-    registerUDF(org.apache.datasketches.hive.quantiles.GetCdfFromDoublesSketchUDF.class, p + GET_CDF);
-    registerUDF(org.apache.datasketches.hive.quantiles.GetPmfFromDoublesSketchUDF.class, p + GET_PMF);
-    registerUDF(org.apache.datasketches.hive.quantiles.GetQuantileFromDoublesSketchUDF.class, p + GET_QUANTILE);
-    registerUDF(org.apache.datasketches.hive.quantiles.GetQuantilesFromDoublesSketchUDF.class, p + GET_QUANTILES);
-  }
-
-  private void registerUDF(Class<? extends UDF> udfClass, String name) {
-    system.registerUDF(name, udfClass, false);
-  }
-
-  private void registerUDAF(Class<? extends GenericUDAFResolver2> udafClass, String name) {
-    try {
-      system.registerGenericUDAF(name, udafClass.newInstance());
-    } catch (InstantiationException | IllegalAccessException e) {
-      throw new RuntimeException("Unable to register: " + name, e);
-    }
+  private void registerQuantilesString() {
+    SketchDescriptor sd = new SketchDescriptor("quantile_strings");
+    sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.quantiles.DataToStringsSketchUDAF.class);
+    sd.register(SKETCH_TO_STRING, org.apache.datasketches.hive.quantiles.StringsSketchToStringUDF.class);
+    //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p , UNION_SKETCH);
+    sd.register(UNION_SKETCH, org.apache.datasketches.hive.quantiles.UnionStringsSketchUDAF.class);
+    sd.register(GET_N, org.apache.datasketches.hive.quantiles.GetNFromStringsSketchUDF.class);
+    sd.register(GET_K, org.apache.datasketches.hive.quantiles.GetKFromStringsSketchUDF.class);
+    sd.register(GET_CDF, org.apache.datasketches.hive.quantiles.GetCdfFromStringsSketchUDF.class);
+    sd.register(GET_PMF, org.apache.datasketches.hive.quantiles.GetPmfFromStringsSketchUDF.class);
+    sd.register(GET_QUANTILE, org.apache.datasketches.hive.quantiles.GetQuantileFromStringsSketchUDF.class);
+    sd.register(GET_QUANTILES, org.apache.datasketches.hive.quantiles.GetQuantilesFromStringsSketchUDF.class);
+    sketchClasses.add(sd);
   }
 
-  private void registerUDTF(Class<? extends GenericUDTF> udtfClass, String name) {
-    system.registerGenericUDTF(name, udtfClass);
+  private void registerQuantilesDoubles() {
+    SketchDescriptor sd = new SketchDescriptor("quantile_doubles");
+    sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.quantiles.DataToDoublesSketchUDAF.class);
+    sd.register(SKETCH_TO_STRING, org.apache.datasketches.hive.quantiles.DoublesSketchToStringUDF.class);
+    //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p , UNION_SKETCH);
+    sd.register(UNION_SKETCH, org.apache.datasketches.hive.quantiles.UnionDoublesSketchUDAF.class);
+    sd.register(GET_N, org.apache.datasketches.hive.quantiles.GetNFromDoublesSketchUDF.class);
+    sd.register(GET_K, org.apache.datasketches.hive.quantiles.GetKFromDoublesSketchUDF.class);
+    sd.register(GET_CDF, org.apache.datasketches.hive.quantiles.GetCdfFromDoublesSketchUDF.class);
+    sd.register(GET_PMF, org.apache.datasketches.hive.quantiles.GetPmfFromDoublesSketchUDF.class);
+    sd.register(GET_QUANTILE, org.apache.datasketches.hive.quantiles.GetQuantileFromDoublesSketchUDF.class);
+    sd.register(GET_QUANTILES, org.apache.datasketches.hive.quantiles.GetQuantilesFromDoublesSketchUDF.class);
+    sketchClasses.add(sd);
   }
 
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index dc3781a..b0c5862 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -473,7 +473,7 @@ public final class FunctionRegistry {
     system.registerGenericUDAF("percentile_cont", new GenericUDAFPercentileCont());
     system.registerGenericUDAF("percentile_disc", new GenericUDAFPercentileDisc());
 
-    DataSketchesFunctions.register(system);
+    system.registerUDFPlugin(DataSketchesFunctions.INSTANCE);
 
     // Generic UDFs
     system.registerGenericUDF("reflect", GenericUDFReflect.class);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java
index 76dd66e..40e9e97 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java
@@ -50,6 +50,8 @@ import org.apache.hadoop.hive.ql.udf.ptf.TableFunctionResolver;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hive.common.util.ReflectionUtil;
+import org.apache.hive.plugin.api.HiveUDFPlugin;
+import org.apache.hive.plugin.api.HiveUDFPlugin.UDFDescriptor;
 
 import java.io.IOException;
 import java.util.Collections;
@@ -812,4 +814,28 @@ public class Registry {
     }
     return null;
   }
+
+  public void registerUDFPlugin(HiveUDFPlugin instance) {
+    Iterable<UDFDescriptor> x = instance.getDescriptors();
+    for (UDFDescriptor fn : x) {
+      if (UDF.class.isAssignableFrom(fn.getUDFClass())) {
+        registerUDF(fn.getFunctionName(), (Class<? extends UDF>) fn.getUDFClass(), false);
+        continue;
+      }
+      if (GenericUDAFResolver2.class.isAssignableFrom(fn.getUDFClass())) {
+        String name = fn.getFunctionName();
+        try {
+          registerGenericUDAF(name, ((Class<? extends GenericUDAFResolver2>) fn.getUDFClass()).newInstance());
+        } catch (InstantiationException | IllegalAccessException e) {
+          throw new RuntimeException("Unable to register: " + name, e);
+        }
+        continue;
+      }
+      if (GenericUDTF.class.isAssignableFrom(fn.getUDFClass())) {
+        registerGenericUDTF(fn.getFunctionName(), (Class<? extends GenericUDTF>) fn.getUDFClass());
+        continue;
+      }
+      throw new RuntimeException("Don't know how to register: " + fn.getFunctionName());
+    }
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java
index f50779d..184a026 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java
@@ -35,6 +35,7 @@ import org.apache.calcite.tools.FrameworkConfig;
 import org.apache.calcite.tools.Frameworks;
 import org.apache.calcite.tools.RelBuilder;
 import org.apache.calcite.tools.RelBuilderFactory;
+import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveMergeableAggregate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunction;
 import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlMinMaxAggFunction;
 import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction;
@@ -139,6 +140,10 @@ public class HiveRelBuilder extends RelBuilder {
   }
 
   public static SqlAggFunction getRollup(SqlAggFunction aggregation) {
+    if (aggregation instanceof HiveMergeableAggregate) {
+      HiveMergeableAggregate mAgg = (HiveMergeableAggregate) aggregation;
+      return mAgg.getMergeAggFunction();
+    }
     if (aggregation instanceof HiveSqlSumAggFunction
         || aggregation instanceof HiveSqlMinMaxAggFunction
         || aggregation instanceof HiveSqlSumEmptyIsZeroAggFunction) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveMergeableAggregate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveMergeableAggregate.java
new file mode 100644
index 0000000..041345a
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveMergeableAggregate.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.functions;
+
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlFunctionCategory;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.type.SqlOperandTypeChecker;
+import org.apache.calcite.sql.type.SqlOperandTypeInference;
+import org.apache.calcite.sql.type.SqlReturnTypeInference;
+
+/**
+ * Mergeable aggregate.
+ *
+ * A mergeable aggregate is one that:
+ * - accepts the same kind of values as inputs as it produces as output (an X^n -> X function)
+ *
+ * Example: the SUM function; the SUM of partial SUMs is the overall sum.
+ */
+public class HiveMergeableAggregate extends SqlAggFunction  {
+
+  private SqlAggFunction mergeAgg;
+
+  public HiveMergeableAggregate(String string, SqlKind kind, SqlReturnTypeInference returnTypeInference,
+      SqlOperandTypeInference operandTypeInference,
+      SqlOperandTypeChecker operandTypeChecker) {
+    this(string, kind, returnTypeInference, operandTypeInference, operandTypeChecker, null);
+  }
+
+  public HiveMergeableAggregate(String string, SqlKind kind, SqlReturnTypeInference returnTypeInference,
+      SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker,
+      SqlAggFunction unionFn) {
+    super(
+        string, kind,
+        returnTypeInference,
+        operandTypeInference,
+        operandTypeChecker,
+        SqlFunctionCategory.NUMERIC);
+    if (unionFn == null) {
+      this.mergeAgg = this;
+    } else {
+      this.mergeAgg = unionFn;
+    }
+
+  }
+
+  public SqlAggFunction getMergeAggFunction() {
+    return mergeAgg;
+  }
+
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
index 468e6f8..974dab1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java
@@ -125,5 +125,3 @@ public class HiveSqlSumAggFunction extends SqlAggFunction implements CanAggregat
     }
   }
 }
-
-// End SqlSumAggFunction.java
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index a555749..07ca87f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -20,9 +20,10 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
 import java.lang.annotation.Annotation;
 import java.util.List;
 import java.util.Map;
-
+import java.util.Optional;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlFunction;
 import org.apache.calcite.sql.SqlFunctionCategory;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.SqlOperator;
@@ -37,6 +38,7 @@ import org.apache.calcite.sql.type.SqlReturnTypeInference;
 import org.apache.calcite.sql.type.SqlTypeFamily;
 import org.apache.calcite.util.Util;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.hive.ql.exec.DataSketchesFunctions;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.FunctionInfo;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
@@ -78,6 +80,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import org.apache.hive.plugin.api.HiveUDFPlugin;
+import org.apache.hive.plugin.api.HiveUDFPlugin.UDFDescriptor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -468,6 +472,19 @@ public class SqlFunctionConverter {
       );
       registerFunction("date_add", HiveDateAddSqlOperator.INSTANCE, hToken(HiveParser.Identifier, "date_add"));
       registerFunction("date_sub", HiveDateSubSqlOperator.INSTANCE, hToken(HiveParser.Identifier, "date_sub"));
+
+      registerPlugin(DataSketchesFunctions.INSTANCE);
+    }
+
+    private void registerPlugin(HiveUDFPlugin plugin) {
+      for (UDFDescriptor udfDesc : plugin.getDescriptors()) {
+        Optional<SqlFunction> calciteFunction = udfDesc.getCalciteFunction();
+        if (calciteFunction.isPresent()) {
+          registerDuplicateFunction(udfDesc.getFunctionName(), calciteFunction.get(),
+              hToken(HiveParser.Identifier, udfDesc.getFunctionName()));
+        }
+      }
+
     }
 
     private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) {
@@ -525,7 +542,7 @@ public class SqlFunctionConverter {
   }
 
   private static CalciteUDFInfo getUDFInfo(String hiveUdfName,
-      ImmutableList<RelDataType> calciteArgTypes, RelDataType calciteRetType) {
+      List<RelDataType> calciteArgTypes, RelDataType calciteRetType) {
     CalciteUDFInfo udfInfo = new CalciteUDFInfo();
     udfInfo.udfName = hiveUdfName;
     udfInfo.returnTypeInference = ReturnTypes.explicit(calciteRetType);
diff --git a/ql/src/java/org/apache/hive/plugin/api/HiveUDFPlugin.java b/ql/src/java/org/apache/hive/plugin/api/HiveUDFPlugin.java
new file mode 100644
index 0000000..41c198c
--- /dev/null
+++ b/ql/src/java/org/apache/hive/plugin/api/HiveUDFPlugin.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.plugin.api;
+
+import java.util.Optional;
+
+import org.apache.calcite.sql.SqlFunction;
+
+public interface HiveUDFPlugin {
+
+  public interface UDFDescriptor {
+    Class<?> getUDFClass();
+    String getFunctionName();
+    Optional<SqlFunction> getCalciteFunction();
+  }
+
+  Iterable<UDFDescriptor> getDescriptors();
+
+}
diff --git a/ql/src/test/queries/clientpositive/sketches_materialized_view_rollup.q b/ql/src/test/queries/clientpositive/sketches_materialized_view_rollup.q
new file mode 100644
index 0000000..e1a2054
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/sketches_materialized_view_rollup.q
@@ -0,0 +1,32 @@
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.strict.checks.cartesian.product=false;
+set hive.stats.fetch.column.stats=true;
+set hive.materializedview.rewriting=true;
+set hive.fetch.task.conversion=none;
+
+create table sketch_input (id int, category char(1))
+STORED AS ORC
+TBLPROPERTIES ('transactional'='true');
+
+insert into table sketch_input values
+  (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+; 
+
+-- create an mv for the intermediate results
+create  materialized view mv_1 as
+  select category, ds_hll_sketch(id),count(id) from sketch_input group by category;
+
+-- see if we use the mv
+explain
+select category, round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input group by category;
+select category, round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input group by category;
+
+-- the mv should be used - the rollup should be possible
+explain
+select round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input;
+select round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input;
+
+drop materialized view mv_1;
diff --git a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup.q.out b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup.q.out
new file mode 100644
index 0000000..ee656b0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup.q.out
@@ -0,0 +1,187 @@
+PREHOOK: query: create table sketch_input (id int, category char(1))
+STORED AS ORC
+TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: create table sketch_input (id int, category char(1))
+STORED AS ORC
+TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sketch_input
+PREHOOK: query: insert into table sketch_input values
+  (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: insert into table sketch_input values
+  (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@sketch_input
+POSTHOOK: Lineage: sketch_input.category SCRIPT []
+POSTHOOK: Lineage: sketch_input.id SCRIPT []
+PREHOOK: query: create  materialized view mv_1 as
+  select category, ds_hll_sketch(id),count(id) from sketch_input group by category
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@sketch_input
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mv_1
+POSTHOOK: query: create  materialized view mv_1 as
+  select category, ds_hll_sketch(id),count(id) from sketch_input group by category
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@sketch_input
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mv_1
+PREHOOK: query: explain
+select category, round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mv_1
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select category, round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mv_1
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: default.mv_1
+                  Statistics: Num rows: 2 Data size: 362 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: category (type: char(1)), round(ds_hll_estimate(_c1)) (type: double)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select category, round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mv_1
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: select category, round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mv_1
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+a	10.0
+b	10.0
+PREHOOK: query: explain
+select round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mv_1
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mv_1
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: default.mv_1
+                  Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _c1 (type: binary)
+                    outputColumnNames: _c1
+                    Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: ds_hll_union(_c1)
+                      minReductionHashAggr: 0.5
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        null sort order: 
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: struct<lgk:int,type:string,sketch:binary>)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: ds_hll_union(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: round(ds_hll_estimate(_col0)) (type: double)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mv_1
+PREHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+POSTHOOK: query: select round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mv_1
+POSTHOOK: Input: default@sketch_input
+#### A masked pattern was here ####
+15.0
+PREHOOK: query: drop materialized view mv_1
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: Input: default@mv_1
+PREHOOK: Output: default@mv_1
+POSTHOOK: query: drop materialized view mv_1
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: Input: default@mv_1
+POSTHOOK: Output: default@mv_1