You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2020/03/23 07:59:22 UTC

[hive] branch master updated: HIVE-22940: Make the datasketches functions available as predefined functions (Zoltan Haindrich reviewed by Jesus Camacho Rodriguez)

This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 2105c66  HIVE-22940: Make the datasketches functions available as predefined functions (Zoltan Haindrich reviewed by Jesus Camacho Rodriguez)
2105c66 is described below

commit 2105c6617ef9609dd8b2f712f596c2f9cc6d972e
Author: Zoltan Haindrich <ki...@rxd.hu>
AuthorDate: Mon Mar 23 07:58:54 2020 +0000

    HIVE-22940: Make the datasketches functions available as predefined functions (Zoltan Haindrich reviewed by Jesus Camacho Rodriguez)
    
    Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
---
 .../test/resources/testconfiguration.properties    |   2 +
 pom.xml                                            |   1 +
 ql/pom.xml                                         |  10 +
 .../hadoop/hive/ql/exec/DataSketchesFunctions.java | 221 +++++++++++++++++++++
 .../hadoop/hive/ql/exec/FunctionRegistry.java      |   3 +-
 ql/src/test/queries/clientpositive/sketches_hll.q  |  16 ++
 .../test/queries/clientpositive/sketches_theta.q   |  33 +++
 .../results/clientpositive/llap/sketches_hll.q.out |  59 ++++++
 .../clientpositive/llap/sketches_theta.q.out       | 120 +++++++++++
 .../results/clientpositive/show_functions.q.out    | 136 +++++++++++++
 10 files changed, 599 insertions(+), 2 deletions(-)

diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index f71ed3d..3510016 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -818,6 +818,8 @@ minillaplocal.query.files=\
   schq_materialized.q,\
   schq_analyze.q,\
   schq_ingest.q,\
+  sketches_hll.q,\
+  sketches_theta.q,\
   table_access_keys_stats.q,\
   temp_table_llap_partitioned.q,\
   tez_bmj_schema_evolution.q,\
diff --git a/pom.xml b/pom.xml
index af70972..579e745 100644
--- a/pom.xml
+++ b/pom.xml
@@ -228,6 +228,7 @@
     <json-path.version>2.4.0</json-path.version>
     <janino.version>3.0.11</janino.version>
     <snakeyaml.version>1.23</snakeyaml.version>
+    <datasketches.version>1.0.0-incubating</datasketches.version>
   </properties>
 
   <repositories>
diff --git a/ql/pom.xml b/ql/pom.xml
index 161a527..9b45d31 100644
--- a/ql/pom.xml
+++ b/ql/pom.xml
@@ -313,6 +313,11 @@
       <scope>test</scope>
     </dependency>
     <dependency>
+       <groupId>org.apache.datasketches</groupId>
+       <artifactId>datasketches-hive</artifactId>
+       <version>${datasketches.version}</version>
+    </dependency>
+    <dependency>
       <groupId>com.lmax</groupId>
       <artifactId>disruptor</artifactId>
       <version>${disruptor.version}</version>
@@ -1007,6 +1012,7 @@
                   <include>io.dropwizard.metrics:metrics-jvm</include>
                   <include>io.dropwizard.metrics:metrics-json</include>
                   <include>com.zaxxer:HikariCP</include>
+                  <include>org.apache.datasketches:*</include>
                   <include>org.apache.calcite:*</include>
                   <include>org.apache.calcite.avatica:avatica</include>
                 </includes>
@@ -1040,6 +1046,10 @@
                   <pattern>com.google.thirdparty.publicsuffix</pattern>
                   <shadedPattern>org.apache.hive.com.google.thirdparty.publicsuffix</shadedPattern>
                 </relocation>
+                <relocation>
+                  <pattern>org.apache.datasketches</pattern>
+                  <shadedPattern>org.apache.hive.org.apache.datasketches</shadedPattern>
+                </relocation>
               </relocations>
             </configuration>
           </execution>
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java
new file mode 100644
index 0000000..b9d265f
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java
@@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver2;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+
+/**
+ * Registers functions from the DataSketches library as builtin functions.
+ *
+ * <p>In an effort to show a more consistent naming, every function is registered as
+ * {@code ds_<sketch family>_<operation>} (for example {@code ds_hll_sketch}); the operation
+ * names are shared between the sketch families through the constants below.
+ */
+public class DataSketchesFunctions {
+
+  // Operation suffixes; each one is appended to a per-family prefix such as "ds_hll_".
+  private static final String DATA_TO_SKETCH = "sketch";
+  private static final String SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS = "estimate_bounds";
+  private static final String SKETCH_TO_ESTIMATE = "estimate";
+  private static final String SKETCH_TO_STRING = "stringify";
+  // The unsuffixed union/intersect names are used for the aggregate (UDAF) flavour,
+  // the "_f" names for the plain function (UDF) flavour of the same operation.
+  private static final String UNION_SKETCH = "union";
+  private static final String UNION_SKETCH1 = "union_f";
+  private static final String GET_N = "n";
+  private static final String GET_CDF = "cdf";
+  private static final String GET_PMF = "pmf";
+  private static final String GET_QUANTILES = "quantiles";
+  private static final String GET_QUANTILE = "quantile";
+  private static final String GET_RANK = "rank";
+  private static final String INTERSECT_SKETCH = "intersect";
+  private static final String INTERSECT_SKETCH1 = "intersect_f";
+  private static final String EXCLUDE_SKETCH = "exclude";
+  private static final String GET_K = "k";
+  private static final String GET_FREQUENT_ITEMS = "frequent_items";
+  private static final String T_TEST = "ttest";
+  private static final String SKETCH_TO_MEANS = "means";
+  private static final String SKETCH_TO_NUMBER_OF_RETAINED_ENTRIES = "n_retained";
+  private static final String SKETCH_TO_QUANTILES_SKETCH = "quantiles_sketch";
+  private static final String SKETCH_TO_VALUES = "values";
+  private static final String SKETCH_TO_VARIANCES = "variances";
+  private static final String SKETCH_TO_PERCENTILE = "percentile";
+
+  /** Registry that receives every function registered through this instance. */
+  private final Registry system;
+
+  /**
+   * @param system registry the DataSketches functions are added to
+   */
+  public DataSketchesFunctions(Registry system) {
+    this.system = system;
+  }
+
+  /**
+   * Registers every supported DataSketches function in the given registry; all names start
+   * with the {@code ds} prefix.
+   *
+   * @param system registry the functions are added to
+   * @throws RuntimeException if one of the aggregate function resolvers cannot be instantiated
+   */
+  public static void register(Registry system) {
+    DataSketchesFunctions dsf = new DataSketchesFunctions(system);
+    String prefix = "ds";
+    dsf.registerHll(prefix);
+    dsf.registerCpc(prefix);
+    dsf.registerKll(prefix);
+    dsf.registerTheta(prefix);
+    dsf.registerTuple(prefix);
+    dsf.registerQuantiles(prefix);
+    dsf.registerFrequencies(prefix);
+  }
+
+  /** Registers the HLL sketch functions as {@code <prefix>_hll_*}. */
+  private void registerHll(String prefix) {
+    String p = prefix + "_hll_";
+    registerUDAF(org.apache.datasketches.hive.hll.DataToSketchUDAF.class, p + DATA_TO_SKETCH);
+    registerUDF(org.apache.datasketches.hive.hll.SketchToEstimateAndErrorBoundsUDF.class,
+        p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS);
+    registerUDF(org.apache.datasketches.hive.hll.SketchToEstimateUDF.class, p + SKETCH_TO_ESTIMATE);
+    registerUDF(org.apache.datasketches.hive.hll.SketchToStringUDF.class, p + SKETCH_TO_STRING);
+    registerUDF(org.apache.datasketches.hive.hll.UnionSketchUDF.class, p + UNION_SKETCH1);
+    registerUDAF(org.apache.datasketches.hive.hll.UnionSketchUDAF.class, p + UNION_SKETCH);
+  }
+
+  /** Registers the CPC sketch functions as {@code <prefix>_cpc_*}. */
+  private void registerCpc(String prefix) {
+    String p = prefix + "_cpc_";
+    registerUDAF(org.apache.datasketches.hive.cpc.DataToSketchUDAF.class, p + DATA_TO_SKETCH);
+    // FIXME: normalize GetEstimateAndErrorBoundsUDF vs SketchToEstimateAndErrorBoundsUDF
+    registerUDF(org.apache.datasketches.hive.cpc.GetEstimateAndErrorBoundsUDF.class,
+        p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS);
+    // FIXME: normalize GetEstimateUDF vs SketchToEstimateUDF
+    registerUDF(org.apache.datasketches.hive.cpc.GetEstimateUDF.class, p + SKETCH_TO_ESTIMATE);
+    registerUDF(org.apache.datasketches.hive.cpc.SketchToStringUDF.class, p + SKETCH_TO_STRING);
+    registerUDF(org.apache.datasketches.hive.cpc.UnionSketchUDF.class, p + UNION_SKETCH1);
+    registerUDAF(org.apache.datasketches.hive.cpc.UnionSketchUDAF.class, p + UNION_SKETCH);
+  }
+
+  /** Registers the KLL sketch functions as {@code <prefix>_kll_*}. */
+  private void registerKll(String prefix) {
+    String p = prefix + "_kll_";
+    registerUDAF(org.apache.datasketches.hive.kll.DataToSketchUDAF.class, p + DATA_TO_SKETCH);
+    registerUDF(org.apache.datasketches.hive.kll.SketchToStringUDF.class, p + SKETCH_TO_STRING);
+    // FIXME: only the aggregate union is registered for KLL; a plain function flavour, if
+    // added, has to use UNION_SKETCH1 like the other families to avoid clashing with the UDAF.
+    registerUDAF(org.apache.datasketches.hive.kll.UnionSketchUDAF.class, p + UNION_SKETCH);
+
+    registerUDF(org.apache.datasketches.hive.kll.GetNUDF.class, p + GET_N);
+    registerUDF(org.apache.datasketches.hive.kll.GetCdfUDF.class, p + GET_CDF);
+    registerUDF(org.apache.datasketches.hive.kll.GetPmfUDF.class, p + GET_PMF);
+    registerUDF(org.apache.datasketches.hive.kll.GetQuantilesUDF.class, p + GET_QUANTILES);
+    registerUDF(org.apache.datasketches.hive.kll.GetQuantileUDF.class, p + GET_QUANTILE);
+    registerUDF(org.apache.datasketches.hive.kll.GetRankUDF.class, p + GET_RANK);
+  }
+
+  /** Registers the theta sketch functions as {@code <prefix>_theta_*}. */
+  private void registerTheta(String prefix) {
+    String p = prefix + "_theta_";
+    registerUDAF(org.apache.datasketches.hive.theta.DataToSketchUDAF.class, p + DATA_TO_SKETCH);
+    // FIXME: missing? no SKETCH_TO_STRING function is registered for theta sketches
+    registerUDF(org.apache.datasketches.hive.theta.UnionSketchUDF.class, p + UNION_SKETCH1);
+    registerUDAF(org.apache.datasketches.hive.theta.UnionSketchUDAF.class, p + UNION_SKETCH);
+    registerUDF(org.apache.datasketches.hive.theta.IntersectSketchUDF.class, p + INTERSECT_SKETCH1);
+    registerUDAF(org.apache.datasketches.hive.theta.IntersectSketchUDAF.class, p + INTERSECT_SKETCH);
+    registerUDF(org.apache.datasketches.hive.theta.EstimateSketchUDF.class, p + SKETCH_TO_ESTIMATE);
+    registerUDF(org.apache.datasketches.hive.theta.ExcludeSketchUDF.class, p + EXCLUDE_SKETCH);
+  }
+
+  /** Registers both tuple sketch flavours (array-of-doubles and double-summary). */
+  private void registerTuple(String prefix) {
+    registerTupleArrayOfDoubles(prefix + "_tuple_arrayofdouble");
+    registerTupleDoubleSummary(prefix + "_tuple_doublesummary");
+  }
+
+  /** Registers the array-of-doubles tuple sketch functions as {@code <prefix>_*}. */
+  private void registerTupleArrayOfDoubles(String prefix) {
+    String p = prefix + "_";
+    registerUDAF(org.apache.datasketches.hive.tuple.DataToArrayOfDoublesSketchUDAF.class, p + DATA_TO_SKETCH);
+    // FIXME: missing? no SKETCH_TO_STRING function is registered for this sketch type
+    registerUDAF(org.apache.datasketches.hive.tuple.UnionArrayOfDoublesSketchUDAF.class, p + UNION_SKETCH);
+    registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchesTTestUDF.class, p + T_TEST);
+    registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimatesUDF.class, p + SKETCH_TO_ESTIMATE);
+    registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimateAndErrorBoundsUDF.class,
+        p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS);
+    registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToMeansUDF.class, p + SKETCH_TO_MEANS);
+    registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToNumberOfRetainedEntriesUDF.class,
+        p + SKETCH_TO_NUMBER_OF_RETAINED_ENTRIES);
+    registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToQuantilesSketchUDF.class,
+        p + SKETCH_TO_QUANTILES_SKETCH);
+    registerUDTF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToValuesUDTF.class, p + SKETCH_TO_VALUES);
+    registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToVariancesUDF.class, p + SKETCH_TO_VARIANCES);
+  }
+
+  /** Registers the double-summary tuple sketch functions as {@code <prefix>_*}. */
+  private void registerTupleDoubleSummary(String prefix) {
+    String p = prefix + "_";
+    registerUDAF(org.apache.datasketches.hive.tuple.DataToDoubleSummarySketchUDAF.class, p + DATA_TO_SKETCH);
+    // FIXME: missing? no SKETCH_TO_STRING function is registered for this sketch type
+    registerUDAF(org.apache.datasketches.hive.tuple.UnionDoubleSummarySketchUDAF.class, p + UNION_SKETCH);
+    registerUDF(org.apache.datasketches.hive.tuple.DoubleSummarySketchToEstimatesUDF.class, p + SKETCH_TO_ESTIMATE);
+    registerUDF(org.apache.datasketches.hive.tuple.DoubleSummarySketchToPercentileUDF.class, p + SKETCH_TO_PERCENTILE);
+  }
+
+  /** Registers the quantiles sketch functions for both string and double items. */
+  private void registerQuantiles(String prefix) {
+    // Both item types share the "<prefix>_quantile" stem; the helpers add the item type.
+    String p = prefix + "_quantile";
+    registerQuantilesString(p);
+    registerQuantilesDoubles(p);
+  }
+
+  /** Registers the frequent items sketch functions as {@code <prefix>_freq_*}. */
+  private void registerFrequencies(String prefix) {
+    String p = prefix + "_freq_";
+    registerUDAF(org.apache.datasketches.hive.frequencies.DataToStringsSketchUDAF.class, p + DATA_TO_SKETCH);
+    // FIXME: missing? no SKETCH_TO_STRING function is registered for frequency sketches
+    registerUDAF(org.apache.datasketches.hive.frequencies.UnionStringsSketchUDAF.class, p + UNION_SKETCH);
+    registerUDTF(org.apache.datasketches.hive.frequencies.GetFrequentItemsFromStringsSketchUDTF.class,
+        p + GET_FREQUENT_ITEMS);
+  }
+
+  /** Registers the quantiles functions for sketches of strings as {@code <prefix>_strings_*}. */
+  private void registerQuantilesString(String prefix) {
+    String p = prefix + "_strings_";
+    registerUDAF(org.apache.datasketches.hive.quantiles.DataToStringsSketchUDAF.class, p + DATA_TO_SKETCH);
+    registerUDF(org.apache.datasketches.hive.quantiles.StringsSketchToStringUDF.class, p + SKETCH_TO_STRING);
+    registerUDAF(org.apache.datasketches.hive.quantiles.UnionStringsSketchUDAF.class, p + UNION_SKETCH);
+    registerUDF(org.apache.datasketches.hive.quantiles.GetNFromStringsSketchUDF.class, p + GET_N);
+    registerUDF(org.apache.datasketches.hive.quantiles.GetKFromStringsSketchUDF.class, p + GET_K);
+    registerUDF(org.apache.datasketches.hive.quantiles.GetCdfFromStringsSketchUDF.class, p + GET_CDF);
+    registerUDF(org.apache.datasketches.hive.quantiles.GetPmfFromStringsSketchUDF.class, p + GET_PMF);
+    registerUDF(org.apache.datasketches.hive.quantiles.GetQuantileFromStringsSketchUDF.class, p + GET_QUANTILE);
+    registerUDF(org.apache.datasketches.hive.quantiles.GetQuantilesFromStringsSketchUDF.class, p + GET_QUANTILES);
+  }
+
+  /** Registers the quantiles functions for sketches of doubles as {@code <prefix>_doubles_*}. */
+  private void registerQuantilesDoubles(String prefix) {
+    String p = prefix + "_doubles_";
+    registerUDAF(org.apache.datasketches.hive.quantiles.DataToDoublesSketchUDAF.class, p + DATA_TO_SKETCH);
+    registerUDF(org.apache.datasketches.hive.quantiles.DoublesSketchToStringUDF.class, p + SKETCH_TO_STRING);
+    registerUDAF(org.apache.datasketches.hive.quantiles.UnionDoublesSketchUDAF.class, p + UNION_SKETCH);
+    registerUDF(org.apache.datasketches.hive.quantiles.GetNFromDoublesSketchUDF.class, p + GET_N);
+    registerUDF(org.apache.datasketches.hive.quantiles.GetKFromDoublesSketchUDF.class, p + GET_K);
+    registerUDF(org.apache.datasketches.hive.quantiles.GetCdfFromDoublesSketchUDF.class, p + GET_CDF);
+    registerUDF(org.apache.datasketches.hive.quantiles.GetPmfFromDoublesSketchUDF.class, p + GET_PMF);
+    registerUDF(org.apache.datasketches.hive.quantiles.GetQuantileFromDoublesSketchUDF.class, p + GET_QUANTILE);
+    registerUDF(org.apache.datasketches.hive.quantiles.GetQuantilesFromDoublesSketchUDF.class, p + GET_QUANTILES);
+  }
+
+  /**
+   * Registers a plain UDF class under {@code name}.
+   *
+   * @param udfClass implementation class handed to the registry
+   * @param name function name to register
+   */
+  private void registerUDF(Class<? extends UDF> udfClass, String name) {
+    // NOTE(review): the trailing "false" presumably marks the function as a non-operator;
+    // confirm against Registry#registerUDF.
+    system.registerUDF(name, udfClass, false);
+  }
+
+  /**
+   * Instantiates the given UDAF resolver through its no-arg constructor and registers the
+   * instance under {@code name}.
+   *
+   * @param udafClass resolver class to instantiate
+   * @param name function name to register
+   * @throws RuntimeException if the resolver cannot be instantiated; the reflective failure is
+   *         kept as the cause
+   */
+  private void registerUDAF(Class<? extends GenericUDAFResolver2> udafClass, String name) {
+    try {
+      // Class#newInstance() is deprecated and rethrows constructor exceptions unwrapped;
+      // going through the constructor reports every failure together with the function name.
+      system.registerGenericUDAF(name, udafClass.getDeclaredConstructor().newInstance());
+    } catch (ReflectiveOperationException e) {
+      throw new RuntimeException("Unable to register: " + name, e);
+    }
+  }
+
+  /**
+   * Registers a UDTF class under {@code name}.
+   *
+   * @param udtfClass implementation class handed to the registry
+   * @param name function name to register
+   */
+  private void registerUDTF(Class<? extends GenericUDTF> udtfClass, String name) {
+    system.registerGenericUDTF(name, udtfClass);
+  }
+
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index db5ee8d..dc3781a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -140,8 +140,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.Pr
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
 import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -475,6 +473,7 @@ public final class FunctionRegistry {
     system.registerGenericUDAF("percentile_cont", new GenericUDAFPercentileCont());
     system.registerGenericUDAF("percentile_disc", new GenericUDAFPercentileDisc());
 
+    DataSketchesFunctions.register(system);
 
     // Generic UDFs
     system.registerGenericUDF("reflect", GenericUDFReflect.class);
diff --git a/ql/src/test/queries/clientpositive/sketches_hll.q b/ql/src/test/queries/clientpositive/sketches_hll.q
new file mode 100644
index 0000000..56467a6
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/sketches_hll.q
@@ -0,0 +1,16 @@
+-- prepare input data
+create temporary table sketch_input (id int, category char(1));
+insert into table sketch_input values
+  (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b');
+
+-- build sketches per category
+create temporary table sketch_intermediate (category char(1), sketch binary);
+insert into sketch_intermediate select category, ds_hll_sketch(id) from sketch_input group by category;
+
+-- get unique count estimates per category
+select category, ds_hll_estimate(sketch) from sketch_intermediate;
+
+
+-- union sketches across categories and get overall unique count estimate
+select ds_hll_estimate(ds_hll_union(sketch)) from sketch_intermediate;
diff --git a/ql/src/test/queries/clientpositive/sketches_theta.q b/ql/src/test/queries/clientpositive/sketches_theta.q
new file mode 100644
index 0000000..6ab7278
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/sketches_theta.q
@@ -0,0 +1,33 @@
+-- see here: https://datasketches.apache.org/docs/Theta/ThetaHiveUDFs.html
+
+create temporary table theta_input (id int, category char(1));
+insert into table theta_input values
+  (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b');
+
+create temporary table sketch_intermediate (category char(1), sketch binary);
+insert into sketch_intermediate select category, ds_theta_sketch(id) from theta_input group by category;
+
+select category, ds_theta_estimate(sketch) from sketch_intermediate;
+
+select ds_theta_estimate(ds_theta_union(sketch)) from sketch_intermediate;
+
+
+
+create temporary table sketch_input (id1 int, id2 int);
+insert into table sketch_input values
+  (1, 2), (2, 4), (3, 6), (4, 8), (5, 10), (6, 12), (7, 14), (8, 16), (9, 18), (10, 20);
+
+create temporary table sketch_intermediate2 (sketch1 binary, sketch2 binary);
+
+insert into sketch_intermediate2 select ds_theta_sketch(id1), ds_theta_sketch(id2) from sketch_input;
+
+select
+  ds_theta_estimate(sketch1),
+  ds_theta_estimate(sketch2),
+  ds_theta_estimate(ds_theta_union_f(sketch1, sketch2)),
+  ds_theta_estimate(ds_theta_intersect_f(sketch1, sketch2)),
+  ds_theta_estimate(ds_theta_exclude(sketch1, sketch2)),
+  ds_theta_estimate(ds_theta_exclude(sketch2, sketch1))
+from sketch_intermediate2;
+
diff --git a/ql/src/test/results/clientpositive/llap/sketches_hll.q.out b/ql/src/test/results/clientpositive/llap/sketches_hll.q.out
new file mode 100644
index 0000000..9ebce86
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/sketches_hll.q.out
@@ -0,0 +1,59 @@
+PREHOOK: query: create temporary table sketch_input (id int, category char(1))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: create temporary table sketch_input (id int, category char(1))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sketch_input
+PREHOOK: query: insert into table sketch_input values
+  (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: insert into table sketch_input values
+  (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@sketch_input
+POSTHOOK: Lineage: sketch_input.category SCRIPT []
+POSTHOOK: Lineage: sketch_input.id SCRIPT []
+PREHOOK: query: create temporary table sketch_intermediate (category char(1), sketch binary)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sketch_intermediate
+POSTHOOK: query: create temporary table sketch_intermediate (category char(1), sketch binary)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sketch_intermediate
+PREHOOK: query: insert into sketch_intermediate select category, ds_hll_sketch(id) from sketch_input group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+PREHOOK: Output: default@sketch_intermediate
+POSTHOOK: query: insert into sketch_intermediate select category, ds_hll_sketch(id) from sketch_input group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+POSTHOOK: Output: default@sketch_intermediate
+POSTHOOK: Lineage: sketch_intermediate.category SIMPLE [(sketch_input)sketch_input.FieldSchema(name:category, type:char(1), comment:null), ]
+POSTHOOK: Lineage: sketch_intermediate.sketch EXPRESSION [(sketch_input)sketch_input.FieldSchema(name:id, type:int, comment:null), ]
+PREHOOK: query: select category, ds_hll_estimate(sketch) from sketch_intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_intermediate
+#### A masked pattern was here ####
+POSTHOOK: query: select category, ds_hll_estimate(sketch) from sketch_intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_intermediate
+#### A masked pattern was here ####
+a	10.000000223517425
+b	10.000000223517425
+PREHOOK: query: select ds_hll_estimate(ds_hll_union(sketch)) from sketch_intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_intermediate
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_hll_estimate(ds_hll_union(sketch)) from sketch_intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_intermediate
+#### A masked pattern was here ####
+15.000000521540663
diff --git a/ql/src/test/results/clientpositive/llap/sketches_theta.q.out b/ql/src/test/results/clientpositive/llap/sketches_theta.q.out
new file mode 100644
index 0000000..b3ea64d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/sketches_theta.q.out
@@ -0,0 +1,120 @@
+PREHOOK: query: create temporary table theta_input (id int, category char(1))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@theta_input
+POSTHOOK: query: create temporary table theta_input (id int, category char(1))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@theta_input
+PREHOOK: query: insert into table theta_input values
+  (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@theta_input
+POSTHOOK: query: insert into table theta_input values
+  (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+  (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@theta_input
+POSTHOOK: Lineage: theta_input.category SCRIPT []
+POSTHOOK: Lineage: theta_input.id SCRIPT []
+PREHOOK: query: create temporary table sketch_intermediate (category char(1), sketch binary)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sketch_intermediate
+POSTHOOK: query: create temporary table sketch_intermediate (category char(1), sketch binary)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sketch_intermediate
+PREHOOK: query: insert into sketch_intermediate select category, ds_theta_sketch(id) from theta_input group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@theta_input
+PREHOOK: Output: default@sketch_intermediate
+POSTHOOK: query: insert into sketch_intermediate select category, ds_theta_sketch(id) from theta_input group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@theta_input
+POSTHOOK: Output: default@sketch_intermediate
+POSTHOOK: Lineage: sketch_intermediate.category SIMPLE [(theta_input)theta_input.FieldSchema(name:category, type:char(1), comment:null), ]
+POSTHOOK: Lineage: sketch_intermediate.sketch EXPRESSION [(theta_input)theta_input.FieldSchema(name:id, type:int, comment:null), ]
+PREHOOK: query: select category, ds_theta_estimate(sketch) from sketch_intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_intermediate
+#### A masked pattern was here ####
+POSTHOOK: query: select category, ds_theta_estimate(sketch) from sketch_intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_intermediate
+#### A masked pattern was here ####
+a	10.0
+b	10.0
+PREHOOK: query: select ds_theta_estimate(ds_theta_union(sketch)) from sketch_intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_intermediate
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_theta_estimate(ds_theta_union(sketch)) from sketch_intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_intermediate
+#### A masked pattern was here ####
+15.0
+PREHOOK: query: create temporary table sketch_input (id1 int, id2 int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: create temporary table sketch_input (id1 int, id2 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sketch_input
+PREHOOK: query: insert into table sketch_input values
+  (1, 2), (2, 4), (3, 6), (4, 8), (5, 10), (6, 12), (7, 14), (8, 16), (9, 18), (10, 20)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: insert into table sketch_input values
+  (1, 2), (2, 4), (3, 6), (4, 8), (5, 10), (6, 12), (7, 14), (8, 16), (9, 18), (10, 20)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@sketch_input
+POSTHOOK: Lineage: sketch_input.id1 SCRIPT []
+POSTHOOK: Lineage: sketch_input.id2 SCRIPT []
+PREHOOK: query: create temporary table sketch_intermediate2 (sketch1 binary, sketch2 binary)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sketch_intermediate2
+POSTHOOK: query: create temporary table sketch_intermediate2 (sketch1 binary, sketch2 binary)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sketch_intermediate2
+PREHOOK: query: insert into sketch_intermediate2 select ds_theta_sketch(id1), ds_theta_sketch(id2) from sketch_input
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+PREHOOK: Output: default@sketch_intermediate2
+POSTHOOK: query: insert into sketch_intermediate2 select ds_theta_sketch(id1), ds_theta_sketch(id2) from sketch_input
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+POSTHOOK: Output: default@sketch_intermediate2
+POSTHOOK: Lineage: sketch_intermediate2.sketch1 EXPRESSION [(sketch_input)sketch_input.FieldSchema(name:id1, type:int, comment:null), ]
+POSTHOOK: Lineage: sketch_intermediate2.sketch2 EXPRESSION [(sketch_input)sketch_input.FieldSchema(name:id2, type:int, comment:null), ]
+PREHOOK: query: select
+  ds_theta_estimate(sketch1),
+  ds_theta_estimate(sketch2),
+  ds_theta_estimate(ds_theta_union_f(sketch1, sketch2)),
+  ds_theta_estimate(ds_theta_intersect_f(sketch1, sketch2)),
+  ds_theta_estimate(ds_theta_exclude(sketch1, sketch2)),
+  ds_theta_estimate(ds_theta_exclude(sketch2, sketch1))
+from sketch_intermediate2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_intermediate2
+#### A masked pattern was here ####
+POSTHOOK: query: select
+  ds_theta_estimate(sketch1),
+  ds_theta_estimate(sketch2),
+  ds_theta_estimate(ds_theta_union_f(sketch1, sketch2)),
+  ds_theta_estimate(ds_theta_intersect_f(sketch1, sketch2)),
+  ds_theta_estimate(ds_theta_exclude(sketch1, sketch2)),
+  ds_theta_estimate(ds_theta_exclude(sketch2, sketch1))
+from sketch_intermediate2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_intermediate2
+#### A masked pattern was here ####
+10.0	10.0	15.0	5.0	5.0	5.0
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index 0453400..4b38cfb 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -105,6 +105,69 @@ decode
 degrees
 dense_rank
 div
+ds_cpc_estimate
+ds_cpc_estimate_bounds
+ds_cpc_sketch
+ds_cpc_stringify
+ds_cpc_union
+ds_cpc_union_f
+ds_freq_frequent_items
+ds_freq_sketch
+ds_freq_union
+ds_hll_estimate
+ds_hll_estimate_bounds
+ds_hll_sketch
+ds_hll_stringify
+ds_hll_union
+ds_hll_union_f
+ds_kll_cdf
+ds_kll_n
+ds_kll_pmf
+ds_kll_quantile
+ds_kll_quantiles
+ds_kll_rank
+ds_kll_sketch
+ds_kll_stringify
+ds_kll_union
+ds_quantile_doubles_cdf
+ds_quantile_doubles_k
+ds_quantile_doubles_n
+ds_quantile_doubles_pmf
+ds_quantile_doubles_quantile
+ds_quantile_doubles_quantiles
+ds_quantile_doubles_sketch
+ds_quantile_doubles_stringify
+ds_quantile_doubles_union
+ds_quantile_strings_cdf
+ds_quantile_strings_k
+ds_quantile_strings_n
+ds_quantile_strings_pmf
+ds_quantile_strings_quantile
+ds_quantile_strings_quantiles
+ds_quantile_strings_sketch
+ds_quantile_strings_stringify
+ds_quantile_strings_union
+ds_theta_estimate
+ds_theta_exclude
+ds_theta_intersect
+ds_theta_intersect_f
+ds_theta_sketch
+ds_theta_union
+ds_theta_union_f
+ds_tuple_arrayofdouble_estimate
+ds_tuple_arrayofdouble_estimate_bounds
+ds_tuple_arrayofdouble_means
+ds_tuple_arrayofdouble_n_retained
+ds_tuple_arrayofdouble_quantiles_sketch
+ds_tuple_arrayofdouble_sketch
+ds_tuple_arrayofdouble_ttest
+ds_tuple_arrayofdouble_union
+ds_tuple_arrayofdouble_values
+ds_tuple_arrayofdouble_variances
+ds_tuple_doublesummary_estimate
+ds_tuple_doublesummary_percentile
+ds_tuple_doublesummary_sketch
+ds_tuple_doublesummary_union
 e
 elt
 encode
@@ -392,6 +455,16 @@ coalesce
 current_database
 current_date
 decode
+ds_cpc_estimate
+ds_hll_estimate
+ds_kll_quantile
+ds_quantile_doubles_quantile
+ds_quantile_strings_quantile
+ds_theta_estimate
+ds_theta_exclude
+ds_tuple_arrayofdouble_estimate
+ds_tuple_doublesummary_estimate
+ds_tuple_doublesummary_percentile
 e
 encode
 explode
@@ -540,6 +613,69 @@ decode
 degrees
 dense_rank
 div
+ds_cpc_estimate
+ds_cpc_estimate_bounds
+ds_cpc_sketch
+ds_cpc_stringify
+ds_cpc_union
+ds_cpc_union_f
+ds_freq_frequent_items
+ds_freq_sketch
+ds_freq_union
+ds_hll_estimate
+ds_hll_estimate_bounds
+ds_hll_sketch
+ds_hll_stringify
+ds_hll_union
+ds_hll_union_f
+ds_kll_cdf
+ds_kll_n
+ds_kll_pmf
+ds_kll_quantile
+ds_kll_quantiles
+ds_kll_rank
+ds_kll_sketch
+ds_kll_stringify
+ds_kll_union
+ds_quantile_doubles_cdf
+ds_quantile_doubles_k
+ds_quantile_doubles_n
+ds_quantile_doubles_pmf
+ds_quantile_doubles_quantile
+ds_quantile_doubles_quantiles
+ds_quantile_doubles_sketch
+ds_quantile_doubles_stringify
+ds_quantile_doubles_union
+ds_quantile_strings_cdf
+ds_quantile_strings_k
+ds_quantile_strings_n
+ds_quantile_strings_pmf
+ds_quantile_strings_quantile
+ds_quantile_strings_quantiles
+ds_quantile_strings_sketch
+ds_quantile_strings_stringify
+ds_quantile_strings_union
+ds_theta_estimate
+ds_theta_exclude
+ds_theta_intersect
+ds_theta_intersect_f
+ds_theta_sketch
+ds_theta_union
+ds_theta_union_f
+ds_tuple_arrayofdouble_estimate
+ds_tuple_arrayofdouble_estimate_bounds
+ds_tuple_arrayofdouble_means
+ds_tuple_arrayofdouble_n_retained
+ds_tuple_arrayofdouble_quantiles_sketch
+ds_tuple_arrayofdouble_sketch
+ds_tuple_arrayofdouble_ttest
+ds_tuple_arrayofdouble_union
+ds_tuple_arrayofdouble_values
+ds_tuple_arrayofdouble_variances
+ds_tuple_doublesummary_estimate
+ds_tuple_doublesummary_percentile
+ds_tuple_doublesummary_sketch
+ds_tuple_doublesummary_union
 e
 elt
 encode