You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by dk...@apache.org on 2022/11/21 08:51:40 UTC
[hive] branch master updated: HIVE-26243: Add vectorized implementation of the 'ds_kll_sketch' UDAF (Alessandro Solimando, reviewed by Denys Kuzmenko, Zoltan Haindrich)
This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new ad19ec3022a HIVE-26243: Add vectorized implementation of the 'ds_kll_sketch' UDAF (Alessandro Solimando, reviewed by Denys Kuzmenko, Zoltan Haindrich)
ad19ec3022a is described below
commit ad19ec3022a35bee4d618bd8992d9ce0f67be5b7
Author: Alessandro Solimando <al...@gmail.com>
AuthorDate: Mon Nov 21 09:51:35 2022 +0100
HIVE-26243: Add vectorized implementation of the 'ds_kll_sketch' UDAF (Alessandro Solimando, reviewed by Denys Kuzmenko, Zoltan Haindrich)
Closes #3317
---
pom.xml | 11 +
.../UDAFTemplates/VectorUDAFComputeDsKllSketch.txt | 314 ++++++++
.../hadoop/hive/ql/optimizer/MapJoinProcessor.java | 2 +-
.../hive/ql/optimizer/physical/Vectorizer.java | 90 ++-
.../datasketches/kll/KllHistogramEstimator.java | 78 ++
.../kll/KllHistogramEstimatorFactory.java | 47 ++
.../hive/ql/udf/datasketches/kll/KllUtils.java | 97 +++
.../queries/clientpositive/compute_kll_sketch.q | 67 ++
.../clientpositive/llap/compute_kll_sketch.q.out | 829 +++++++++++++++++++++
.../sketches_materialized_view_cume_dist.q.out | 6 +-
.../llap/sketches_materialized_view_ntile.q.out | 6 +-
...ketches_materialized_view_percentile_disc.q.out | 6 +-
.../llap/sketches_materialized_view_rank.q.out | 6 +-
.../llap/sketches_rewrite_cume_dist.q.out | 16 +-
.../sketches_rewrite_cume_dist_partition_by.q.out | 4 +-
.../llap/sketches_rewrite_ntile.q.out | 12 +-
.../llap/sketches_rewrite_ntile_partition_by.q.out | 4 +-
.../llap/sketches_rewrite_percentile_disc.q.out | 4 +-
.../llap/sketches_rewrite_rank.q.out | 16 +-
.../llap/sketches_rewrite_rank_partition_by.q.out | 4 +-
.../apache/hadoop/hive/tools/GenVectorCode.java | 11 +-
21 files changed, 1552 insertions(+), 78 deletions(-)
diff --git a/pom.xml b/pom.xml
index 6d687255412..6a7a02ea7a0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -609,6 +609,17 @@
<artifactId>calcite-druid</artifactId>
<version>${calcite.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.datasketches</groupId>
+ <artifactId>datasketches-hive</artifactId>
+ <version>${datasketches.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-simple</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
<dependency>
<groupId>org.apache.orc</groupId>
<artifactId>orc-core</artifactId>
diff --git a/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFComputeDsKllSketch.txt b/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFComputeDsKllSketch.txt
new file mode 100644
index 00000000000..7ab07d5bd4f
--- /dev/null
+++ b/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFComputeDsKllSketch.txt
@@ -0,0 +1,314 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates;
+
+import org.apache.hadoop.hive.ql.udf.datasketches.kll.KllHistogramEstimator;
+import org.apache.hadoop.hive.ql.udf.datasketches.kll.KllHistogramEstimatorFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+#IF COMPLETE
+import org.apache.hadoop.hive.ql.exec.vector.<InputColumnVectorType>;
+#ENDIF COMPLETE
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
+
+/**
+ * Generated from template VectorUDAFComputeDsKllSketch.txt.
+ */
+@Description(name = "ds_kll_sketch", value = "_FUNC_(x) "
+ + "Returns a KllFloatsSketch in a serialized form as a binary blob."
+ + " Values must be of type float.")
+public class <ClassName> extends VectorAggregateExpression {
+
+ private transient int k;
+
+ public <ClassName>() {
+ super();
+ }
+
+ public <ClassName>(VectorAggregationDesc vecAggrDesc) {
+ this(vecAggrDesc, 200);
+ }
+
+ public <ClassName>(VectorAggregationDesc vecAggrDesc, int k) {
+ super(vecAggrDesc);
+ this.k = k;
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new Aggregation();
+ }
+
+ @Override
+ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) throws HiveException {
+ inputExpression.evaluate(batch);
+
+#IF COMPLETE
+ <InputColumnVectorType> inputColumn = (<InputColumnVectorType>) batch.cols[this.inputExpression.getOutputColumnNum()];
+#ENDIF COMPLETE
+#IF MERGING
+ BytesColumnVector inputColumn = (BytesColumnVector) batch.cols[this.inputExpression.getOutputColumnNum()];
+#ENDIF MERGING
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+ Aggregation myagg = (Aggregation) agg;
+
+#IF COMPLETE
+ myagg.prepare(k);
+ if (inputColumn.noNulls) {
+ if (inputColumn.isRepeating) {
+ for (int i = 0; i < batchSize; i++) {
+ myagg.estimator.addToEstimator(inputColumn.vector[0]);
+ }
+ } else {
+ if (batch.selectedInUse) {
+ for (int s = 0; s < batchSize; s++) {
+ int i = batch.selected[s];
+ myagg.estimator.addToEstimator(inputColumn.vector[i]);
+ }
+ } else {
+ for (int i = 0; i < batchSize; i++) {
+ myagg.estimator.addToEstimator(inputColumn.vector[i]);
+ }
+ }
+ }
+ } else {
+ if (inputColumn.isRepeating) {
+ if (!inputColumn.isNull[0]) {
+ for (int i = 0; i < batchSize; i++) {
+ myagg.estimator.addToEstimator(inputColumn.vector[0]);
+ }
+ }
+ } else {
+ if (batch.selectedInUse) {
+ for (int j = 0; j < batchSize; ++j) {
+ int i = batch.selected[j];
+ if (!inputColumn.isNull[i]) {
+ myagg.estimator.addToEstimator(inputColumn.vector[i]);
+ }
+ }
+ } else {
+ for (int i = 0; i < batchSize; i++) {
+ if (!inputColumn.isNull[i]) {
+ myagg.estimator.addToEstimator(inputColumn.vector[i]);
+ }
+ }
+ }
+ }
+ }
+#ENDIF COMPLETE
+#IF MERGING
+ if (inputColumn.isRepeating) {
+ if (!inputColumn.isNull[0] && inputColumn.length[0] > 0) {
+ KllHistogramEstimator mergingKLL = KllHistogramEstimatorFactory.getKllHistogramEstimator(
+ inputColumn.vector[0], inputColumn.start[0], inputColumn.length[0]);
+ myagg.prepare(mergingKLL.getK());
+ myagg.estimator.mergeEstimators(mergingKLL);
+ }
+ } else {
+ for (int i = 0; i < batchSize; i++) {
+ int s = i;
+ if (batch.selectedInUse) {
+ s = batch.selected[i];
+ }
+ if (!inputColumn.isNull[s] && inputColumn.length[s] > 0) {
+ KllHistogramEstimator mergingKLL = KllHistogramEstimatorFactory.getKllHistogramEstimator(
+ inputColumn.vector[s], inputColumn.start[s], inputColumn.length[s]);
+ myagg.prepare(mergingKLL.getK());
+ myagg.estimator.mergeEstimators(mergingKLL);
+ }
+ }
+ }
+#ENDIF MERGING
+ }
+
+ private Aggregation getAggregation(VectorAggregationBufferRow[] sets, int rowid, int bufferIndex) {
+ VectorAggregationBufferRow bufferRow = sets[rowid];
+ Aggregation myagg = (Aggregation) bufferRow.getAggregationBuffer(bufferIndex);
+ myagg.prepare(k);
+ return myagg;
+ }
+
+ @Override
+ public void aggregateInputSelection(VectorAggregationBufferRow[] aggregationBufferSets, int aggregateIndex,
+ VectorizedRowBatch batch) throws HiveException {
+ inputExpression.evaluate(batch);
+
+#IF COMPLETE
+ <InputColumnVectorType> inputColumn = (<InputColumnVectorType>) batch.cols[this.inputExpression.getOutputColumnNum()];
+#ENDIF COMPLETE
+#IF MERGING
+ BytesColumnVector inputColumn = (BytesColumnVector) batch.cols[this.inputExpression.getOutputColumnNum()];
+#ENDIF MERGING
+
+ int batchSize = batch.size;
+
+ if (batchSize == 0) {
+ return;
+ }
+
+#IF COMPLETE
+ if (inputColumn.noNulls) {
+ if (inputColumn.isRepeating) {
+ for (int i = 0; i < batchSize; i++) {
+ Aggregation myagg = getAggregation(aggregationBufferSets, i, aggregateIndex);
+ myagg.estimator.addToEstimator(inputColumn.vector[0]);
+ }
+ } else {
+ if (batch.selectedInUse) {
+ for (int s = 0; s < batchSize; s++) {
+ int i = batch.selected[s];
+ Aggregation myagg = getAggregation(aggregationBufferSets, s, aggregateIndex);
+ myagg.estimator.addToEstimator(inputColumn.vector[i]);
+ }
+ } else {
+ for (int i = 0; i < batchSize; i++) {
+ Aggregation myagg = getAggregation(aggregationBufferSets, i, aggregateIndex);
+ myagg.estimator.addToEstimator(inputColumn.vector[i]);
+ }
+ }
+ }
+ } else {
+ if (inputColumn.isRepeating) {
+ if (!inputColumn.isNull[0]) {
+ for (int i = 0; i < batchSize; i++) {
+ Aggregation myagg = getAggregation(aggregationBufferSets, i, aggregateIndex);
+ myagg.estimator.addToEstimator(inputColumn.vector[0]);
+ }
+ }
+ } else {
+ if (batch.selectedInUse) {
+ for (int s = 0; s < batchSize; s++) {
+ int i = batch.selected[s];
+ if (!inputColumn.isNull[i]) {
+ Aggregation myagg = getAggregation(aggregationBufferSets, s, aggregateIndex);
+ myagg.estimator.addToEstimator(inputColumn.vector[i]);
+ }
+ }
+ } else {
+ for (int i = 0; i < batchSize; i++) {
+ if (!inputColumn.isNull[i]) {
+ Aggregation myagg = getAggregation(aggregationBufferSets, i, aggregateIndex);
+ myagg.estimator.addToEstimator(inputColumn.vector[i]);
+ }
+ }
+ }
+ }
+ }
+#ENDIF COMPLETE
+#IF MERGING
+ if (inputColumn.isRepeating) {
+ if (!inputColumn.isNull[0] && inputColumn.length[0] > 0) {
+ for (int i = 0; i < batchSize; i++) {
+ Aggregation myagg = getAggregation(aggregationBufferSets, i, aggregateIndex);
+ KllHistogramEstimator mergingKLL = KllHistogramEstimatorFactory.getKllHistogramEstimator(
+ inputColumn.vector[0], inputColumn.start[0], inputColumn.length[0]);
+ myagg.estimator.mergeEstimators(mergingKLL);
+ }
+ }
+ } else {
+ for (int i = 0; i < batchSize; i++) {
+ int s = i;
+ if (batch.selectedInUse) {
+ s = batch.selected[i];
+ }
+ if (!inputColumn.isNull[s] && inputColumn.length[s] > 0) {
+ Aggregation myagg = getAggregation(aggregationBufferSets, i, aggregateIndex);
+ KllHistogramEstimator mergingKLL = KllHistogramEstimatorFactory.getKllHistogramEstimator(
+ inputColumn.vector[s], inputColumn.start[s], inputColumn.length[s]);
+ myagg.estimator.mergeEstimators(mergingKLL);
+ }
+ }
+ }
+#ENDIF MERGING
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ agg.reset();
+ }
+
+ @Override
+ public long getAggregationBufferFixedSize() {
+ return 0;
+ }
+
+ @Override
+ public boolean matches(String name, ColumnVector.Type inputColVectorType, ColumnVector.Type outputColVectorType,
+ GenericUDAFEvaluator.Mode mode) {
+ return name.equals("ds_kll_sketch") &&
+ outputColVectorType == ColumnVector.Type.BYTES &&
+#IF MERGING
+ inputColVectorType == ColumnVector.Type.BYTES &&
+ (mode == GenericUDAFEvaluator.Mode.PARTIAL2 || mode == GenericUDAFEvaluator.Mode.FINAL);
+#ENDIF MERGING
+#IF COMPLETE
+ inputColVectorType == ColumnVector.Type.<UpperCaseColumnVectorType> &&
+ (mode == GenericUDAFEvaluator.Mode.PARTIAL1 || mode == GenericUDAFEvaluator.Mode.COMPLETE);
+#ENDIF COMPLETE
+ }
+
+ @Override
+ public void assignRowColumn(
+ VectorizedRowBatch batch, int batchIndex, int columnNum, AggregationBuffer agg) throws HiveException {
+ Aggregation myagg = (Aggregation) agg;
+ BytesColumnVector outputCol = (BytesColumnVector) batch.cols[columnNum];
+ if (myagg.estimator == null) {
+ outputCol.isNull[batchIndex] = true;
+ outputCol.noNulls = false;
+ } else {
+ outputCol.isNull[batchIndex] = false;
+ outputCol.isRepeating = false;
+ byte[] outputbuf = myagg.estimator.serialize();
+ outputCol.setRef(batchIndex, outputbuf, 0, outputbuf.length);
+ }
+ }
+
+ static class Aggregation implements AggregationBuffer {
+
+ KllHistogramEstimator estimator;
+
+ @Override
+ public int getVariableSize() {
+ return estimator.lengthFor(JavaDataModel.get());
+ }
+
+ @Override
+ public void reset() {
+ estimator = null;
+ }
+
+ public void prepare(int k) {
+ if (estimator == null) {
+ estimator = KllHistogramEstimatorFactory.getEmptyHistogramEstimator(k);
+ }
+ }
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
index acfaef7a354..e922ce47796 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
@@ -464,7 +464,7 @@ public class MapJoinProcessor extends Transform {
HiveConf.getVar(hiveConf,
HiveConf.ConfVars.HIVE_TEST_MAPJOINFULLOUTER_OVERRIDE);
EnabledOverride mapJoinFullOuterOverride =
- EnabledOverride.nameMap.get(testMapJoinFullOuterOverrideString);
+ EnabledOverride.NAME_MAP.get(testMapJoinFullOuterOverrideString);
final boolean isEnabled =
HiveConf.getBoolVar(
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 8e3408316fb..46ea0bcfc0c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -39,12 +39,15 @@ import java.util.Set;
import java.util.Stack;
import java.util.TreeSet;
import java.util.regex.Pattern;
+import java.util.stream.Collectors;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorCoalesce;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFComputeDsKllSketchDouble;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFComputeDsKllSketchFinal;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalColDivideDecimalScalar;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinAntiJoinLongOperator;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinAntiJoinMultiKeyOperator;
@@ -247,7 +250,7 @@ import com.google.common.base.Preconditions;
public class Vectorizer implements PhysicalPlanResolver {
- protected static transient final Logger LOG = LoggerFactory.getLogger(Vectorizer.class);
+ protected static final Logger LOG = LoggerFactory.getLogger(Vectorizer.class);
private static final Pattern supportedDataTypesPattern;
@@ -286,9 +289,11 @@ public class Vectorizer implements PhysicalPlanResolver {
supportedDataTypesPattern = Pattern.compile(patternBuilder.toString());
}
- private Set<Class<?>> supportedGenericUDFs = new HashSet<>();
+ private final Set<Class<?>> supportedGenericUDFs = new HashSet<>();
- private Set<String> supportedAggregationUdfs = new HashSet<>();
+ private final Set<String> supportedAggregationUdfs = Arrays.stream(VECTORIZABLE_UDAF.values())
+ .map(e -> e.name().toLowerCase())
+ .collect(Collectors.toSet());
// The set of virtual columns that vectorized readers *MAY* support.
public static final ImmutableSet<VirtualColumn> vectorizableVirtualColumns =
@@ -296,18 +301,46 @@ public class Vectorizer implements PhysicalPlanResolver {
private HiveConf hiveConf;
+ private enum VECTORIZABLE_UDAF {
+ MIN,
+ MAX,
+ COUNT,
+ SUM,
+ AVG,
+ VARIANCE,
+ VAR_POP,
+ VAR_SAMP,
+ STD,
+ STDDEV,
+ STDDEV_POP,
+ STDDEV_SAMP,
+ BLOOM_FILTER,
+ COMPUTE_BIT_VECTOR_HLL,
+ DS_KLL_SKETCH;
+
+ @Override
+ public String toString() {
+ return name().toLowerCase();
+ }
+ }
+
public enum EnabledOverride {
NONE,
DISABLE,
ENABLE;
- public static final Map<String, EnabledOverride> nameMap = new HashMap<>();
+ public static final Map<String, EnabledOverride> NAME_MAP = new HashMap<>();
static {
for (EnabledOverride vectorizationEnabledOverride : values()) {
- nameMap.put(
- vectorizationEnabledOverride.name().toLowerCase(), vectorizationEnabledOverride);
+ NAME_MAP.put(
+ vectorizationEnabledOverride.toString(), vectorizationEnabledOverride);
}
- };
+ }
+
+ @Override
+ public String toString() {
+ return name().toLowerCase();
+ }
}
private boolean isVectorizationEnabled;
@@ -510,21 +543,6 @@ public class Vectorizer implements PhysicalPlanResolver {
// For conditional expressions
supportedGenericUDFs.add(GenericUDFIf.class);
-
- supportedAggregationUdfs.add("min");
- supportedAggregationUdfs.add("max");
- supportedAggregationUdfs.add("count");
- supportedAggregationUdfs.add("sum");
- supportedAggregationUdfs.add("avg");
- supportedAggregationUdfs.add("variance");
- supportedAggregationUdfs.add("var_pop");
- supportedAggregationUdfs.add("var_samp");
- supportedAggregationUdfs.add("std");
- supportedAggregationUdfs.add("stddev");
- supportedAggregationUdfs.add("stddev_pop");
- supportedAggregationUdfs.add("stddev_samp");
- supportedAggregationUdfs.add(BLOOM_FILTER_FUNCTION);
- supportedAggregationUdfs.add("compute_bit_vector_hll");
}
private class VectorTaskColumnInfo {
@@ -2395,7 +2413,7 @@ public class Vectorizer implements PhysicalPlanResolver {
HiveConf.getVar(hiveConf,
HiveConf.ConfVars.HIVE_TEST_VECTORIZATION_ENABLED_OVERRIDE);
vectorizationEnabledOverride =
- EnabledOverride.nameMap.get(vectorizationEnabledOverrideString);
+ EnabledOverride.NAME_MAP.get(vectorizationEnabledOverrideString);
isVectorizationEnabled = HiveConf.getBoolVar(hiveConf,
HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
@@ -4470,17 +4488,23 @@ public class Vectorizer implements PhysicalPlanResolver {
VectorizationContext vContext)
throws HiveException {
- VectorizedUDAFs annotation =
- AnnotationUtils.getAnnotation(evaluator.getClass(), VectorizedUDAFs.class);
- if (annotation == null) {
- String issue =
- "Evaluator " + evaluator.getClass().getSimpleName() + " does not have a " +
- "vectorized UDAF annotation (aggregation: \"" + aggregationName + "\"). " +
- "Vectorization not supported";
- return new ImmutablePair<VectorAggregationDesc,String>(null, issue);
+ Class<? extends VectorAggregateExpression>[] vecAggrClasses;
+ // "ds_kll_sketch" needs special treatment because the UDAF comes from the
+ // data-sketches library, so we cannot add annotations there
+ if (aggregationName.equals(VECTORIZABLE_UDAF.DS_KLL_SKETCH.toString())) {
+ vecAggrClasses = new Class[] {
+ VectorUDAFComputeDsKllSketchDouble.class, VectorUDAFComputeDsKllSketchFinal.class
+ };
+ } else {
+ VectorizedUDAFs annotation =
+ AnnotationUtils.getAnnotation(evaluator.getClass(), VectorizedUDAFs.class);
+ if (annotation == null) {
+ String issue = "Evaluator " + evaluator.getClass().getSimpleName() + " does not have a "
+ + "vectorized UDAF annotation (aggregation: \"" + aggregationName + "\"). " + "Vectorization not supported";
+ return new ImmutablePair<>(null, issue);
+ }
+ vecAggrClasses = annotation.value();
}
- final Class<? extends VectorAggregateExpression>[] vecAggrClasses = annotation.value();
-
// Not final since it may change later due to DECIMAL_64.
ColumnVector.Type outputColVectorType =
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/datasketches/kll/KllHistogramEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/datasketches/kll/KllHistogramEstimator.java
new file mode 100644
index 00000000000..4d0777c3ff9
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/datasketches/kll/KllHistogramEstimator.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.datasketches.kll;
+
+import org.apache.datasketches.kll.KllFloatsSketch;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+
+public class KllHistogramEstimator {
+
+ private final KllFloatsSketch kll;
+
+ KllHistogramEstimator(int k) {
+ this.kll = new KllFloatsSketch(k);
+ }
+
+ KllHistogramEstimator(KllFloatsSketch kll) {
+ this.kll = kll;
+ }
+
+ public byte[] serialize() {
+ final ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ try {
+ KllUtils.serializeKll(bos, kll);
+ final byte[] result = bos.toByteArray();
+ bos.close();
+ return result;
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public void addToEstimator(long v) {
+ kll.update(v);
+ }
+
+ public void addToEstimator(double d) {
+ kll.update((float) d);
+ }
+
+ public void addToEstimator(HiveDecimal decimal) {
+ kll.update(decimal.floatValue());
+ }
+
+ public void mergeEstimators(KllHistogramEstimator o) {
+ kll.merge(o.kll);
+ }
+
+ public int lengthFor(JavaDataModel model) {
+ return KllUtils.lengthFor(model, kll);
+ }
+
+ public KllFloatsSketch getSketch() {
+ return kll;
+ }
+
+ public int getK() {
+ return kll.getK();
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/datasketches/kll/KllHistogramEstimatorFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/datasketches/kll/KllHistogramEstimatorFactory.java
new file mode 100644
index 00000000000..72dc7d3b400
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/datasketches/kll/KllHistogramEstimatorFactory.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.datasketches.kll;
+
+public class KllHistogramEstimatorFactory {
+
+ private KllHistogramEstimatorFactory() {
+ throw new AssertionError("Suppress default constructor for non instantiation");
+ }
+
+ /**
+ * This function deserializes the serialized KLL histogram estimator from a byte array.
+ * @param buf to deserialize
+ * @param start start index for deserialization
+ * @param len start+len is deserialized
+ * @return KLL histogram estimator
+ */
+ public static KllHistogramEstimator getKllHistogramEstimator(byte[] buf, int start, int len) {
+ return new KllHistogramEstimator(KllUtils.deserializeKll(buf, start, len));
+ }
+
+ /**
+ * This method creates an empty histogram estimator with a KLL sketch of a given k parameter.
+ * @param k the KLL parameter k for initializing the sketch
+ * @return an empty histogram estimator with a KLL sketch of a given k parameter
+ */
+ public static KllHistogramEstimator getEmptyHistogramEstimator(int k) {
+ return new KllHistogramEstimator(k);
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/datasketches/kll/KllUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/datasketches/kll/KllUtils.java
new file mode 100644
index 00000000000..2d9c08b586d
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/datasketches/kll/KllUtils.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.datasketches.kll;
+
+import org.apache.datasketches.kll.KllFloatsSketch;
+import org.apache.datasketches.memory.Memory;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+/**
+ * KLL serialization utilities.
+ */
+public class KllUtils {
+
+ private KllUtils() {
+ throw new AssertionError("Suppress default constructor for non instantiation");
+ }
+
+ /**
+ * The KLL sketch is serialized in the format provided by the data-sketches library
+ * @param out output stream to write to
+ * @param kll KLL sketch that needs to be serialized
+ * @throws IOException if an error occurs during serialization
+ */
+ public static void serializeKll(OutputStream out, KllFloatsSketch kll) throws IOException {
+ out.write(kll.toByteArray());
+ }
+
+ /**
+ * This function deserializes the serialized KLL sketch from a stream.
+ * @param in input stream to be deserialized
+ * @return KLL sketch
+ * @throws IOException if errors occur while reading the stream
+ */
+ public static KllFloatsSketch deserializeKll(InputStream in) throws IOException {
+ final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+ final byte[] data = new byte[4];
+ int nRead;
+
+ while ((nRead = in.read(data, 0, data.length)) != -1) {
+ buffer.write(data, 0, nRead);
+ }
+
+ buffer.flush();
+ return KllFloatsSketch.heapify(Memory.wrap(buffer.toByteArray()));
+ }
+
+ /**
+ * This function deserializes the serialized KLL sketch from a byte array.
+ * @param buf to deserialize
+ * @param start start index for deserialization
+ * @param len start+len is deserialized
+ * @return KLL sketch
+ */
+ public static KllFloatsSketch deserializeKll(byte[] buf, int start, int len) {
+ InputStream is = new ByteArrayInputStream(buf, start, len);
+ try {
+ KllFloatsSketch result = deserializeKll(is);
+ is.close();
+ return result;
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * Returns the length of the given KLL sketch according to the given java data model.
+ * @param model the java data model to compute the length
+ * @param kll the KLL sketch to compute the length for
+ * @return the length of the given KLL sketch according to the given java data model
+ */
+ public static int lengthFor(JavaDataModel model, KllFloatsSketch kll) {
+ return model == null ? KllFloatsSketch.getMaxSerializedSizeBytes(kll.getK(), kll.getN())
+ : (int) model.lengthForByteArrayOfSize(kll.getSerializedSizeBytes());
+ }
+}
diff --git a/ql/src/test/queries/clientpositive/compute_kll_sketch.q b/ql/src/test/queries/clientpositive/compute_kll_sketch.q
new file mode 100644
index 00000000000..c7ff5d64363
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/compute_kll_sketch.q
@@ -0,0 +1,67 @@
+--! qt:dataset:src
+--! qt:dataset:alltypesorc
+
+-- check that both call (aggregation column alone, aggregation column + sketch size)
+-- work with vectorization and that the computed values coincide
+set hive.vectorized.execution.enabled=true;
+select ds_kll_stringify(ds_kll_sketch(cast (ctinyint as float), 200)) from alltypesorc;
+select ds_kll_stringify(ds_kll_sketch(cast (ctinyint as float))) from alltypesorc;
+-- compare it against the non-vectorized execution
+set hive.vectorized.execution.enabled=false;
+select ds_kll_stringify(ds_kll_sketch(cast (ctinyint as float), 200)) from alltypesorc;
+select ds_kll_stringify(ds_kll_sketch(cast (ctinyint as float))) from alltypesorc;
+
+-- change the k parameter (data sketch size) for KLL to see if it's actually used
+set hive.vectorized.execution.enabled=true;
+select ds_kll_stringify(ds_kll_sketch(cast (ctinyint as float), 300)) from alltypesorc;
+set hive.vectorized.execution.enabled=false;
+select ds_kll_stringify(ds_kll_sketch(cast (ctinyint as float), 300)) from alltypesorc;
+
+-- START: series of tests covering different data types
+set hive.vectorized.execution.enabled=true;
+select ds_kll_stringify(ds_kll_sketch(cast (csmallint as float), 200)) from alltypesorc;
+select ds_kll_stringify(ds_kll_sketch(cast (csmallint as float))) from alltypesorc;
+set hive.vectorized.execution.enabled=false;
+select ds_kll_stringify(ds_kll_sketch(cast (csmallint as float), 200)) from alltypesorc;
+select ds_kll_stringify(ds_kll_sketch(cast (csmallint as float))) from alltypesorc;
+
+set hive.vectorized.execution.enabled=true;
+select ds_kll_stringify(ds_kll_sketch(cast (cint as float), 200)) from alltypesorc;
+select ds_kll_stringify(ds_kll_sketch(cast (cint as float))) from alltypesorc;
+set hive.vectorized.execution.enabled=false;
+select ds_kll_stringify(ds_kll_sketch(cast (cint as float), 200)) from alltypesorc;
+select ds_kll_stringify(ds_kll_sketch(cast (cint as float))) from alltypesorc;
+
+set hive.vectorized.execution.enabled=true;
+select ds_kll_stringify(ds_kll_sketch(cast (cbigint as float), 200)) from alltypesorc;
+select ds_kll_stringify(ds_kll_sketch(cast (cbigint as float))) from alltypesorc;
+set hive.vectorized.execution.enabled=false;
+select ds_kll_stringify(ds_kll_sketch(cast (cbigint as float), 200)) from alltypesorc;
+select ds_kll_stringify(ds_kll_sketch(cast (cbigint as float))) from alltypesorc;
+
+set hive.vectorized.execution.enabled=true;
+select ds_kll_stringify(ds_kll_sketch(cfloat, 200)) from alltypesorc;
+select ds_kll_stringify(ds_kll_sketch(cfloat)) from alltypesorc;
+set hive.vectorized.execution.enabled=false;
+select ds_kll_stringify(ds_kll_sketch(cfloat, 200)) from alltypesorc;
+select ds_kll_stringify(ds_kll_sketch(cfloat)) from alltypesorc;
+
+set hive.vectorized.execution.enabled=true;
+select ds_kll_stringify(ds_kll_sketch(cast (cdouble as float), 200)) from alltypesorc;
+select ds_kll_stringify(ds_kll_sketch(cast (cdouble as float))) from alltypesorc;
+set hive.vectorized.execution.enabled=false;
+select ds_kll_stringify(ds_kll_sketch(cast (cdouble as float), 200)) from alltypesorc;
+select ds_kll_stringify(ds_kll_sketch(cast (cdouble as float))) from alltypesorc;
+-- END: series of tests covering different data types
+
+-- testing that the KLL sketch of two identical columns is equal
+create table test_compute_kll (key1 int, key2 int);
+insert overwrite table test_compute_kll select a.key, b.key from src as a, src as b;
+
+set hive.vectorized.execution.enabled=true;
+select ds_kll_stringify(ds_kll_sketch(cast (key1 as float))) from test_compute_kll;
+select ds_kll_stringify(ds_kll_sketch(cast (key2 as float))) from test_compute_kll;
+
+set hive.vectorized.execution.enabled=false;
+select ds_kll_stringify(ds_kll_sketch(cast (key1 as float))) from test_compute_kll;
+select ds_kll_stringify(ds_kll_sketch(cast (key2 as float))) from test_compute_kll;
diff --git a/ql/src/test/results/clientpositive/llap/compute_kll_sketch.q.out b/ql/src/test/results/clientpositive/llap/compute_kll_sketch.q.out
new file mode 100644
index 00000000000..b463c5ed5c8
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/compute_kll_sketch.q.out
@@ -0,0 +1,829 @@
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (ctinyint as float), 200)) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (ctinyint as float), 200)) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9173
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 535
+ Storage Bytes : 2192
+ Min Value : -64.0
+ Max Value : 62.0
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (ctinyint as float))) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (ctinyint as float))) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9173
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 535
+ Storage Bytes : 2192
+ Min Value : -64.0
+ Max Value : 62.0
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (ctinyint as float), 200)) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (ctinyint as float), 200)) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9173
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 535
+ Storage Bytes : 2192
+ Min Value : -64.0
+ Max Value : 62.0
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (ctinyint as float))) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (ctinyint as float))) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9173
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 535
+ Storage Bytes : 2192
+ Min Value : -64.0
+ Max Value : 62.0
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (ctinyint as float), 300)) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (ctinyint as float), 300)) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 300
+ min K : 300
+ M : 8
+ N : 9173
+ Epsilon : 0.896%
+ Epsison PMF : 1.127%
+ Empty : false
+ Estimation Mode : true
+ Levels : 5
+ Sorted : false
+ Buffer Capacity Items: 781
+ Retained Items : 769
+ Storage Bytes : 3124
+ Min Value : -64.0
+ Max Value : 62.0
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (ctinyint as float), 300)) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (ctinyint as float), 300)) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 300
+ min K : 300
+ M : 8
+ N : 9173
+ Epsilon : 0.896%
+ Epsison PMF : 1.127%
+ Empty : false
+ Estimation Mode : true
+ Levels : 5
+ Sorted : false
+ Buffer Capacity Items: 781
+ Retained Items : 769
+ Storage Bytes : 3124
+ Min Value : -64.0
+ Max Value : 62.0
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (csmallint as float), 200)) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (csmallint as float), 200)) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9174
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 536
+ Storage Bytes : 2196
+ Min Value : -16379.0
+ Max Value : 16376.0
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (csmallint as float))) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (csmallint as float))) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9174
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 536
+ Storage Bytes : 2196
+ Min Value : -16379.0
+ Max Value : 16376.0
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (csmallint as float), 200)) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (csmallint as float), 200)) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9174
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 536
+ Storage Bytes : 2196
+ Min Value : -16379.0
+ Max Value : 16376.0
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (csmallint as float))) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (csmallint as float))) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9174
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 536
+ Storage Bytes : 2196
+ Min Value : -16379.0
+ Max Value : 16376.0
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cint as float), 200)) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cint as float), 200)) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9173
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 535
+ Storage Bytes : 2192
+ Min Value : -1.07327936E9
+ Max Value : 1.07368058E9
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cint as float))) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cint as float))) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9173
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 535
+ Storage Bytes : 2192
+ Min Value : -1.07327936E9
+ Max Value : 1.07368058E9
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cint as float), 200)) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cint as float), 200)) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9173
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 535
+ Storage Bytes : 2192
+ Min Value : -1.07327936E9
+ Max Value : 1.07368058E9
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cint as float))) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cint as float))) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9173
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 535
+ Storage Bytes : 2192
+ Min Value : -1.07327936E9
+ Max Value : 1.07368058E9
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cbigint as float), 200)) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cbigint as float), 200)) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9173
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 535
+ Storage Bytes : 2192
+ Min Value : -2.14731162E9
+ Max Value : 2.14549837E9
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cbigint as float))) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cbigint as float))) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9173
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 535
+ Storage Bytes : 2192
+ Min Value : -2.14731162E9
+ Max Value : 2.14549837E9
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cbigint as float), 200)) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cbigint as float), 200)) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9173
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 535
+ Storage Bytes : 2192
+ Min Value : -2.14731162E9
+ Max Value : 2.14549837E9
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cbigint as float))) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cbigint as float))) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9173
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 535
+ Storage Bytes : 2192
+ Min Value : -2.14731162E9
+ Max Value : 2.14549837E9
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cfloat, 200)) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cfloat, 200)) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9173
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 535
+ Storage Bytes : 2192
+ Min Value : -64.0
+ Max Value : 79.553
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cfloat)) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cfloat)) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9173
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 535
+ Storage Bytes : 2192
+ Min Value : -64.0
+ Max Value : 79.553
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cfloat, 200)) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cfloat, 200)) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9173
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 535
+ Storage Bytes : 2192
+ Min Value : -64.0
+ Max Value : 79.553
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cfloat)) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cfloat)) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9173
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 535
+ Storage Bytes : 2192
+ Min Value : -64.0
+ Max Value : 79.553
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cdouble as float), 200)) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cdouble as float), 200)) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9174
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 536
+ Storage Bytes : 2196
+ Min Value : -16379.0
+ Max Value : 9763216.0
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cdouble as float))) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cdouble as float))) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9174
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 536
+ Storage Bytes : 2196
+ Min Value : -16379.0
+ Max Value : 9763216.0
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cdouble as float), 200)) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cdouble as float), 200)) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9174
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 536
+ Storage Bytes : 2196
+ Min Value : -16379.0
+ Max Value : 9763216.0
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cdouble as float))) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (cdouble as float))) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 9174
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 6
+ Sorted : false
+ Buffer Capacity Items: 547
+ Retained Items : 536
+ Storage Bytes : 2196
+ Min Value : -16379.0
+ Max Value : 9763216.0
+### End sketch summary
+
+PREHOOK: query: create table test_compute_kll (key1 int, key2 int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_compute_kll
+POSTHOOK: query: create table test_compute_kll (key1 int, key2 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_compute_kll
+Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: insert overwrite table test_compute_kll select a.key, b.key from src as a, src as b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_compute_kll
+POSTHOOK: query: insert overwrite table test_compute_kll select a.key, b.key from src as a, src as b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_compute_kll
+POSTHOOK: Lineage: test_compute_kll.key1 EXPRESSION [(src)a.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_compute_kll.key2 EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (key1 as float))) from test_compute_kll
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_compute_kll
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (key1 as float))) from test_compute_kll
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_compute_kll
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 250000
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 11
+ Sorted : false
+ Buffer Capacity Items: 601
+ Retained Items : 598
+ Storage Bytes : 2464
+ Min Value : 0.0
+ Max Value : 498.0
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (key2 as float))) from test_compute_kll
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_compute_kll
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (key2 as float))) from test_compute_kll
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_compute_kll
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 250000
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 11
+ Sorted : false
+ Buffer Capacity Items: 601
+ Retained Items : 598
+ Storage Bytes : 2464
+ Min Value : 0.0
+ Max Value : 498.0
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (key1 as float))) from test_compute_kll
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_compute_kll
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (key1 as float))) from test_compute_kll
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_compute_kll
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 250000
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 11
+ Sorted : false
+ Buffer Capacity Items: 601
+ Retained Items : 598
+ Storage Bytes : 2464
+ Min Value : 0.0
+ Max Value : 498.0
+### End sketch summary
+
+PREHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (key2 as float))) from test_compute_kll
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_compute_kll
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_kll_stringify(ds_kll_sketch(cast (key2 as float))) from test_compute_kll
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_compute_kll
+#### A masked pattern was here ####
+
+### KLL sketch summary:
+ K : 200
+ min K : 200
+ M : 8
+ N : 250000
+ Epsilon : 1.329%
+ Epsison PMF : 1.652%
+ Empty : false
+ Estimation Mode : true
+ Levels : 11
+ Sorted : false
+ Buffer Capacity Items: 601
+ Retained Items : 598
+ Storage Bytes : 2464
+ Min Value : 0.0
+ Max Value : 498.0
+### End sketch summary
+
diff --git a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_cume_dist.q.out b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_cume_dist.q.out
index 86d9bd2cd8a..723901d0d25 100644
--- a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_cume_dist.q.out
+++ b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_cume_dist.q.out
@@ -395,7 +395,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: boolean)
Statistics: Num rows: 1 Data size: 148 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
@@ -433,7 +433,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
@@ -567,7 +567,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: char(1))
Statistics: Num rows: 2 Data size: 458 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Map 6
Map Operator Tree:
diff --git a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_ntile.q.out b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_ntile.q.out
index 0cc4f720b74..57807528e42 100644
--- a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_ntile.q.out
+++ b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_ntile.q.out
@@ -398,7 +398,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: boolean)
Statistics: Num rows: 1 Data size: 148 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
@@ -436,7 +436,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
@@ -571,7 +571,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: char(1))
Statistics: Num rows: 2 Data size: 458 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Map 6
Map Operator Tree:
diff --git a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_percentile_disc.q.out b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_percentile_disc.q.out
index aff6f076136..e9452e24cfc 100644
--- a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_percentile_disc.q.out
+++ b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_percentile_disc.q.out
@@ -239,10 +239,10 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: char(1))
Statistics: Num rows: 2 Data size: 458 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
@@ -335,7 +335,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: char(1))
Statistics: Num rows: 2 Data size: 458 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Map 6
Map Operator Tree:
diff --git a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rank.q.out b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rank.q.out
index 7a815182a9c..26a22927882 100644
--- a/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rank.q.out
+++ b/ql/src/test/results/clientpositive/llap/sketches_materialized_view_rank.q.out
@@ -398,7 +398,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: boolean)
Statistics: Num rows: 1 Data size: 148 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
@@ -436,7 +436,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
@@ -571,7 +571,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: char(1))
Statistics: Num rows: 2 Data size: 458 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Map 6
Map Operator Tree:
diff --git a/ql/src/test/results/clientpositive/llap/sketches_rewrite_cume_dist.q.out b/ql/src/test/results/clientpositive/llap/sketches_rewrite_cume_dist.q.out
index 050e0838473..288359372fc 100644
--- a/ql/src/test/results/clientpositive/llap/sketches_rewrite_cume_dist.q.out
+++ b/ql/src/test/results/clientpositive/llap/sketches_rewrite_cume_dist.q.out
@@ -148,7 +148,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: boolean)
Statistics: Num rows: 1 Data size: 148 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
@@ -186,7 +186,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
@@ -299,7 +299,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: boolean)
Statistics: Num rows: 1 Data size: 148 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
@@ -371,7 +371,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 5
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
@@ -493,7 +493,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: boolean)
Statistics: Num rows: 1 Data size: 148 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
@@ -531,7 +531,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
@@ -645,7 +645,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: boolean)
Statistics: Num rows: 1 Data size: 148 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
@@ -683,7 +683,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
diff --git a/ql/src/test/results/clientpositive/llap/sketches_rewrite_cume_dist_partition_by.q.out b/ql/src/test/results/clientpositive/llap/sketches_rewrite_cume_dist_partition_by.q.out
index 64235ba13f5..ac90339391f 100644
--- a/ql/src/test/results/clientpositive/llap/sketches_rewrite_cume_dist_partition_by.q.out
+++ b/ql/src/test/results/clientpositive/llap/sketches_rewrite_cume_dist_partition_by.q.out
@@ -120,7 +120,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: char(1))
Statistics: Num rows: 3 Data size: 687 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
@@ -159,7 +159,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
diff --git a/ql/src/test/results/clientpositive/llap/sketches_rewrite_ntile.q.out b/ql/src/test/results/clientpositive/llap/sketches_rewrite_ntile.q.out
index 096e7e22e6e..fc06a82e53f 100644
--- a/ql/src/test/results/clientpositive/llap/sketches_rewrite_ntile.q.out
+++ b/ql/src/test/results/clientpositive/llap/sketches_rewrite_ntile.q.out
@@ -123,7 +123,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: boolean)
Statistics: Num rows: 1 Data size: 148 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
@@ -161,7 +161,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
@@ -285,7 +285,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: boolean)
Statistics: Num rows: 1 Data size: 148 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
@@ -323,7 +323,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
@@ -439,7 +439,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: boolean)
Statistics: Num rows: 1 Data size: 148 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
@@ -477,7 +477,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
diff --git a/ql/src/test/results/clientpositive/llap/sketches_rewrite_ntile_partition_by.q.out b/ql/src/test/results/clientpositive/llap/sketches_rewrite_ntile_partition_by.q.out
index bfad651dbee..22b0bca67c0 100644
--- a/ql/src/test/results/clientpositive/llap/sketches_rewrite_ntile_partition_by.q.out
+++ b/ql/src/test/results/clientpositive/llap/sketches_rewrite_ntile_partition_by.q.out
@@ -159,7 +159,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: char(1))
Statistics: Num rows: 3 Data size: 687 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
@@ -198,7 +198,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
diff --git a/ql/src/test/results/clientpositive/llap/sketches_rewrite_percentile_disc.q.out b/ql/src/test/results/clientpositive/llap/sketches_rewrite_percentile_disc.q.out
index d6c691157eb..8866944ced7 100644
--- a/ql/src/test/results/clientpositive/llap/sketches_rewrite_percentile_disc.q.out
+++ b/ql/src/test/results/clientpositive/llap/sketches_rewrite_percentile_disc.q.out
@@ -75,10 +75,10 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
diff --git a/ql/src/test/results/clientpositive/llap/sketches_rewrite_rank.q.out b/ql/src/test/results/clientpositive/llap/sketches_rewrite_rank.q.out
index 2404321a261..8357951d893 100644
--- a/ql/src/test/results/clientpositive/llap/sketches_rewrite_rank.q.out
+++ b/ql/src/test/results/clientpositive/llap/sketches_rewrite_rank.q.out
@@ -123,7 +123,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: boolean)
Statistics: Num rows: 1 Data size: 148 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
@@ -161,7 +161,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
@@ -276,7 +276,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: boolean)
Statistics: Num rows: 1 Data size: 148 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
@@ -348,7 +348,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 5
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
@@ -472,7 +472,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: boolean)
Statistics: Num rows: 1 Data size: 148 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
@@ -510,7 +510,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
@@ -626,7 +626,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: boolean)
Statistics: Num rows: 1 Data size: 148 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
@@ -664,7 +664,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
diff --git a/ql/src/test/results/clientpositive/llap/sketches_rewrite_rank_partition_by.q.out b/ql/src/test/results/clientpositive/llap/sketches_rewrite_rank_partition_by.q.out
index e0052a4cba1..90dbb585e33 100644
--- a/ql/src/test/results/clientpositive/llap/sketches_rewrite_rank_partition_by.q.out
+++ b/ql/src/test/results/clientpositive/llap/sketches_rewrite_rank_partition_by.q.out
@@ -125,7 +125,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: char(1))
Statistics: Num rows: 3 Data size: 687 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: binary)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 2
Execution mode: llap
@@ -164,7 +164,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: ds_kll_sketch(VALUE._col0)
diff --git a/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java b/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
index 358680161ae..998b19cb0e2 100644
--- a/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
+++ b/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
@@ -1214,10 +1214,15 @@ public class GenVectorCode extends Task {
{"VectorUDAFSum", "VectorUDAFSumLong", "long"},
{"VectorUDAFSum", "VectorUDAFSumDouble", "double"},
+ // "long" as <ValueType> for "MERGING" is ignored
{"VectorUDAFComputeBitVector", "VectorUDAFComputeBitVectorFinal", "long", "MERGING"},
{"VectorUDAFComputeBitVector", "VectorUDAFComputeBitVectorLong", "long", "COMPLETE"},
{"VectorUDAFComputeBitVector", "VectorUDAFComputeBitVectorDouble", "double", "COMPLETE"},
+ // "double" as <ValueType> for "MERGING" is ignored
+ {"VectorUDAFComputeDsKllSketch", "VectorUDAFComputeDsKllSketchFinal", "double", "MERGING"},
+ {"VectorUDAFComputeDsKllSketch", "VectorUDAFComputeDsKllSketchDouble", "double", "COMPLETE"},
+
// Template, <ClassName>, <ValueType>, <IfDefined>
{"VectorUDAFAvg", "VectorUDAFAvgLong", "long", "PARTIAL1"},
@@ -1471,7 +1476,9 @@ public class GenVectorCode extends Task {
} else if (tdesc[0].equals("VectorUDAFAvgDecimalMerge")) {
generateVectorUDAFAvgMerge(tdesc);
} else if (tdesc[0].equals("VectorUDAFComputeBitVector")) {
- generateVectorUDAFComputeBitVector(tdesc);
+ generateVectorUDAFDataSummary(tdesc);
+ } else if (tdesc[0].equals("VectorUDAFComputeDsKllSketch")) {
+ generateVectorUDAFDataSummary(tdesc);
} else if (tdesc[0].equals("VectorUDAFVar")) {
generateVectorUDAFVar(tdesc);
} else if (tdesc[0].equals("VectorUDAFVarDecimal")) {
@@ -1940,7 +1947,7 @@ public class GenVectorCode extends Task {
className, templateString);
}
- private void generateVectorUDAFComputeBitVector(String[] tdesc) throws Exception {
+ private void generateVectorUDAFDataSummary(String[] tdesc) throws Exception {
String className = tdesc[1];
String valueType = tdesc[2];
String columnType = getColumnVectorType(valueType);