Posted to commits@hive.apache.org by kg...@apache.org on 2019/03/27 09:23:33 UTC
[hive] 01/02: HIVE-16255: Support percentile_cont / percentile_disc (Laszlo Bodor via Zoltan Haindrich)
This is an automated email from the ASF dual-hosted git repository.
kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
commit a7cd99d8c20ab4b38487fd84afd36d1682310b09
Author: Laszlo Bodor <bo...@gmail.com>
AuthorDate: Wed Mar 27 09:24:39 2019 +0100
HIVE-16255: Support percentile_cont / percentile_disc (Laszlo Bodor via Zoltan Haindrich)
Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
---
.../hadoop/hive/ql/exec/FunctionRegistry.java | 2 +
.../ql/udf/generic/GenericUDAFPercentileCont.java | 502 ++++++++++++
.../ql/udf/generic/GenericUDAFPercentileDisc.java | 139 ++++
.../udf/generic/TestGenericUDAFPercentileCont.java | 181 +++++
.../udf/generic/TestGenericUDAFPercentileDisc.java | 182 +++++
.../queries/clientpositive/udaf_percentile_cont.q | 69 ++
.../queries/clientpositive/udaf_percentile_disc.q | 69 ++
.../results/clientpositive/show_functions.q.out | 2 +
.../clientpositive/udaf_percentile_cont.q.out | 421 +++++++++++
.../clientpositive/udaf_percentile_cont_disc.q.out | 842 +++++++++++++++++++++
.../clientpositive/udaf_percentile_disc.q.out | 421 +++++++++++
11 files changed, 2830 insertions(+)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index ed41bef..253570b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -473,6 +473,8 @@ public final class FunctionRegistry {
system.registerGenericUDAF("bloom_filter", new GenericUDAFBloomFilter());
system.registerGenericUDAF("approx_distinct", new GenericUDAFApproximateDistinct());
system.registerUDAF("percentile", UDAFPercentile.class);
+ system.registerGenericUDAF("percentile_cont", new GenericUDAFPercentileCont());
+ system.registerGenericUDAF("percentile_disc", new GenericUDAFPercentileDisc());
// Generic UDFs
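For reference: once registered above, the two aggregates resolve by name like any other built-in. A minimal sketch of the lookup path, assuming Hive's usual FunctionRegistry.getFunctionInfo(String) and FunctionInfo.getGenericUDAFResolver() API (illustration only, not part of this commit):

    // Illustration only; assumes the standard ql FunctionRegistry lookup API.
    FunctionInfo info = FunctionRegistry.getFunctionInfo("percentile_cont");
    GenericUDAFResolver resolver = info.getGenericUDAFResolver();
    // resolver is the GenericUDAFPercentileCont instance registered above.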
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java
new file mode 100644
index 0000000..72a19bd
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java
@@ -0,0 +1,502 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.io.LongWritable;
+
+/**
+ * GenericUDAFPercentileCont.
+ */
+@Description(name = "percentile_cont", value = "_FUNC_(input, pc) "
+ + "- Returns the percentile of expr at pc (range: [0,1]).")
+public class GenericUDAFPercentileCont extends AbstractGenericUDAFResolver {
+
+ private static final Comparator<LongWritable> LONG_COMPARATOR;
+ private static final Comparator<DoubleWritable> DOUBLE_COMPARATOR;
+
+ static {
+ LONG_COMPARATOR = ShimLoader.getHadoopShims().getLongComparator();
+ DOUBLE_COMPARATOR = new Comparator<DoubleWritable>() {
+ @Override
+ public int compare(DoubleWritable o1, DoubleWritable o2) {
+ return o1.compareTo(o2);
+ }
+ };
+ }
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ if (parameters.length != 2) {
+ throw new UDFArgumentTypeException(parameters.length - 1, "Exactly 2 arguments are expected.");
+ }
+
+ if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case VOID:
+ return new PercentileContLongEvaluator();
+ case FLOAT:
+ case DOUBLE:
+ case DECIMAL:
+ return new PercentileContDoubleEvaluator();
+ case STRING:
+ case TIMESTAMP:
+ case VARCHAR:
+ case CHAR:
+ case BOOLEAN:
+ case DATE:
+ default:
+ throw new UDFArgumentTypeException(0,
+ "Only numeric arguments are accepted but " + parameters[0].getTypeName() + " is passed.");
+ }
+ }
+
+ /**
+ * A comparator to sort the entries in order - Long.
+ */
+ @SuppressWarnings("serial")
+ public static class LongComparator
+ implements Comparator<Map.Entry<LongWritable, LongWritable>>, Serializable {
+ @Override
+ public int compare(Map.Entry<LongWritable, LongWritable> o1,
+ Map.Entry<LongWritable, LongWritable> o2) {
+ return LONG_COMPARATOR.compare(o1.getKey(), o2.getKey());
+ }
+ }
+
+ /**
+ * A comparator to sort the entries in order - Double.
+ */
+ @SuppressWarnings("serial")
+ public static class DoubleComparator
+ implements Comparator<Map.Entry<DoubleWritable, LongWritable>>, Serializable {
+ @Override
+ public int compare(Map.Entry<DoubleWritable, LongWritable> o1,
+ Map.Entry<DoubleWritable, LongWritable> o2) {
+ return DOUBLE_COMPARATOR.compare(o1.getKey(), o2.getKey());
+ }
+ }
+
+ protected interface PercentileCalculator<U> {
+ double getPercentile(List<Map.Entry<U, LongWritable>> entriesList, double position);
+ }
+
+ /**
+ * An abstract class to hold the generic UDAF evaluator logic for calculating percentiles.
+ */
+ public abstract static class PercentileContEvaluator<T, U> extends GenericUDAFEvaluator {
+ PercentileCalculator<U> calc = getCalculator();
+
+ /**
+ * A state class to store intermediate aggregation results.
+ */
+ public class PercentileAgg extends AbstractAggregationBuffer {
+ Map<U, LongWritable> counts;
+ List<DoubleWritable> percentiles;
+ }
+
+ // For PARTIAL1 and COMPLETE
+ protected PrimitiveObjectInspector inputOI;
+ MapObjectInspector countsOI;
+ ListObjectInspector percentilesOI;
+
+ // For PARTIAL1 and PARTIAL2
+ protected transient Object[] partialResult;
+
+ // FINAL and COMPLETE output
+ protected DoubleWritable result;
+
+ // PARTIAL2 and FINAL inputs
+ protected transient StructObjectInspector soi;
+ protected transient StructField countsField;
+ protected transient StructField percentilesField;
+
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ super.init(m, parameters);
+
+ initInspectors(parameters);
+
+ if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {// ...for partial result
+ partialResult = new Object[2];
+
+ ArrayList<ObjectInspector> foi = getPartialInspectors();
+
+ ArrayList<String> fname = new ArrayList<String>();
+ fname.add("counts");
+ fname.add("percentiles");
+
+ return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+ } else { // ...for final result
+ result = new DoubleWritable(0);
+ return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+ }
+ }
+
+ protected abstract PercentileCalculator<U> getCalculator();
+
+ protected abstract ArrayList<ObjectInspector> getPartialInspectors();
+
+ protected abstract T getInput(Object object, PrimitiveObjectInspector inputOI);
+
+ protected abstract U wrapInput(T input);
+
+ protected abstract U copyInput(U input);
+
+ protected abstract void sortEntries(List<Entry<U, LongWritable>> entriesList);
+
+ protected void initInspectors(ObjectInspector[] parameters) {
+ if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {// ...for real input data
+ inputOI = (PrimitiveObjectInspector) parameters[0];
+ } else { // ...for partial result as input
+ soi = (StructObjectInspector) parameters[0];
+
+ countsField = soi.getStructFieldRef("counts");
+ percentilesField = soi.getStructFieldRef("percentiles");
+
+ countsOI = (MapObjectInspector) countsField.getFieldObjectInspector();
+ percentilesOI = (ListObjectInspector) percentilesField.getFieldObjectInspector();
+ }
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ PercentileAgg agg = new PercentileAgg();
+ return agg;
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ PercentileAgg percAgg = (PercentileAgg) agg;
+ if (percAgg.counts != null) {
+ percAgg.counts.clear();
+ }
+ }
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ PercentileAgg percAgg = (PercentileAgg) agg;
+ Double percentile = ((HiveDecimalWritable) parameters[1]).getHiveDecimal().doubleValue();
+
+ if (percAgg.percentiles == null) {
+ validatePercentile(percentile);
+ percAgg.percentiles = new ArrayList<DoubleWritable>(1);
+ percAgg.percentiles.add(new DoubleWritable(percentile));
+ }
+
+ if (parameters[0] == null) {
+ return;
+ }
+
+ T input = getInput(parameters[0], inputOI);
+
+ if (input != null) {
+ increment(percAgg, wrapInput(input), 1);
+ }
+ }
+
+ protected void increment(PercentileAgg s, U input, long i) {
+ if (s.counts == null) {
+ s.counts = new HashMap<U, LongWritable>();
+ }
+ LongWritable count = s.counts.get(input);
+ if (count == null) {
+ s.counts.put(copyInput(input), new LongWritable(i));
+ } else {
+ count.set(count.get() + i);
+ }
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ if (partial == null) {
+ return;
+ }
+
+ Object objCounts = soi.getStructFieldData(partial, countsField);
+ Object objPercentiles = soi.getStructFieldData(partial, percentilesField);
+
+ Map<U, LongWritable> counts = (Map<U, LongWritable>) countsOI.getMap(objCounts);
+ List<DoubleWritable> percentiles =
+ (List<DoubleWritable>) percentilesOI.getList(objPercentiles);
+
+ if (counts == null || percentiles == null) {
+ return;
+ }
+
+ PercentileAgg percAgg = (PercentileAgg) agg;
+
+ if (percAgg.percentiles == null) {
+ percAgg.percentiles = new ArrayList<DoubleWritable>(percentiles);
+ }
+
+ for (Map.Entry<U, LongWritable> e : counts.entrySet()) {
+ increment(percAgg, e.getKey(), e.getValue().get());
+ }
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ PercentileAgg percAgg = (PercentileAgg) agg;
+
+ // No input data.
+ if (percAgg.counts == null || percAgg.counts.size() == 0) {
+ return null;
+ }
+
+ // Get all items into an array and sort them.
+ Set<Map.Entry<U, LongWritable>> entries = percAgg.counts.entrySet();
+ List<Map.Entry<U, LongWritable>> entriesList =
+ new ArrayList<Map.Entry<U, LongWritable>>(entries);
+ sortEntries(entriesList);
+
+ // Accumulate the counts.
+ long total = getTotal(entriesList);
+
+ // Initialize the result.
+ if (result == null) {
+ result = new DoubleWritable();
+ }
+
+ calculatePercentile(percAgg, entriesList, total);
+
+ return result;
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ PercentileAgg percAgg = (PercentileAgg) agg;
+ partialResult[0] = percAgg.counts;
+ partialResult[1] = percAgg.percentiles;
+
+ return partialResult;
+ }
+
+ protected long getTotal(List<Map.Entry<U, LongWritable>> entriesList) {
+ long total = 0;
+ for (int i = 0; i < entriesList.size(); i++) {
+ LongWritable count = entriesList.get(i).getValue();
+ total += count.get();
+ count.set(total);
+ }
+ return total;
+ }
+
+ protected void validatePercentile(Double percentile) {
+ if (percentile < 0.0 || percentile > 1.0) {
+ throw new RuntimeException("Percentile value must be within the range of 0 to 1.");
+ }
+ }
+
+ protected void calculatePercentile(PercentileAgg percAgg,
+ List<Map.Entry<U, LongWritable>> entriesList, long total) {
+ // maxPosition is the 1.0 percentile
+ long maxPosition = total - 1;
+ double position = maxPosition * percAgg.percentiles.get(0).get();
+ result.set(calc.getPercentile(entriesList, position));
+ }
+
+ }
+
+ /**
+ * The evaluator for percentile computation based on long.
+ */
+ public static class PercentileContLongEvaluator
+ extends PercentileContEvaluator<Long, LongWritable> {
+
+ protected ArrayList<ObjectInspector> getPartialInspectors() {
+ ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+
+ foi.add(ObjectInspectorFactory.getStandardMapObjectInspector(
+ PrimitiveObjectInspectorFactory.writableLongObjectInspector,
+ PrimitiveObjectInspectorFactory.writableLongObjectInspector));
+ foi.add(ObjectInspectorFactory.getStandardListObjectInspector(
+ PrimitiveObjectInspectorFactory.writableDoubleObjectInspector));
+ return foi;
+ }
+
+ protected Long getInput(Object parameter, PrimitiveObjectInspector inputOI) {
+ return PrimitiveObjectInspectorUtils.getLong(parameter, inputOI);
+ }
+
+ protected LongWritable wrapInput(Long input) {
+ return new LongWritable(input);
+ }
+
+ protected LongWritable copyInput(LongWritable input) {
+ return new LongWritable(input.get());
+ }
+
+ protected void sortEntries(List<Entry<LongWritable, LongWritable>> entriesList) {
+ Collections.sort(entriesList, new LongComparator());
+ }
+
+ @Override
+ protected PercentileCalculator<LongWritable> getCalculator() {
+ return new PercentileContLongCalculator();
+ }
+ }
+
+ /**
+ * The evaluator for percentile computation based on double.
+ */
+ public static class PercentileContDoubleEvaluator
+ extends PercentileContEvaluator<Double, DoubleWritable> {
+ @Override
+ protected ArrayList<ObjectInspector> getPartialInspectors() {
+ ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+
+ foi.add(ObjectInspectorFactory.getStandardMapObjectInspector(
+ PrimitiveObjectInspectorFactory.writableDoubleObjectInspector,
+ PrimitiveObjectInspectorFactory.writableLongObjectInspector));
+ foi.add(ObjectInspectorFactory.getStandardListObjectInspector(
+ PrimitiveObjectInspectorFactory.writableDoubleObjectInspector));
+ return foi;
+ }
+
+ @Override
+ protected Double getInput(Object parameter, PrimitiveObjectInspector inputOI) {
+ return PrimitiveObjectInspectorUtils.getDouble(parameter, inputOI);
+ }
+
+ @Override
+ protected DoubleWritable wrapInput(Double input) {
+ return new DoubleWritable(input);
+ }
+
+ protected DoubleWritable copyInput(DoubleWritable input) {
+ return new DoubleWritable(input.get());
+ }
+
+ protected void sortEntries(List<Entry<DoubleWritable, LongWritable>> entriesList) {
+ Collections.sort(entriesList, new DoubleComparator());
+ }
+
+ @Override
+ protected PercentileCalculator<DoubleWritable> getCalculator() {
+ return new PercentileContDoubleCalculator();
+ }
+ }
+
+ /**
+ * The continuous percentile calculator for long values.
+ */
+ public static class PercentileContLongCalculator implements PercentileCalculator<LongWritable> {
+ /**
+ * Get the percentile value.
+ */
+ public double getPercentile(List<Entry<LongWritable, LongWritable>> entriesList,
+ double position) {
+ // We may need to do linear interpolation to get the exact percentile
+ long lower = (long) Math.floor(position);
+ long higher = (long) Math.ceil(position);
+
+ // Linear search since this won't take much time from the total execution anyway
+ // lower has the range of [0 .. total-1]
+ // The first entry with accumulated count (lower+1) corresponds to the lower position.
+ int i = 0;
+ while (entriesList.get(i).getValue().get() < lower + 1) {
+ i++;
+ }
+
+ long lowerKey = entriesList.get(i).getKey().get();
+ if (higher == lower) {
+ // no interpolation needed because position does not have a fraction
+ return lowerKey;
+ }
+
+ if (entriesList.get(i).getValue().get() < higher + 1) {
+ i++;
+ }
+ long higherKey = entriesList.get(i).getKey().get();
+
+ if (higherKey == lowerKey) {
+ // no interpolation needed because lower position and higher position have the same key
+ return lowerKey;
+ }
+
+ // Linear interpolation to get the exact percentile
+ return (higher - position) * lowerKey + (position - lower) * higherKey;
+ }
+ }
+
+ public static class PercentileContDoubleCalculator
+ implements PercentileCalculator<DoubleWritable> {
+
+ public double getPercentile(List<Map.Entry<DoubleWritable, LongWritable>> entriesList,
+ double position) {
+ long lower = (long) Math.floor(position);
+ long higher = (long) Math.ceil(position);
+
+ int i = 0;
+ while (entriesList.get(i).getValue().get() < lower + 1) {
+ i++;
+ }
+
+ double lowerKey = entriesList.get(i).getKey().get();
+ if (higher == lower) {
+ return lowerKey;
+ }
+
+ if (entriesList.get(i).getValue().get() < higher + 1) {
+ i++;
+ }
+ double higherKey = entriesList.get(i).getKey().get();
+
+ if (higherKey == lowerKey) {
+ return lowerKey;
+ }
+
+ return (higher - position) * lowerKey + (position - lower) * higherKey;
+ }
+ }
+}
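For reference: the continuous calculators above implement the standard percentile_cont rule. The target position over the sorted inputs is (N - 1) * pc, and a fractional position interpolates linearly between the two neighboring values. The same math over a plain sorted array, as a minimal self-contained sketch (hypothetical helper, not part of the commit):

    // Mirrors calculatePercentile/getPercentile above, minus the counts map.
    static double percentileCont(double[] sorted, double pc) {
      double position = (sorted.length - 1) * pc; // pc = 1.0 maps to the last element
      int lower = (int) Math.floor(position);
      int higher = (int) Math.ceil(position);
      if (lower == higher) {
        return sorted[lower]; // integral position: no interpolation needed
      }
      // Weight each neighbor by its distance from the target position.
      return (higher - position) * sorted[lower] + (position - lower) * sorted[higher];
    }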
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java
new file mode 100644
index 0000000..3ac336e
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+
+/**
+ * GenericUDAFPercentileDisc.
+ */
+@Description(name = "percentile_disc", value = "_FUNC_(input, pc) - "
+ + "Returns the percentile of expr at pc (range: [0,1]) without interpolation.")
+public class GenericUDAFPercentileDisc extends GenericUDAFPercentileCont {
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ if (parameters.length != 2) {
+ throw new UDFArgumentTypeException(parameters.length - 1, "Exactly 2 arguments are expected.");
+ }
+
+ if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case VOID:
+ return new PercentileDiscLongEvaluator();
+ case FLOAT:
+ case DOUBLE:
+ case DECIMAL:
+ return new PercentileDiscDoubleEvaluator();
+ case STRING:
+ case TIMESTAMP:
+ case VARCHAR:
+ case CHAR:
+ case BOOLEAN:
+ case DATE:
+ default:
+ throw new UDFArgumentTypeException(0,
+ "Only numeric arguments are accepted but " + parameters[0].getTypeName() + " is passed.");
+ }
+ }
+
+ /**
+ * The evaluator for discrete percentile computation based on long.
+ */
+ public static class PercentileDiscLongEvaluator extends PercentileContLongEvaluator {
+ @Override
+ protected PercentileCalculator<LongWritable> getCalculator() {
+ return new PercentileDiscLongCalculator();
+ }
+ }
+
+ /**
+ * The evaluator for discrete percentile computation based on double.
+ */
+ public static class PercentileDiscDoubleEvaluator extends PercentileContDoubleEvaluator {
+ @Override
+ protected PercentileCalculator<DoubleWritable> getCalculator() {
+ return new PercentileDiscDoubleCalculator();
+ }
+ }
+
+ public static class PercentileDiscLongCalculator implements PercentileCalculator<LongWritable> {
+ public double getPercentile(List<Map.Entry<LongWritable, LongWritable>> entriesList,
+ double position) {
+ long lower = (long) Math.floor(position);
+ long higher = (long) Math.ceil(position);
+
+ int i = 0;
+ while (entriesList.get(i).getValue().get() < lower + 1) {
+ i++;
+ }
+
+ long lowerKey = entriesList.get(i).getKey().get();
+ if (higher == lower) {
+ return lowerKey;
+ }
+
+ if (entriesList.get(i).getValue().get() < higher + 1) {
+ i++;
+ }
+ return entriesList.get(i).getKey().get();
+ }
+ }
+
+ public static class PercentileDiscDoubleCalculator
+ implements PercentileCalculator<DoubleWritable> {
+ public double getPercentile(List<Map.Entry<DoubleWritable, LongWritable>> entriesList,
+ double position) {
+ long lower = (long) Math.floor(position);
+ long higher = (long) Math.ceil(position);
+
+ int i = 0;
+ while (entriesList.get(i).getValue().get() < lower + 1) {
+ i++;
+ }
+
+ double lowerKey = entriesList.get(i).getKey().get();
+ if (higher == lower) {
+ return lowerKey;
+ }
+
+ if (entriesList.get(i).getValue().get() < higher + 1) {
+ i++;
+ }
+ return entriesList.get(i).getKey().get();
+ }
+ }
+}
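For reference: percentile_disc differs from percentile_cont only in the last step. When the target position (N - 1) * pc falls between two values, it returns the higher actual input value instead of interpolating, so the result is always a value present in the data. The equivalent rule over a plain sorted array, as a minimal sketch (hypothetical helper, not part of the commit):

    // Mirrors PercentileDiscLongCalculator/PercentileDiscDoubleCalculator above.
    static double percentileDisc(double[] sorted, double pc) {
      double position = (sorted.length - 1) * pc;
      int lower = (int) Math.floor(position);
      int higher = (int) Math.ceil(position);
      // No interpolation: a fractional position selects the higher neighbor.
      return (lower == higher) ? sorted[lower] : sorted[higher];
    }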
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileCont.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileCont.java
new file mode 100644
index 0000000..1a7aaca
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileCont.java
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileContDoubleEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileContEvaluator.PercentileAgg;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileContLongCalculator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileContLongEvaluator;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Test class for GenericUDAFPercentileCont.
+ */
+public class TestGenericUDAFPercentileCont {
+ PercentileContLongCalculator calc = new PercentileContLongCalculator();
+
+ // Long type tests
+ @Test
+ public void testNoInterpolation() throws Exception {
+ Long[] items = new Long[] {1L, 2L, 3L, 4L, 5L };
+ checkPercentile(items, 0.5, 3);
+ }
+
+ @Test
+ public void testInterpolateLower() throws Exception {
+ Long[] items = new Long[] {1L, 2L, 3L, 4L, 5L };
+ checkPercentile(items, 0.49, 2.96);
+ }
+
+ @Test
+ public void testInterpolateHigher() throws Exception {
+ Long[] items = new Long[] {1L, 2L, 3L, 4L, 5L };
+ checkPercentile(items, 0.51, 3.04);
+ }
+
+ @Test
+ public void testSingleItem50() throws Exception {
+ Long[] items = new Long[] {1L };
+ checkPercentile(items, 0.5, 1);
+ }
+
+ @Test
+ public void testSingleItem100() throws Exception {
+ Long[] items = new Long[] {1L };
+ checkPercentile(items, 1, 1);
+ }
+
+ /*
+ * POSTGRES check: WITH vals (k) AS (VALUES (54), (35), (15), (15), (76), (87), (78)) SELECT *
+ * INTO table percentile_src FROM vals; select percentile_cont(.50) within group (order by k) as
+ * perc from percentile_src;
+ */
+ @Test
+ public void testPostgresRefExample() throws Exception {
+ Long[] items = new Long[] {54L, 35L, 15L, 15L, 76L, 87L, 78L };
+ checkPercentile(items, 0.5, 54);
+ }
+
+ /*
+ * POSTGRES check: WITH vals (k) AS (VALUES (54), (35), (15), (15), (76), (87), (78)) SELECT *
+ * INTO table percentile_src FROM vals; select percentile_cont(.72) within group (order by k) as
+ * perc from percentile_src;
+ */
+ @Test
+ public void testPostgresRefExample2() throws Exception {
+ Long[] items = new Long[] {54L, 35L, 15L, 15L, 76L, 87L, 78L };
+ checkPercentile(items, 0.72, 76.64);
+ }
+
+ // Double type tests
+ @Test
+ public void testDoubleNoInterpolation() throws Exception {
+ Double[] items = new Double[] {1.0, 2.0, 3.0, 4.0, 5.0 };
+ checkPercentile(items, 0.5, 3);
+ }
+
+ @Test
+ public void testDoubleInterpolateLower() throws Exception {
+ Double[] items = new Double[] {1.0, 2.0, 3.0, 4.0, 5.0 };
+ checkPercentile(items, 0.49, 2.96);
+ }
+
+ @Test
+ public void testDoubleInterpolateHigher() throws Exception {
+ Double[] items = new Double[] {1.0, 2.0, 3.0, 4.0, 5.0 };
+ checkPercentile(items, 0.51, 3.04);
+ }
+
+ @Test
+ public void testDoubleSingleItem50() throws Exception {
+ Double[] items = new Double[] {1.0 };
+ checkPercentile(items, 0.5, 1);
+ }
+
+ @Test
+ public void testDoubleSingleItem100() throws Exception {
+ Double[] items = new Double[] {1.0 };
+ checkPercentile(items, 1, 1);
+ }
+
+ /*
+ * POSTGRES check: WITH vals (k) AS (VALUES (54.0), (35.0), (15.0), (15.0), (76.0), (87.0),
+ * (78.0)) SELECT * INTO table percentile_src FROM vals; select percentile_cont(.50) within group
+ * (order by k) as perc from percentile_src;
+ */
+ @Test
+ public void testDoublePostgresRefExample() throws Exception {
+ Double[] items = new Double[] {54.0, 35.0, 15.0, 15.0, 76.0, 87.0, 78.0 };
+ checkPercentile(items, 0.5, 54);
+ }
+
+ /*
+ * POSTGRES check: WITH vals (k) AS (VALUES (54.5), (35.3), (15.7), (15.7), (76.8), (87.34),
+ * (78.0)) SELECT * INTO table percentile_src FROM vals; select percentile_cont(.72) within group
+ * (order by k) as perc from percentile_src;
+ */
+ @Test
+ public void testDoublePostgresRefExample2() throws Exception {
+ Double[] items = new Double[] {54.5, 35.3, 15.7, 15.7, 76.8, 87.34, 78.0 };
+ checkPercentile(items, 0.72, 77.184);
+ }
+
+ @SuppressWarnings({ "unchecked", "resource" })
+ private void checkPercentile(Long[] items, double percentile, double expected) throws Exception {
+ PercentileContLongEvaluator eval = new GenericUDAFPercentileCont.PercentileContLongEvaluator();
+
+ PercentileAgg agg = new PercentileContLongEvaluator().new PercentileAgg();
+
+ agg.percentiles = new ArrayList<DoubleWritable>();
+ agg.percentiles.add(new DoubleWritable(percentile));
+
+ for (int i = 0; i < items.length; i++) {
+ eval.increment(agg, new LongWritable(items[i]), 1);
+ }
+
+ DoubleWritable result = (DoubleWritable) eval.terminate(agg);
+
+ Assert.assertEquals(expected, result.get(), 0.01);
+ eval.close();
+ }
+
+ @SuppressWarnings({ "unchecked", "resource" })
+ private void checkPercentile(Double[] items, double percentile, double expected) throws Exception {
+ PercentileContDoubleEvaluator eval = new GenericUDAFPercentileCont.PercentileContDoubleEvaluator();
+
+ PercentileAgg agg = new PercentileContDoubleEvaluator().new PercentileAgg();
+
+ agg.percentiles = new ArrayList<DoubleWritable>();
+ agg.percentiles.add(new DoubleWritable(percentile));
+
+ for (int i = 0; i < items.length; i++) {
+ eval.increment(agg, new DoubleWritable(items[i]), 1);
+ }
+
+ DoubleWritable result = (DoubleWritable) eval.terminate(agg);
+
+ Assert.assertEquals(expected, result.get(), 0.01);
+ eval.close();
+ }
+}
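For reference, a worked check of testInterpolateLower above: with items {1, 2, 3, 4, 5} and pc = 0.49, the position is (5 - 1) * 0.49 = 1.96, so the result interpolates between sorted[1] = 2 and sorted[2] = 3 as (2 - 1.96) * 2 + (1.96 - 1) * 3 = 0.04 * 2 + 0.96 * 3 = 2.96, matching the expected value.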
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileDisc.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileDisc.java
new file mode 100644
index 0000000..1a1b3b0
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDAFPercentileDisc.java
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileContEvaluator.PercentileAgg;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont.PercentileContLongEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileDisc.PercentileDiscDoubleEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileDisc.PercentileDiscLongCalculator;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Test class for GenericUDAFPercentileDisc.
+ */
+public class TestGenericUDAFPercentileDisc {
+ PercentileDiscLongCalculator calc = new PercentileDiscLongCalculator();
+
+ // Long type tests
+ @Test
+ public void testNoInterpolation() throws Exception {
+ Long[] items = new Long[] {1L, 2L, 3L, 4L, 5L };
+ checkPercentile(items, 0.5, 3);
+ }
+
+ @Test
+ public void testInterpolateLower() throws Exception {
+ Long[] items = new Long[] {1L, 2L, 3L, 4L, 5L };
+ checkPercentile(items, 0.49, 3.0);
+ }
+
+ @Test
+ public void testInterpolateHigher() throws Exception {
+ Long[] items = new Long[] {1L, 2L, 3L, 4L, 5L };
+ checkPercentile(items, 0.51, 4.0);
+ }
+
+ @Test
+ public void testSingleItem50() throws Exception {
+ Long[] items = new Long[] {1L };
+ checkPercentile(items, 0.5, 1);
+ }
+
+ @Test
+ public void testSingleItem100() throws Exception {
+ Long[] items = new Long[] {1L };
+ checkPercentile(items, 1, 1);
+ }
+
+ /*
+ * POSTGRES check: WITH vals (k) AS (VALUES (54), (35), (15), (15), (76), (87), (78)) SELECT *
+ * INTO table percentile_src FROM vals; select percentile_disc(.50) within group (order by k) as
+ * perc from percentile_src;
+ */
+ @Test
+ public void testPostgresRefExample() throws Exception {
+ Long[] items = new Long[] {54L, 35L, 15L, 15L, 76L, 87L, 78L };
+ checkPercentile(items, 0.5, 54);
+ }
+
+ /*
+ * POSTGRES check: WITH vals (k) AS (VALUES (54), (35), (15), (15), (76), (87), (78)) SELECT *
+ * INTO table percentile_src FROM vals; select percentile_disc(.72) within group (order by k) as
+ * perc from percentile_src;
+ */
+ @Test
+ public void testPostgresRefExample2() throws Exception {
+ Long[] items = new Long[] {54L, 35L, 15L, 15L, 76L, 87L, 78L };
+ checkPercentile(items, 0.72, 78);
+ }
+
+ // Double type tests
+ @Test
+ public void testDoubleNoInterpolation() throws Exception {
+ Double[] items = new Double[] {1.0, 2.0, 3.0, 4.0, 5.0 };
+ checkPercentile(items, 0.5, 3);
+ }
+
+ @Test
+ public void testDoubleInterpolateLower() throws Exception {
+ Double[] items = new Double[] {1.0, 2.0, 3.0, 4.0, 5.0 };
+ checkPercentile(items, 0.49, 3.0);
+ }
+
+ @Test
+ public void testDoubleInterpolateHigher() throws Exception {
+ Double[] items = new Double[] {1.0, 2.0, 3.0, 4.0, 5.0 };
+ checkPercentile(items, 0.51, 4.0);
+ }
+
+ @Test
+ public void testDoubleSingleItem50() throws Exception {
+ Double[] items = new Double[] {1.0 };
+ checkPercentile(items, 0.5, 1);
+ }
+
+ @Test
+ public void testDoubleSingleItem100() throws Exception {
+ Double[] items = new Double[] {1.0 };
+ checkPercentile(items, 1, 1);
+ }
+
+ /*
+ * POSTGRES check: WITH vals (k) AS (VALUES (54.0), (35.0), (15.0), (15.0), (76.0), (87.0),
+ * (78.0)) SELECT * INTO table percentile_src FROM vals; select percentile_disc(.50) within group
+ * (order by k) as perc from percentile_src;
+ */
+ @Test
+ public void testDoublePostgresRefExample() throws Exception {
+ Double[] items = new Double[] {54.0, 35.0, 15.0, 15.0, 76.0, 87.0, 78.0 };
+ checkPercentile(items, 0.5, 54);
+ }
+
+ /*
+ * POSTGRES check: WITH vals (k) AS (VALUES (54.5), (35.3), (15.7), (15.7), (76.8), (87.34),
+ * (78.0)) SELECT * INTO table percentile_src FROM vals; select percentile_disc(.72) within group
+ * (order by k) as perc from percentile_src;
+ */
+ @Test
+ public void testDoublePostgresRefExample2() throws Exception {
+ Double[] items = new Double[] {54.5, 35.3, 15.7, 15.7, 76.8, 87.34, 78.0 };
+ checkPercentile(items, 0.72, 78.0);
+ }
+
+ private void checkPercentile(Long[] items, double percentile, double expected) throws Exception {
+ PercentileContLongEvaluator eval = new GenericUDAFPercentileDisc.PercentileDiscLongEvaluator();
+
+ PercentileAgg agg = new PercentileContLongEvaluator().new PercentileAgg();
+
+ agg.percentiles = new ArrayList<DoubleWritable>();
+ agg.percentiles.add(new DoubleWritable(percentile));
+
+ for (int i = 0; i < items.length; i++) {
+ eval.increment(agg, new LongWritable(items[i]), 1);
+ }
+
+ DoubleWritable result = (DoubleWritable) eval.terminate(agg);
+
+ Assert.assertEquals(expected, result.get(), 0.01);
+ eval.close();
+ }
+
+ @SuppressWarnings({ "unchecked", "resource" })
+ private void checkPercentile(Double[] items, double percentile, double expected)
+ throws Exception {
+ PercentileDiscDoubleEvaluator eval =
+ new GenericUDAFPercentileDisc.PercentileDiscDoubleEvaluator();
+
+ PercentileAgg agg = new PercentileDiscDoubleEvaluator().new PercentileAgg();
+
+ agg.percentiles = new ArrayList<DoubleWritable>();
+ agg.percentiles.add(new DoubleWritable(percentile));
+
+ for (int i = 0; i < items.length; i++) {
+ eval.increment(agg, new DoubleWritable(items[i]), 1);
+ }
+
+ DoubleWritable result = (DoubleWritable) eval.terminate(agg);
+
+ Assert.assertEquals(expected, result.get(), 0.01);
+ eval.close();
+ }
+}
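For reference, a worked check of testPostgresRefExample2 above: the sorted items are {15, 15, 35, 54, 76, 78, 87} (N = 7), so the position is (7 - 1) * 0.72 = 4.32. percentile_disc takes the value at ceil(4.32) = index 5, which is 78, matching both the expected value and the PostgreSQL reference query.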
diff --git a/ql/src/test/queries/clientpositive/udaf_percentile_cont.q b/ql/src/test/queries/clientpositive/udaf_percentile_cont.q
new file mode 100644
index 0000000..6d788c1
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udaf_percentile_cont.q
@@ -0,0 +1,69 @@
+--! qt:dataset:src
+DESCRIBE FUNCTION percentile_cont;
+DESCRIBE FUNCTION EXTENDED percentile_cont;
+
+
+set hive.map.aggr = false;
+set hive.groupby.skewindata = false;
+
+-- SORT_QUERY_RESULTS
+
+SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10;
+
+
+set hive.map.aggr = true;
+set hive.groupby.skewindata = false;
+
+SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10;
+
+
+
+set hive.map.aggr = false;
+set hive.groupby.skewindata = true;
+
+SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10;
+
+
+set hive.map.aggr = true;
+set hive.groupby.skewindata = true;
+
+SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10;
+
+
+set hive.map.aggr = true;
+set hive.groupby.skewindata = false;
+
+-- test null handling
+SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(NULL, 0.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10;
+
+
+-- test empty set handling (groups with only NULL inputs)
+SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10;
+
+select percentile_cont(cast(key as bigint), 0.5) from src where false;
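Note on the four set combinations above: toggling hive.map.aggr and hive.groupby.skewindata enables or disables map-side aggregation and the two-stage skew plan, which together drive the evaluator through its different GenericUDAFEvaluator modes (COMPLETE, PARTIAL1/PARTIAL2 via terminatePartial/merge, and FINAL), so the same query exercises every code path of the new UDAF.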
diff --git a/ql/src/test/queries/clientpositive/udaf_percentile_disc.q b/ql/src/test/queries/clientpositive/udaf_percentile_disc.q
new file mode 100644
index 0000000..7ba703e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udaf_percentile_disc.q
@@ -0,0 +1,69 @@
+--! qt:dataset:src
+DESCRIBE FUNCTION percentile_disc;
+DESCRIBE FUNCTION EXTENDED percentile_disc;
+
+
+set hive.map.aggr = false;
+set hive.groupby.skewindata = false;
+
+-- SORT_QUERY_RESULTS
+
+SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10;
+
+
+set hive.map.aggr = true;
+set hive.groupby.skewindata = false;
+
+SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10;
+
+
+
+set hive.map.aggr = false;
+set hive.groupby.skewindata = true;
+
+SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10;
+
+
+set hive.map.aggr = true;
+set hive.groupby.skewindata = true;
+
+SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10;
+
+
+set hive.map.aggr = true;
+set hive.groupby.skewindata = false;
+
+-- test null handling
+SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(NULL, 0.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10;
+
+
+-- test empty set handling (groups with only NULL inputs)
+SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10;
+
+select percentile_disc(cast(key as bigint), 0.5) from src where false;
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index 0b772f0..4e44753 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -194,6 +194,8 @@ parse_url_tuple
percent_rank
percentile
percentile_approx
+percentile_cont
+percentile_disc
pi
pmod
posexplode
diff --git a/ql/src/test/results/clientpositive/udaf_percentile_cont.q.out b/ql/src/test/results/clientpositive/udaf_percentile_cont.q.out
new file mode 100644
index 0000000..dda6ce5
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udaf_percentile_cont.q.out
@@ -0,0 +1,421 @@
+PREHOOK: query: DESCRIBE FUNCTION percentile_cont
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION percentile_cont
+POSTHOOK: type: DESCFUNCTION
+percentile_cont(input, pc) - Returns the percentile of expr at pc (range: [0,1]).
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_cont
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_cont
+POSTHOOK: type: DESCFUNCTION
+percentile_cont(input, pc) - Returns the percentile of expr at pc (range: [0,1]).
+Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont
+Function type:BUILTIN
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0 4.5 9.0
+1 10.0 15.0 19.0
+10 100.0 103.0 105.0
+11 111.0 117.0 119.0
+12 120.0 127.0 129.0
+13 131.0 137.0 138.0
+14 143.0 146.0 149.0
+15 150.0 154.0 158.0
+16 160.0 166.5 169.0
+17 170.0 175.0 179.0
+18 180.0 186.5 189.0
+19 190.0 194.5 199.0
+2 20.0 26.0 28.0
+20 200.0 205.0 209.0
+21 213.0 216.5 219.0
+22 221.0 224.0 229.0
+23 230.0 234.0 239.0
+24 241.0 244.0 249.0
+25 252.0 256.0 258.0
+26 260.0 264.0 266.0
+27 272.0 275.0 278.0
+28 280.0 283.5 289.0
+29 291.0 297.0 298.0
+3 30.0 35.0 37.0
+30 302.0 307.0 309.0
+31 310.0 316.0 318.0
+32 321.0 324.0 327.0
+33 331.0 333.0 339.0
+34 341.0 345.0 348.0
+35 351.0 353.0 356.0
+36 360.0 367.0 369.0
+37 373.0 376.0 379.0
+38 382.0 384.0 389.0
+39 392.0 396.0 399.0
+4 41.0 42.5 47.0
+40 400.0 403.5 409.0
+41 411.0 415.5 419.0
+42 421.0 425.5 429.0
+43 430.0 435.0 439.0
+44 443.0 446.0 449.0
+45 452.0 455.0 459.0
+46 460.0 467.5 469.0
+47 470.0 477.0 479.0
+48 480.0 484.0 489.0
+49 490.0 494.5 498.0
+5 51.0 54.0 58.0
+6 64.0 66.5 69.0
+7 70.0 73.0 78.0
+8 80.0 84.0 87.0
+9 90.0 95.0 98.0
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0 4.5 9.0
+1 10.0 15.0 19.0
+10 100.0 103.0 105.0
+11 111.0 117.0 119.0
+12 120.0 127.0 129.0
+13 131.0 137.0 138.0
+14 143.0 146.0 149.0
+15 150.0 154.0 158.0
+16 160.0 166.5 169.0
+17 170.0 175.0 179.0
+18 180.0 186.5 189.0
+19 190.0 194.5 199.0
+2 20.0 26.0 28.0
+20 200.0 205.0 209.0
+21 213.0 216.5 219.0
+22 221.0 224.0 229.0
+23 230.0 234.0 239.0
+24 241.0 244.0 249.0
+25 252.0 256.0 258.0
+26 260.0 264.0 266.0
+27 272.0 275.0 278.0
+28 280.0 283.5 289.0
+29 291.0 297.0 298.0
+3 30.0 35.0 37.0
+30 302.0 307.0 309.0
+31 310.0 316.0 318.0
+32 321.0 324.0 327.0
+33 331.0 333.0 339.0
+34 341.0 345.0 348.0
+35 351.0 353.0 356.0
+36 360.0 367.0 369.0
+37 373.0 376.0 379.0
+38 382.0 384.0 389.0
+39 392.0 396.0 399.0
+4 41.0 42.5 47.0
+40 400.0 403.5 409.0
+41 411.0 415.5 419.0
+42 421.0 425.5 429.0
+43 430.0 435.0 439.0
+44 443.0 446.0 449.0
+45 452.0 455.0 459.0
+46 460.0 467.5 469.0
+47 470.0 477.0 479.0
+48 480.0 484.0 489.0
+49 490.0 494.5 498.0
+5 51.0 54.0 58.0
+6 64.0 66.5 69.0
+7 70.0 73.0 78.0
+8 80.0 84.0 87.0
+9 90.0 95.0 98.0
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0 4.5 9.0
+1 10.0 15.0 19.0
+10 100.0 103.0 105.0
+11 111.0 117.0 119.0
+12 120.0 127.0 129.0
+13 131.0 137.0 138.0
+14 143.0 146.0 149.0
+15 150.0 154.0 158.0
+16 160.0 166.5 169.0
+17 170.0 175.0 179.0
+18 180.0 186.5 189.0
+19 190.0 194.5 199.0
+2 20.0 26.0 28.0
+20 200.0 205.0 209.0
+21 213.0 216.5 219.0
+22 221.0 224.0 229.0
+23 230.0 234.0 239.0
+24 241.0 244.0 249.0
+25 252.0 256.0 258.0
+26 260.0 264.0 266.0
+27 272.0 275.0 278.0
+28 280.0 283.5 289.0
+29 291.0 297.0 298.0
+3 30.0 35.0 37.0
+30 302.0 307.0 309.0
+31 310.0 316.0 318.0
+32 321.0 324.0 327.0
+33 331.0 333.0 339.0
+34 341.0 345.0 348.0
+35 351.0 353.0 356.0
+36 360.0 367.0 369.0
+37 373.0 376.0 379.0
+38 382.0 384.0 389.0
+39 392.0 396.0 399.0
+4 41.0 42.5 47.0
+40 400.0 403.5 409.0
+41 411.0 415.5 419.0
+42 421.0 425.5 429.0
+43 430.0 435.0 439.0
+44 443.0 446.0 449.0
+45 452.0 455.0 459.0
+46 460.0 467.5 469.0
+47 470.0 477.0 479.0
+48 480.0 484.0 489.0
+49 490.0 494.5 498.0
+5 51.0 54.0 58.0
+6 64.0 66.5 69.0
+7 70.0 73.0 78.0
+8 80.0 84.0 87.0
+9 90.0 95.0 98.0
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0 4.5 9.0
+1 10.0 15.0 19.0
+10 100.0 103.0 105.0
+11 111.0 117.0 119.0
+12 120.0 127.0 129.0
+13 131.0 137.0 138.0
+14 143.0 146.0 149.0
+15 150.0 154.0 158.0
+16 160.0 166.5 169.0
+17 170.0 175.0 179.0
+18 180.0 186.5 189.0
+19 190.0 194.5 199.0
+2 20.0 26.0 28.0
+20 200.0 205.0 209.0
+21 213.0 216.5 219.0
+22 221.0 224.0 229.0
+23 230.0 234.0 239.0
+24 241.0 244.0 249.0
+25 252.0 256.0 258.0
+26 260.0 264.0 266.0
+27 272.0 275.0 278.0
+28 280.0 283.5 289.0
+29 291.0 297.0 298.0
+3 30.0 35.0 37.0
+30 302.0 307.0 309.0
+31 310.0 316.0 318.0
+32 321.0 324.0 327.0
+33 331.0 333.0 339.0
+34 341.0 345.0 348.0
+35 351.0 353.0 356.0
+36 360.0 367.0 369.0
+37 373.0 376.0 379.0
+38 382.0 384.0 389.0
+39 392.0 396.0 399.0
+4 41.0 42.5 47.0
+40 400.0 403.5 409.0
+41 411.0 415.5 419.0
+42 421.0 425.5 429.0
+43 430.0 435.0 439.0
+44 443.0 446.0 449.0
+45 452.0 455.0 459.0
+46 460.0 467.5 469.0
+47 470.0 477.0 479.0
+48 480.0 484.0 489.0
+49 490.0 494.5 498.0
+5 51.0 54.0 58.0
+6 64.0 66.5 69.0
+7 70.0 73.0 78.0
+8 80.0 84.0 87.0
+9 90.0 95.0 98.0
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(NULL, 0.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(NULL, 0.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 NULL
+1 NULL
+10 NULL
+11 NULL
+12 NULL
+13 NULL
+14 NULL
+15 NULL
+16 NULL
+17 NULL
+18 NULL
+19 NULL
+2 NULL
+20 NULL
+21 NULL
+22 NULL
+23 NULL
+24 NULL
+25 NULL
+26 NULL
+27 NULL
+28 NULL
+29 NULL
+3 NULL
+30 NULL
+31 NULL
+32 NULL
+33 NULL
+34 NULL
+35 NULL
+36 NULL
+37 NULL
+38 NULL
+39 NULL
+4 NULL
+40 NULL
+41 NULL
+42 NULL
+43 NULL
+44 NULL
+45 NULL
+46 NULL
+47 NULL
+48 NULL
+49 NULL
+5 NULL
+6 NULL
+7 NULL
+8 NULL
+9 NULL
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 1.0
+1 1.0
+10 NULL
+11 NULL
+12 NULL
+13 NULL
+14 NULL
+15 NULL
+16 NULL
+17 NULL
+18 NULL
+19 NULL
+2 1.0
+20 NULL
+21 NULL
+22 NULL
+23 NULL
+24 NULL
+25 NULL
+26 NULL
+27 NULL
+28 NULL
+29 NULL
+3 1.0
+30 NULL
+31 NULL
+32 NULL
+33 NULL
+34 NULL
+35 NULL
+36 NULL
+37 NULL
+38 NULL
+39 NULL
+4 1.0
+40 NULL
+41 NULL
+42 NULL
+43 NULL
+44 NULL
+45 NULL
+46 NULL
+47 NULL
+48 NULL
+49 NULL
+5 NULL
+6 NULL
+7 NULL
+8 NULL
+9 NULL
+PREHOOK: query: select percentile_cont(cast(key as bigint), 0.5) from src where false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select percentile_cont(cast(key as bigint), 0.5) from src where false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL
diff --git a/ql/src/test/results/clientpositive/udaf_percentile_cont_disc.q.out b/ql/src/test/results/clientpositive/udaf_percentile_cont_disc.q.out
new file mode 100644
index 0000000..7400d0b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udaf_percentile_cont_disc.q.out
@@ -0,0 +1,842 @@
+PREHOOK: query: DESCRIBE FUNCTION percentile_cont
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION percentile_cont
+POSTHOOK: type: DESCFUNCTION
+percentile_cont(input, pc) - Returns the percentile of expr at pc (range: [0,1]).
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_cont
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_cont
+POSTHOOK: type: DESCFUNCTION
+percentile_cont(input, pc) - Returns the percentile of expr at pc (range: [0,1]).
+Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileCont
+Function type:BUILTIN
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS INT), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS INT), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0 4.5 9.0
+1 10.0 15.0 19.0
+10 100.0 103.0 105.0
+11 111.0 117.0 119.0
+12 120.0 127.0 129.0
+13 131.0 137.0 138.0
+14 143.0 146.0 149.0
+15 150.0 154.0 158.0
+16 160.0 166.5 169.0
+17 170.0 175.0 179.0
+18 180.0 186.5 189.0
+19 190.0 194.5 199.0
+2 20.0 26.0 28.0
+20 200.0 205.0 209.0
+21 213.0 216.5 219.0
+22 221.0 224.0 229.0
+23 230.0 234.0 239.0
+24 241.0 244.0 249.0
+25 252.0 256.0 258.0
+26 260.0 264.0 266.0
+27 272.0 275.0 278.0
+28 280.0 283.5 289.0
+29 291.0 297.0 298.0
+3 30.0 35.0 37.0
+30 302.0 307.0 309.0
+31 310.0 316.0 318.0
+32 321.0 324.0 327.0
+33 331.0 333.0 339.0
+34 341.0 345.0 348.0
+35 351.0 353.0 356.0
+36 360.0 367.0 369.0
+37 373.0 376.0 379.0
+38 382.0 384.0 389.0
+39 392.0 396.0 399.0
+4 41.0 42.5 47.0
+40 400.0 403.5 409.0
+41 411.0 415.5 419.0
+42 421.0 425.5 429.0
+43 430.0 435.0 439.0
+44 443.0 446.0 449.0
+45 452.0 455.0 459.0
+46 460.0 467.5 469.0
+47 470.0 477.0 479.0
+48 480.0 484.0 489.0
+49 490.0 494.5 498.0
+5 51.0 54.0 58.0
+6 64.0 66.5 69.0
+7 70.0 73.0 78.0
+8 80.0 84.0 87.0
+9 90.0 95.0 98.0
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS INT), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS INT), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0 4.5 9.0
+1 10.0 15.0 19.0
+10 100.0 103.0 105.0
+11 111.0 117.0 119.0
+12 120.0 127.0 129.0
+13 131.0 137.0 138.0
+14 143.0 146.0 149.0
+15 150.0 154.0 158.0
+16 160.0 166.5 169.0
+17 170.0 175.0 179.0
+18 180.0 186.5 189.0
+19 190.0 194.5 199.0
+2 20.0 26.0 28.0
+20 200.0 205.0 209.0
+21 213.0 216.5 219.0
+22 221.0 224.0 229.0
+23 230.0 234.0 239.0
+24 241.0 244.0 249.0
+25 252.0 256.0 258.0
+26 260.0 264.0 266.0
+27 272.0 275.0 278.0
+28 280.0 283.5 289.0
+29 291.0 297.0 298.0
+3 30.0 35.0 37.0
+30 302.0 307.0 309.0
+31 310.0 316.0 318.0
+32 321.0 324.0 327.0
+33 331.0 333.0 339.0
+34 341.0 345.0 348.0
+35 351.0 353.0 356.0
+36 360.0 367.0 369.0
+37 373.0 376.0 379.0
+38 382.0 384.0 389.0
+39 392.0 396.0 399.0
+4 41.0 42.5 47.0
+40 400.0 403.5 409.0
+41 411.0 415.5 419.0
+42 421.0 425.5 429.0
+43 430.0 435.0 439.0
+44 443.0 446.0 449.0
+45 452.0 455.0 459.0
+46 460.0 467.5 469.0
+47 470.0 477.0 479.0
+48 480.0 484.0 489.0
+49 490.0 494.5 498.0
+5 51.0 54.0 58.0
+6 64.0 66.5 69.0
+7 70.0 73.0 78.0
+8 80.0 84.0 87.0
+9 90.0 95.0 98.0
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS INT), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS INT), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0 4.5 9.0
+1 10.0 15.0 19.0
+10 100.0 103.0 105.0
+11 111.0 117.0 119.0
+12 120.0 127.0 129.0
+13 131.0 137.0 138.0
+14 143.0 146.0 149.0
+15 150.0 154.0 158.0
+16 160.0 166.5 169.0
+17 170.0 175.0 179.0
+18 180.0 186.5 189.0
+19 190.0 194.5 199.0
+2 20.0 26.0 28.0
+20 200.0 205.0 209.0
+21 213.0 216.5 219.0
+22 221.0 224.0 229.0
+23 230.0 234.0 239.0
+24 241.0 244.0 249.0
+25 252.0 256.0 258.0
+26 260.0 264.0 266.0
+27 272.0 275.0 278.0
+28 280.0 283.5 289.0
+29 291.0 297.0 298.0
+3 30.0 35.0 37.0
+30 302.0 307.0 309.0
+31 310.0 316.0 318.0
+32 321.0 324.0 327.0
+33 331.0 333.0 339.0
+34 341.0 345.0 348.0
+35 351.0 353.0 356.0
+36 360.0 367.0 369.0
+37 373.0 376.0 379.0
+38 382.0 384.0 389.0
+39 392.0 396.0 399.0
+4 41.0 42.5 47.0
+40 400.0 403.5 409.0
+41 411.0 415.5 419.0
+42 421.0 425.5 429.0
+43 430.0 435.0 439.0
+44 443.0 446.0 449.0
+45 452.0 455.0 459.0
+46 460.0 467.5 469.0
+47 470.0 477.0 479.0
+48 480.0 484.0 489.0
+49 490.0 494.5 498.0
+5 51.0 54.0 58.0
+6 64.0 66.5 69.0
+7 70.0 73.0 78.0
+8 80.0 84.0 87.0
+9 90.0 95.0 98.0
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS INT), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_cont(CAST(substr(value, 5) AS INT), 0.5),
+ percentile_cont(CAST(substr(value, 5) AS INT), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0 4.5 9.0
+1 10.0 15.0 19.0
+10 100.0 103.0 105.0
+11 111.0 117.0 119.0
+12 120.0 127.0 129.0
+13 131.0 137.0 138.0
+14 143.0 146.0 149.0
+15 150.0 154.0 158.0
+16 160.0 166.5 169.0
+17 170.0 175.0 179.0
+18 180.0 186.5 189.0
+19 190.0 194.5 199.0
+2 20.0 26.0 28.0
+20 200.0 205.0 209.0
+21 213.0 216.5 219.0
+22 221.0 224.0 229.0
+23 230.0 234.0 239.0
+24 241.0 244.0 249.0
+25 252.0 256.0 258.0
+26 260.0 264.0 266.0
+27 272.0 275.0 278.0
+28 280.0 283.5 289.0
+29 291.0 297.0 298.0
+3 30.0 35.0 37.0
+30 302.0 307.0 309.0
+31 310.0 316.0 318.0
+32 321.0 324.0 327.0
+33 331.0 333.0 339.0
+34 341.0 345.0 348.0
+35 351.0 353.0 356.0
+36 360.0 367.0 369.0
+37 373.0 376.0 379.0
+38 382.0 384.0 389.0
+39 392.0 396.0 399.0
+4 41.0 42.5 47.0
+40 400.0 403.5 409.0
+41 411.0 415.5 419.0
+42 421.0 425.5 429.0
+43 430.0 435.0 439.0
+44 443.0 446.0 449.0
+45 452.0 455.0 459.0
+46 460.0 467.5 469.0
+47 470.0 477.0 479.0
+48 480.0 484.0 489.0
+49 490.0 494.5 498.0
+5 51.0 54.0 58.0
+6 64.0 66.5 69.0
+7 70.0 73.0 78.0
+8 80.0 84.0 87.0
+9 90.0 95.0 98.0
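
[Editor's note] The same percentile_cont query appears four times above because the q-file typically re-runs it under different execution settings (e.g. hive.map.aggr); the set statements themselves emit no output, so the golden file records identical blocks. The arithmetic behind the rows is plain linear interpolation over the sorted group values. Below is a minimal Java sketch of that arithmetic, assuming the ten group-0 inputs are {0, 0, 0, 2, 4, 5, 5, 5, 8, 9} (the classic src keys below 10); it illustrates the semantics the output exhibits and is not the actual code of GenericUDAFPercentileCont — percentileCont is a hypothetical helper.

    import java.util.Arrays;

    // Minimal sketch of continuous-percentile (percentile_cont) arithmetic.
    // Assumption: mirrors the semantics visible in the golden rows above,
    // not the exact code of GenericUDAFPercentileCont.
    public class PercentileContSketch {
      static double percentileCont(double[] values, double p) {
        double[] sorted = values.clone();
        Arrays.sort(sorted);
        double pos = p * (sorted.length - 1);   // fractional rank in [0, n-1]
        int lower = (int) Math.floor(pos);
        int upper = (int) Math.ceil(pos);
        double frac = pos - lower;              // weight toward the upper neighbor
        return sorted[lower] + frac * (sorted[upper] - sorted[lower]);
      }

      public static void main(String[] args) {
        double[] group0 = {0, 0, 0, 2, 4, 5, 5, 5, 8, 9};
        System.out.println(percentileCont(group0, 0.0)); // 0.0
        System.out.println(percentileCont(group0, 0.5)); // 4.5 -> matches the row "0  0.0  4.5  9.0"
        System.out.println(percentileCont(group0, 1.0)); // 9.0
      }
    }
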
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(NULL, 0.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(NULL, 0.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 NULL
+1 NULL
+10 NULL
+11 NULL
+12 NULL
+13 NULL
+14 NULL
+15 NULL
+16 NULL
+17 NULL
+18 NULL
+19 NULL
+2 NULL
+20 NULL
+21 NULL
+22 NULL
+23 NULL
+24 NULL
+25 NULL
+26 NULL
+27 NULL
+28 NULL
+29 NULL
+3 NULL
+30 NULL
+31 NULL
+32 NULL
+33 NULL
+34 NULL
+35 NULL
+36 NULL
+37 NULL
+38 NULL
+39 NULL
+4 NULL
+40 NULL
+41 NULL
+42 NULL
+43 NULL
+44 NULL
+45 NULL
+46 NULL
+47 NULL
+48 NULL
+49 NULL
+5 NULL
+6 NULL
+7 NULL
+8 NULL
+9 NULL
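
[Editor's note] When every value in a group is NULL, as in the percentile_cont(NULL, 0.0) query above, each group comes back NULL. A plausible reading, sketched below under that assumption: null rows never enter the aggregation buffer, so the group terminates with nothing to rank. percentileContNullable is a hypothetical wrapper around the percentileCont helper sketched earlier (add it to that class; it needs java.util.ArrayList and java.util.List).

    // Sketch (assumption): NULL inputs are skipped while iterating, so a group
    // whose inputs are all NULL ends with an empty buffer and yields null.
    static Double percentileContNullable(Double[] values, double p) {
      List<Double> buffer = new ArrayList<>();
      for (Double v : values) {
        if (v != null) {
          buffer.add(v);                 // only non-null rows are buffered
        }
      }
      if (buffer.isEmpty()) {
        return null;                     // matches the all-NULL rows above
      }
      double[] arr = buffer.stream().mapToDouble(Double::doubleValue).toArray();
      return percentileCont(arr, p);     // helper from the earlier sketch
    }
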
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_cont(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 1.0
+1 1.0
+10 NULL
+11 NULL
+12 NULL
+13 NULL
+14 NULL
+15 NULL
+16 NULL
+17 NULL
+18 NULL
+19 NULL
+2 1.0
+20 NULL
+21 NULL
+22 NULL
+23 NULL
+24 NULL
+25 NULL
+26 NULL
+27 NULL
+28 NULL
+29 NULL
+3 1.0
+30 NULL
+31 NULL
+32 NULL
+33 NULL
+34 NULL
+35 NULL
+36 NULL
+37 NULL
+38 NULL
+39 NULL
+4 1.0
+40 NULL
+41 NULL
+42 NULL
+43 NULL
+44 NULL
+45 NULL
+46 NULL
+47 NULL
+48 NULL
+49 NULL
+5 NULL
+6 NULL
+7 NULL
+8 NULL
+9 NULL
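
[Editor's note] The IF(...) query above exercises the same null handling per group: for groups 0 through 4 every surviving input is the constant 1, so any percentile of the group is 1.0, while groups 5 and above contribute only NULLs and fall through to NULL, exactly as in the all-NULL case before it.
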
+PREHOOK: query: select percentile_cont(cast(key as bigint), 0.5) from src where false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select percentile_cont(cast(key as bigint), 0.5) from src where false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL
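
[Editor's note] The WHERE false query confirms the zero-row case: with no input rows at all the aggregation buffer stays empty and a single NULL comes back, the same terminal state as an all-NULL group. With the hypothetical wrapper above, percentileContNullable(new Double[0], 0.5) would likewise return null.
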
+PREHOOK: query: DESCRIBE FUNCTION percentile_disc
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION percentile_disc
+POSTHOOK: type: DESCFUNCTION
+There is no documentation for function 'percentile_disc'
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_disc
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_disc
+POSTHOOK: type: DESCFUNCTION
+There is no documentation for function 'percentile_disc'
+Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileDisc
+Function type:BUILTIN
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS INT), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS INT), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0 4.5 9.0
+1 10.0 15.0 19.0
+10 100.0 103.0 105.0
+11 111.0 117.0 119.0
+12 120.0 127.0 129.0
+13 131.0 137.0 138.0
+14 143.0 146.0 149.0
+15 150.0 154.0 158.0
+16 160.0 166.5 169.0
+17 170.0 175.0 179.0
+18 180.0 186.5 189.0
+19 190.0 194.5 199.0
+2 20.0 26.0 28.0
+20 200.0 205.0 209.0
+21 213.0 216.5 219.0
+22 221.0 224.0 229.0
+23 230.0 234.0 239.0
+24 241.0 244.0 249.0
+25 252.0 256.0 258.0
+26 260.0 264.0 266.0
+27 272.0 275.0 278.0
+28 280.0 283.5 289.0
+29 291.0 297.0 298.0
+3 30.0 35.0 37.0
+30 302.0 307.0 309.0
+31 310.0 316.0 318.0
+32 321.0 324.0 327.0
+33 331.0 333.0 339.0
+34 341.0 345.0 348.0
+35 351.0 353.0 356.0
+36 360.0 367.0 369.0
+37 373.0 376.0 379.0
+38 382.0 384.0 389.0
+39 392.0 396.0 399.0
+4 41.0 42.5 47.0
+40 400.0 403.5 409.0
+41 411.0 415.5 419.0
+42 421.0 425.5 429.0
+43 430.0 435.0 439.0
+44 443.0 446.0 449.0
+45 452.0 455.0 459.0
+46 460.0 467.5 469.0
+47 470.0 477.0 479.0
+48 480.0 484.0 489.0
+49 490.0 494.5 498.0
+5 51.0 54.0 58.0
+6 64.0 66.5 69.0
+7 70.0 73.0 78.0
+8 80.0 84.0 87.0
+9 90.0 95.0 98.0
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS INT), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS INT), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0 4.5 9.0
+1 10.0 15.0 19.0
+10 100.0 103.0 105.0
+11 111.0 117.0 119.0
+12 120.0 127.0 129.0
+13 131.0 137.0 138.0
+14 143.0 146.0 149.0
+15 150.0 154.0 158.0
+16 160.0 166.5 169.0
+17 170.0 175.0 179.0
+18 180.0 186.5 189.0
+19 190.0 194.5 199.0
+2 20.0 26.0 28.0
+20 200.0 205.0 209.0
+21 213.0 216.5 219.0
+22 221.0 224.0 229.0
+23 230.0 234.0 239.0
+24 241.0 244.0 249.0
+25 252.0 256.0 258.0
+26 260.0 264.0 266.0
+27 272.0 275.0 278.0
+28 280.0 283.5 289.0
+29 291.0 297.0 298.0
+3 30.0 35.0 37.0
+30 302.0 307.0 309.0
+31 310.0 316.0 318.0
+32 321.0 324.0 327.0
+33 331.0 333.0 339.0
+34 341.0 345.0 348.0
+35 351.0 353.0 356.0
+36 360.0 367.0 369.0
+37 373.0 376.0 379.0
+38 382.0 384.0 389.0
+39 392.0 396.0 399.0
+4 41.0 42.5 47.0
+40 400.0 403.5 409.0
+41 411.0 415.5 419.0
+42 421.0 425.5 429.0
+43 430.0 435.0 439.0
+44 443.0 446.0 449.0
+45 452.0 455.0 459.0
+46 460.0 467.5 469.0
+47 470.0 477.0 479.0
+48 480.0 484.0 489.0
+49 490.0 494.5 498.0
+5 51.0 54.0 58.0
+6 64.0 66.5 69.0
+7 70.0 73.0 78.0
+8 80.0 84.0 87.0
+9 90.0 95.0 98.0
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS INT), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS INT), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0 4.5 9.0
+1 10.0 15.0 19.0
+10 100.0 103.0 105.0
+11 111.0 117.0 119.0
+12 120.0 127.0 129.0
+13 131.0 137.0 138.0
+14 143.0 146.0 149.0
+15 150.0 154.0 158.0
+16 160.0 166.5 169.0
+17 170.0 175.0 179.0
+18 180.0 186.5 189.0
+19 190.0 194.5 199.0
+2 20.0 26.0 28.0
+20 200.0 205.0 209.0
+21 213.0 216.5 219.0
+22 221.0 224.0 229.0
+23 230.0 234.0 239.0
+24 241.0 244.0 249.0
+25 252.0 256.0 258.0
+26 260.0 264.0 266.0
+27 272.0 275.0 278.0
+28 280.0 283.5 289.0
+29 291.0 297.0 298.0
+3 30.0 35.0 37.0
+30 302.0 307.0 309.0
+31 310.0 316.0 318.0
+32 321.0 324.0 327.0
+33 331.0 333.0 339.0
+34 341.0 345.0 348.0
+35 351.0 353.0 356.0
+36 360.0 367.0 369.0
+37 373.0 376.0 379.0
+38 382.0 384.0 389.0
+39 392.0 396.0 399.0
+4 41.0 42.5 47.0
+40 400.0 403.5 409.0
+41 411.0 415.5 419.0
+42 421.0 425.5 429.0
+43 430.0 435.0 439.0
+44 443.0 446.0 449.0
+45 452.0 455.0 459.0
+46 460.0 467.5 469.0
+47 470.0 477.0 479.0
+48 480.0 484.0 489.0
+49 490.0 494.5 498.0
+5 51.0 54.0 58.0
+6 64.0 66.5 69.0
+7 70.0 73.0 78.0
+8 80.0 84.0 87.0
+9 90.0 95.0 98.0
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS INT), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS INT), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0 4.5 9.0
+1 10.0 15.0 19.0
+10 100.0 103.0 105.0
+11 111.0 117.0 119.0
+12 120.0 127.0 129.0
+13 131.0 137.0 138.0
+14 143.0 146.0 149.0
+15 150.0 154.0 158.0
+16 160.0 166.5 169.0
+17 170.0 175.0 179.0
+18 180.0 186.5 189.0
+19 190.0 194.5 199.0
+2 20.0 26.0 28.0
+20 200.0 205.0 209.0
+21 213.0 216.5 219.0
+22 221.0 224.0 229.0
+23 230.0 234.0 239.0
+24 241.0 244.0 249.0
+25 252.0 256.0 258.0
+26 260.0 264.0 266.0
+27 272.0 275.0 278.0
+28 280.0 283.5 289.0
+29 291.0 297.0 298.0
+3 30.0 35.0 37.0
+30 302.0 307.0 309.0
+31 310.0 316.0 318.0
+32 321.0 324.0 327.0
+33 331.0 333.0 339.0
+34 341.0 345.0 348.0
+35 351.0 353.0 356.0
+36 360.0 367.0 369.0
+37 373.0 376.0 379.0
+38 382.0 384.0 389.0
+39 392.0 396.0 399.0
+4 41.0 42.5 47.0
+40 400.0 403.5 409.0
+41 411.0 415.5 419.0
+42 421.0 425.5 429.0
+43 430.0 435.0 439.0
+44 443.0 446.0 449.0
+45 452.0 455.0 459.0
+46 460.0 467.5 469.0
+47 470.0 477.0 479.0
+48 480.0 484.0 489.0
+49 490.0 494.5 498.0
+5 51.0 54.0 58.0
+6 64.0 66.5 69.0
+7 70.0 73.0 78.0
+8 80.0 84.0 87.0
+9 90.0 95.0 98.0
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(NULL, 0.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(NULL, 0.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 NULL
+1 NULL
+10 NULL
+11 NULL
+12 NULL
+13 NULL
+14 NULL
+15 NULL
+16 NULL
+17 NULL
+18 NULL
+19 NULL
+2 NULL
+20 NULL
+21 NULL
+22 NULL
+23 NULL
+24 NULL
+25 NULL
+26 NULL
+27 NULL
+28 NULL
+29 NULL
+3 NULL
+30 NULL
+31 NULL
+32 NULL
+33 NULL
+34 NULL
+35 NULL
+36 NULL
+37 NULL
+38 NULL
+39 NULL
+4 NULL
+40 NULL
+41 NULL
+42 NULL
+43 NULL
+44 NULL
+45 NULL
+46 NULL
+47 NULL
+48 NULL
+49 NULL
+5 NULL
+6 NULL
+7 NULL
+8 NULL
+9 NULL
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 1.0
+1 1.0
+10 NULL
+11 NULL
+12 NULL
+13 NULL
+14 NULL
+15 NULL
+16 NULL
+17 NULL
+18 NULL
+19 NULL
+2 1.0
+20 NULL
+21 NULL
+22 NULL
+23 NULL
+24 NULL
+25 NULL
+26 NULL
+27 NULL
+28 NULL
+29 NULL
+3 1.0
+30 NULL
+31 NULL
+32 NULL
+33 NULL
+34 NULL
+35 NULL
+36 NULL
+37 NULL
+38 NULL
+39 NULL
+4 1.0
+40 NULL
+41 NULL
+42 NULL
+43 NULL
+44 NULL
+45 NULL
+46 NULL
+47 NULL
+48 NULL
+49 NULL
+5 NULL
+6 NULL
+7 NULL
+8 NULL
+9 NULL
+PREHOOK: query: select percentile_disc(cast(key as bigint), 0.5) from src where false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select percentile_disc(cast(key as bigint), 0.5) from src where false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL
diff --git a/ql/src/test/results/clientpositive/udaf_percentile_disc.q.out b/ql/src/test/results/clientpositive/udaf_percentile_disc.q.out
new file mode 100644
index 0000000..75fa36c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udaf_percentile_disc.q.out
@@ -0,0 +1,421 @@
+PREHOOK: query: DESCRIBE FUNCTION percentile_disc
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION percentile_disc
+POSTHOOK: type: DESCFUNCTION
+percentile_disc(input, pc) - Returns the percentile of expr at pc (range: [0,1]) without interpolation.
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_disc
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile_disc
+POSTHOOK: type: DESCFUNCTION
+percentile_disc(input, pc) - Returns the percentile of expr at pc (range: [0,1]) without interpolation.
+Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileDisc
+Function type:BUILTIN
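
[Editor's note] percentile_disc is documented here as returning a percentile "without interpolation", and the rows that follow bear that out: where percentile_cont produced 4.5 for group 0 at 0.5, percentile_disc produces 5.0, an actual input value. The golden output is consistent with taking the upper neighbor of the continuous rank, i.e. sorted[ceil(p * (n - 1))]. The sketch below illustrates that reading; it is an inference from the output, not a statement of GenericUDAFPercentileDisc's exact algorithm.

    import java.util.Arrays;

    // Sketch of the discrete variant. Assumption: inferred from the golden rows
    // below (upper neighbor of the continuous rank, no interpolation); not
    // necessarily the exact logic of GenericUDAFPercentileDisc.
    public class PercentileDiscSketch {
      static double percentileDisc(double[] values, double p) {
        double[] sorted = values.clone();
        Arrays.sort(sorted);
        int idx = (int) Math.ceil(p * (sorted.length - 1)); // always lands on a real input
        return sorted[idx];
      }

      public static void main(String[] args) {
        double[] group0 = {0, 0, 0, 2, 4, 5, 5, 5, 8, 9};
        System.out.println(percentileDisc(group0, 0.5)); // 5.0 -> matches the row "0  0.0  5.0  9.0"
      }
    }
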
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0 5.0 9.0
+1 10.0 15.0 19.0
+10 100.0 103.0 105.0
+11 111.0 118.0 119.0
+12 120.0 128.0 129.0
+13 131.0 137.0 138.0
+14 143.0 146.0 149.0
+15 150.0 155.0 158.0
+16 160.0 167.0 169.0
+17 170.0 175.0 179.0
+18 180.0 187.0 189.0
+19 190.0 195.0 199.0
+2 20.0 26.0 28.0
+20 200.0 205.0 209.0
+21 213.0 217.0 219.0
+22 221.0 224.0 229.0
+23 230.0 235.0 239.0
+24 241.0 244.0 249.0
+25 252.0 256.0 258.0
+26 260.0 265.0 266.0
+27 272.0 275.0 278.0
+28 280.0 284.0 289.0
+29 291.0 298.0 298.0
+3 30.0 35.0 37.0
+30 302.0 307.0 309.0
+31 310.0 316.0 318.0
+32 321.0 325.0 327.0
+33 331.0 333.0 339.0
+34 341.0 345.0 348.0
+35 351.0 353.0 356.0
+36 360.0 367.0 369.0
+37 373.0 377.0 379.0
+38 382.0 384.0 389.0
+39 392.0 396.0 399.0
+4 41.0 43.0 47.0
+40 400.0 404.0 409.0
+41 411.0 417.0 419.0
+42 421.0 427.0 429.0
+43 430.0 435.0 439.0
+44 443.0 446.0 449.0
+45 452.0 455.0 459.0
+46 460.0 468.0 469.0
+47 470.0 477.0 479.0
+48 480.0 484.0 489.0
+49 490.0 495.0 498.0
+5 51.0 54.0 58.0
+6 64.0 67.0 69.0
+7 70.0 74.0 78.0
+8 80.0 84.0 87.0
+9 90.0 95.0 98.0
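
[Editor's note] This run casts the input to INT, DOUBLE, and DECIMAL across the three calls, and each column still renders as a DOUBLE (9.0, 95.0, and so on): the result type is uniform regardless of the declared numeric input type, since the ranking depends only on the ordered numeric values.
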
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0 5.0 9.0
+1 10.0 15.0 19.0
+10 100.0 103.0 105.0
+11 111.0 118.0 119.0
+12 120.0 128.0 129.0
+13 131.0 137.0 138.0
+14 143.0 146.0 149.0
+15 150.0 155.0 158.0
+16 160.0 167.0 169.0
+17 170.0 175.0 179.0
+18 180.0 187.0 189.0
+19 190.0 195.0 199.0
+2 20.0 26.0 28.0
+20 200.0 205.0 209.0
+21 213.0 217.0 219.0
+22 221.0 224.0 229.0
+23 230.0 235.0 239.0
+24 241.0 244.0 249.0
+25 252.0 256.0 258.0
+26 260.0 265.0 266.0
+27 272.0 275.0 278.0
+28 280.0 284.0 289.0
+29 291.0 298.0 298.0
+3 30.0 35.0 37.0
+30 302.0 307.0 309.0
+31 310.0 316.0 318.0
+32 321.0 325.0 327.0
+33 331.0 333.0 339.0
+34 341.0 345.0 348.0
+35 351.0 353.0 356.0
+36 360.0 367.0 369.0
+37 373.0 377.0 379.0
+38 382.0 384.0 389.0
+39 392.0 396.0 399.0
+4 41.0 43.0 47.0
+40 400.0 404.0 409.0
+41 411.0 417.0 419.0
+42 421.0 427.0 429.0
+43 430.0 435.0 439.0
+44 443.0 446.0 449.0
+45 452.0 455.0 459.0
+46 460.0 468.0 469.0
+47 470.0 477.0 479.0
+48 480.0 484.0 489.0
+49 490.0 495.0 498.0
+5 51.0 54.0 58.0
+6 64.0 67.0 69.0
+7 70.0 74.0 78.0
+8 80.0 84.0 87.0
+9 90.0 95.0 98.0
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0 5.0 9.0
+1 10.0 15.0 19.0
+10 100.0 103.0 105.0
+11 111.0 118.0 119.0
+12 120.0 128.0 129.0
+13 131.0 137.0 138.0
+14 143.0 146.0 149.0
+15 150.0 155.0 158.0
+16 160.0 167.0 169.0
+17 170.0 175.0 179.0
+18 180.0 187.0 189.0
+19 190.0 195.0 199.0
+2 20.0 26.0 28.0
+20 200.0 205.0 209.0
+21 213.0 217.0 219.0
+22 221.0 224.0 229.0
+23 230.0 235.0 239.0
+24 241.0 244.0 249.0
+25 252.0 256.0 258.0
+26 260.0 265.0 266.0
+27 272.0 275.0 278.0
+28 280.0 284.0 289.0
+29 291.0 298.0 298.0
+3 30.0 35.0 37.0
+30 302.0 307.0 309.0
+31 310.0 316.0 318.0
+32 321.0 325.0 327.0
+33 331.0 333.0 339.0
+34 341.0 345.0 348.0
+35 351.0 353.0 356.0
+36 360.0 367.0 369.0
+37 373.0 377.0 379.0
+38 382.0 384.0 389.0
+39 392.0 396.0 399.0
+4 41.0 43.0 47.0
+40 400.0 404.0 409.0
+41 411.0 417.0 419.0
+42 421.0 427.0 429.0
+43 430.0 435.0 439.0
+44 443.0 446.0 449.0
+45 452.0 455.0 459.0
+46 460.0 468.0 469.0
+47 470.0 477.0 479.0
+48 480.0 484.0 489.0
+49 490.0 495.0 498.0
+5 51.0 54.0 58.0
+6 64.0 67.0 69.0
+7 70.0 74.0 78.0
+8 80.0 84.0 87.0
+9 90.0 95.0 98.0
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(CAST(substr(value, 5) AS INT), 0.0),
+ percentile_disc(CAST(substr(value, 5) AS DOUBLE), 0.5),
+ percentile_disc(CAST(substr(value, 5) AS DECIMAL), 1.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0.0 5.0 9.0
+1 10.0 15.0 19.0
+10 100.0 103.0 105.0
+11 111.0 118.0 119.0
+12 120.0 128.0 129.0
+13 131.0 137.0 138.0
+14 143.0 146.0 149.0
+15 150.0 155.0 158.0
+16 160.0 167.0 169.0
+17 170.0 175.0 179.0
+18 180.0 187.0 189.0
+19 190.0 195.0 199.0
+2 20.0 26.0 28.0
+20 200.0 205.0 209.0
+21 213.0 217.0 219.0
+22 221.0 224.0 229.0
+23 230.0 235.0 239.0
+24 241.0 244.0 249.0
+25 252.0 256.0 258.0
+26 260.0 265.0 266.0
+27 272.0 275.0 278.0
+28 280.0 284.0 289.0
+29 291.0 298.0 298.0
+3 30.0 35.0 37.0
+30 302.0 307.0 309.0
+31 310.0 316.0 318.0
+32 321.0 325.0 327.0
+33 331.0 333.0 339.0
+34 341.0 345.0 348.0
+35 351.0 353.0 356.0
+36 360.0 367.0 369.0
+37 373.0 377.0 379.0
+38 382.0 384.0 389.0
+39 392.0 396.0 399.0
+4 41.0 43.0 47.0
+40 400.0 404.0 409.0
+41 411.0 417.0 419.0
+42 421.0 427.0 429.0
+43 430.0 435.0 439.0
+44 443.0 446.0 449.0
+45 452.0 455.0 459.0
+46 460.0 468.0 469.0
+47 470.0 477.0 479.0
+48 480.0 484.0 489.0
+49 490.0 495.0 498.0
+5 51.0 54.0 58.0
+6 64.0 67.0 69.0
+7 70.0 74.0 78.0
+8 80.0 84.0 87.0
+9 90.0 95.0 98.0
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(NULL, 0.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(NULL, 0.0)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 NULL
+1 NULL
+10 NULL
+11 NULL
+12 NULL
+13 NULL
+14 NULL
+15 NULL
+16 NULL
+17 NULL
+18 NULL
+19 NULL
+2 NULL
+20 NULL
+21 NULL
+22 NULL
+23 NULL
+24 NULL
+25 NULL
+26 NULL
+27 NULL
+28 NULL
+29 NULL
+3 NULL
+30 NULL
+31 NULL
+32 NULL
+33 NULL
+34 NULL
+35 NULL
+36 NULL
+37 NULL
+38 NULL
+39 NULL
+4 NULL
+40 NULL
+41 NULL
+42 NULL
+43 NULL
+44 NULL
+45 NULL
+46 NULL
+47 NULL
+48 NULL
+49 NULL
+5 NULL
+6 NULL
+7 NULL
+8 NULL
+9 NULL
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+ percentile_disc(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5)
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 1.0
+1 1.0
+10 NULL
+11 NULL
+12 NULL
+13 NULL
+14 NULL
+15 NULL
+16 NULL
+17 NULL
+18 NULL
+19 NULL
+2 1.0
+20 NULL
+21 NULL
+22 NULL
+23 NULL
+24 NULL
+25 NULL
+26 NULL
+27 NULL
+28 NULL
+29 NULL
+3 1.0
+30 NULL
+31 NULL
+32 NULL
+33 NULL
+34 NULL
+35 NULL
+36 NULL
+37 NULL
+38 NULL
+39 NULL
+4 1.0
+40 NULL
+41 NULL
+42 NULL
+43 NULL
+44 NULL
+45 NULL
+46 NULL
+47 NULL
+48 NULL
+49 NULL
+5 NULL
+6 NULL
+7 NULL
+8 NULL
+9 NULL
+PREHOOK: query: select percentile_disc(cast(key as bigint), 0.5) from src where false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select percentile_disc(cast(key as bigint), 0.5) from src where false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL