You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/11/06 04:13:24 UTC
[hive] branch master updated: HIVE-22292: Implement
Hypothetical-Set Aggregate Functions (Krisztian Kasa,
reviewed by Jesus Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new d0bd071 HIVE-22292: Implement Hypothetical-Set Aggregate Functions (Krisztian Kasa, reviewed by Jesus Camacho Rodriguez)
d0bd071 is described below
commit d0bd0712b957d1c053539689892909b0fe2bd376
Author: Krisztian Kasa <kk...@cloudera.com>
AuthorDate: Tue Nov 5 20:12:13 2019 -0800
HIVE-22292: Implement Hypothetical-Set Aggregate Functions (Krisztian Kasa, reviewed by Jesus Camacho Rodriguez)
---
.../java/org/apache/hadoop/hive/ql/ErrorMsg.java | 2 +
.../hadoop/hive/ql/exec/FunctionRegistry.java | 4 +-
.../hive/ql/exec/WindowFunctionDescription.java | 2 +-
.../hadoop/hive/ql/exec/WindowFunctionInfo.java | 21 +-
.../hadoop/hive/ql/parse/IdentifiersParser.g | 4 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 35 +-
.../hive/ql/udf/generic/GenericUDAFCumeDist.java | 37 +-
.../hive/ql/udf/generic/GenericUDAFDenseRank.java | 131 +++-
.../ql/udf/generic/GenericUDAFPercentRank.java | 33 +-
.../ql/udf/generic/GenericUDAFPercentileCont.java | 109 +--
.../ql/udf/generic/GenericUDAFPercentileDisc.java | 40 +-
.../hive/ql/udf/generic/GenericUDAFRank.java | 281 +++++++-
.../apache/hadoop/hive/ql/util/NullOrdering.java | 69 ++
.../hadoop/hive/ql/exec/TestFunctionRegistry.java | 4 -
.../hive/ql/parse/TestParseWithinGroupClause.java | 27 +-
.../clientpositive/hypothetical_set_aggregates.q | 315 +++++++++
.../hypothetical_set_aggregates.q.out | 762 +++++++++++++++++++++
.../clientpositive/udaf_percentile_cont.q.out | 18 +-
.../clientpositive/udaf_percentile_disc.q.out | 18 +-
19 files changed, 1759 insertions(+), 153 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 5e88f30..467ce50 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -474,6 +474,8 @@ public enum ErrorMsg {
OBJECTNAME_CONTAINS_DOT(10420, "Table or database name may not contain dot(.) character", true),
WITHIN_GROUP_NOT_ALLOWED(10421,
"Not an ordered-set aggregate function: {0}. WITHIN GROUP clause is not allowed.", true),
+ WITHIN_GROUP_PARAMETER_MISMATCH(10422,
+ "The number of hypothetical direct arguments ({0}) must match the number of ordering columns ({1})", true),
//========================== 20000 range starts here ========================//
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 0599197..3e43646 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -1892,10 +1892,10 @@ public final class FunctionRegistry {
}
}
- public static boolean supportsWithinGroup(String functionName) throws SemanticException {
+ public static boolean isOrderedAggregate(String functionName) throws SemanticException {
WindowFunctionInfo windowInfo = getWindowFunctionInfo(functionName);
if (windowInfo != null) {
- return windowInfo.supportsWithinGroup();
+ return windowInfo.isOrderedAggregate();
}
return false;
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java
index 48645dc..015c26a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionDescription.java
@@ -83,5 +83,5 @@ public @interface WindowFunctionDescription {
*
* @return true if the function can be used as an ordered-set aggregate
*/
- boolean supportsWithinGroup() default false;
+ boolean orderedAggregate() default false;
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java
index a0b0e48..8e62643 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/WindowFunctionInfo.java
@@ -27,7 +27,7 @@ public class WindowFunctionInfo extends FunctionInfo {
private final boolean supportsWindow;
private final boolean pivotResult;
private final boolean impliesOrder;
- private final boolean supportsWithinGroup;
+ private final boolean orderedAggregate;
public WindowFunctionInfo(FunctionType functionType, String functionName,
GenericUDAFResolver resolver, FunctionResource[] resources) {
@@ -37,7 +37,7 @@ public class WindowFunctionInfo extends FunctionInfo {
supportsWindow = def == null ? true : def.supportsWindow();
pivotResult = def == null ? false : def.pivotResult();
impliesOrder = def == null ? false : def.impliesOrder();
- supportsWithinGroup = def == null ? false : def.supportsWithinGroup();
+ orderedAggregate = def == null ? false : def.orderedAggregate();
}
public boolean isSupportsWindow() {
@@ -48,11 +48,24 @@ public class WindowFunctionInfo extends FunctionInfo {
return pivotResult;
}
+ /**
+ * Property for indicating that the function is a window function and an OVER clause is required when invoked.
+ * example:
+ * SELECT val, rank() OVER (ORDER BY val DESC) FROM t_table;
+ * @return true if the function is a window function, false otherwise
+ */
public boolean isImpliesOrder() {
return impliesOrder;
}
- public boolean supportsWithinGroup() {
- return supportsWithinGroup;
+ /**
+ * Property for indicating that the function is an Ordered-Set Aggregate function.
+ * A WITHIN GROUP clause is required when invoked.
+ * example:
+ * SELECT rank(1) WITHIN GROUP (ORDER BY val) FROM t_table;
+ * @return true if the function is an Ordered-Set Aggregate function, false otherwise
+ */
+ public boolean isOrderedAggregate() {
+ return orderedAggregate;
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 55c6863..6639695 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -224,9 +224,9 @@ function
(STAR) => (star=STAR)
| (dist=KW_DISTINCT | KW_ALL)? (selectExpression (COMMA selectExpression)*)?
)
- RPAREN ((KW_OVER ws=window_specification) | (within=KW_WITHIN KW_GROUP LPAREN KW_ORDER KW_BY colRef=columnRefOrder RPAREN))?
+ RPAREN ((KW_OVER ws=window_specification) | (within=KW_WITHIN KW_GROUP LPAREN ordBy=orderByClause RPAREN))?
-> {$star != null}? ^(TOK_FUNCTIONSTAR functionName $ws?)
- -> {$within != null}? ^(TOK_FUNCTION functionName (selectExpression+)? ^(TOK_WITHIN_GROUP $colRef))
+ -> {$within != null}? ^(TOK_FUNCTION functionName (selectExpression+)? ^(TOK_WITHIN_GROUP $ordBy))
-> {$dist == null}? ^(TOK_FUNCTION functionName (selectExpression+)? $ws?)
-> ^(TOK_FUNCTIONDI functionName (selectExpression+)? $ws?)
;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 0198c0f..11e35d0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -255,6 +255,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSurrogateKey;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFInline;
import org.apache.hadoop.hive.ql.util.DirectionUtils;
+import org.apache.hadoop.hive.ql.util.NullOrdering;
import org.apache.hadoop.hive.ql.util.ResourceDownloader;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.DelimitedJSONSerDe;
@@ -948,17 +949,35 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
private void transformWithinGroup(ASTNode expressionTree, Tree withinGroupNode) throws SemanticException {
Tree functionNameNode = expressionTree.getChild(0);
- if (!FunctionRegistry.supportsWithinGroup(functionNameNode.getText())) {
+ if (!FunctionRegistry.isOrderedAggregate(functionNameNode.getText())) {
throw new SemanticException(ErrorMsg.WITHIN_GROUP_NOT_ALLOWED, functionNameNode.getText());
}
- Tree tabSortColNameNode = withinGroupNode.getChild(0);
- ASTNode sortKey = (ASTNode) tabSortColNameNode.getChild(0).getChild(0);
- expressionTree.deleteChild(withinGroupNode.getChildIndex());
- // backward compatibility: the sortkey is the first paramater of the percentile_cont and percentile_disc functions
- expressionTree.insertChild(1, sortKey);
- expressionTree.addChild(ASTBuilder.createAST(HiveParser.NumberLiteral,
- Integer.toString(DirectionUtils.tokenToCode(tabSortColNameNode.getType()))));
+ List<Tree> parameters = new ArrayList<>(expressionTree.getChildCount() - 2);
+ for (int i = 1; i < expressionTree.getChildCount() - 1; ++i) {
+ parameters.add(expressionTree.getChild(i));
+ }
+ while (expressionTree.getChildCount() > 1) {
+ expressionTree.deleteChild(1);
+ }
+
+ Tree orderByNode = withinGroupNode.getChild(0);
+ if (parameters.size() != orderByNode.getChildCount()) {
+ throw new SemanticException(ErrorMsg.WITHIN_GROUP_PARAMETER_MISMATCH,
+ Integer.toString(parameters.size()), Integer.toString(orderByNode.getChildCount()));
+ }
+
+ for (int i = 0; i < orderByNode.getChildCount(); ++i) {
+ expressionTree.addChild(parameters.get(i));
+ Tree tabSortColNameNode = orderByNode.getChild(i);
+ Tree nullsNode = tabSortColNameNode.getChild(0);
+ ASTNode sortKey = (ASTNode) tabSortColNameNode.getChild(0).getChild(0);
+ expressionTree.addChild(sortKey);
+ expressionTree.addChild(ASTBuilder.createAST(HiveParser.NumberLiteral,
+ Integer.toString(DirectionUtils.tokenToCode(tabSortColNameNode.getType()))));
+ expressionTree.addChild(ASTBuilder.createAST(HiveParser.NumberLiteral,
+ Integer.toString(NullOrdering.fromToken(nullsNode.getType()).getCode())));
+ }
}
private List<ASTNode> doPhase1GetDistinctFuncExprs(Map<String, ASTNode> aggregationTrees) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java
index d0c155f..242a413 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCumeDist.java
@@ -18,6 +18,9 @@
package org.apache.hadoop.hive.ql.udf.generic;
+import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.
+ writableDoubleObjectInspector;
+
import java.util.ArrayList;
import java.util.List;
@@ -27,7 +30,6 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;
@Description(
@@ -41,20 +43,25 @@ import org.apache.hadoop.io.IntWritable;
supportsWindow = false,
pivotResult = true,
rankingFunction = true,
- impliesOrder = true)
+ orderedAggregate = true)
public class GenericUDAFCumeDist extends GenericUDAFRank {
@Override
- protected GenericUDAFAbstractRankEvaluator createEvaluator() {
+ protected GenericUDAFAbstractRankEvaluator createWindowingEvaluator() {
return new GenericUDAFCumeDistEvaluator();
}
+ @Override
+ protected GenericUDAFHypotheticalSetRankEvaluator createHypotheticalSetEvaluator() {
+ return new GenericUDAFHypotheticalSetCumeDistEvaluator();
+ }
+
public static class GenericUDAFCumeDistEvaluator extends GenericUDAFAbstractRankEvaluator {
@Override
public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
super.init(m, parameters);
return ObjectInspectorFactory
- .getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ .getStandardListObjectInspector(writableDoubleObjectInspector);
}
@Override
@@ -91,4 +98,26 @@ public class GenericUDAFCumeDist extends GenericUDAFRank {
return distances;
}
}
+
+ /**
+ * Evaluator for calculating the cumulative distribution.
+ * SELECT cume_dist(expression) WITHIN GROUP (ORDER BY col1)
+ * Implementation is based on hypothetical rank calculation: (rank + 1) / (count + 1)
+ * Differences:
+ * - rows whose column values are equal to the specified expression value should be counted in the rank
+ * - the return value type of this function is double.
+ */
+ public static class GenericUDAFHypotheticalSetCumeDistEvaluator
+ extends GenericUDAFHypotheticalSetRankEvaluator {
+
+ public GenericUDAFHypotheticalSetCumeDistEvaluator() {
+ super(true, PARTIAL_RANK_OI, writableDoubleObjectInspector);
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ HypotheticalSetRankBuffer rankBuffer = (HypotheticalSetRankBuffer) agg;
+ return new DoubleWritable((rankBuffer.rank + 1.0) / (rankBuffer.rowCount + 1.0));
+ }
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java
index 992f5bf..7fb50ec 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFDenseRank.java
@@ -18,8 +18,22 @@
package org.apache.hadoop.hive.ql.udf.generic;
+import static org.apache.hadoop.hive.ql.util.DirectionUtils.ASCENDING_CODE;
+import static org.apache.hadoop.hive.ql.util.DirectionUtils.DESCENDING_CODE;
+import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.
+ writableLongObjectInspector;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.WindowFunctionDescription;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.LongWritable;
@Description(
name = "dense_rank",
@@ -32,14 +46,19 @@ import org.apache.hadoop.hive.ql.exec.WindowFunctionDescription;
supportsWindow = false,
pivotResult = true,
rankingFunction = true,
- impliesOrder = true)
+ orderedAggregate = true)
public class GenericUDAFDenseRank extends GenericUDAFRank {
@Override
- protected GenericUDAFAbstractRankEvaluator createEvaluator() {
+ protected GenericUDAFAbstractRankEvaluator createWindowingEvaluator() {
return new GenericUDAFDenseRankEvaluator();
}
+ @Override
+ protected GenericUDAFHypotheticalSetRankEvaluator createHypotheticalSetEvaluator() {
+ return new GenericUDAFHypotheticalSetDenseRankEvaluator();
+ }
+
public static class GenericUDAFDenseRankEvaluator extends GenericUDAFRankEvaluator {
/*
@@ -50,5 +69,111 @@ public class GenericUDAFDenseRank extends GenericUDAFRank {
rb.currentRank++;
}
}
-}
+ /**
+ * Evaluator for calculating the dense rank.
+ * SELECT dense_rank(expression1[, expressionN]*) WITHIN GROUP (ORDER BY col1[, colN]*)
+ * Implementation is based on hypothetical rank calculation but the groups of values are considered distinct.
+ * Since the source of the input stream is not sorted, a HashSet is used to filter out duplicate values
+ * which can lead to OOM in large data sets.
+ */
+ public static class GenericUDAFHypotheticalSetDenseRankEvaluator extends GenericUDAFHypotheticalSetRankEvaluator {
+
+ public GenericUDAFHypotheticalSetDenseRankEvaluator() {
+ super(false, writableLongObjectInspector, writableLongObjectInspector);
+ }
+
+ @Override
+ protected void initPartial2AndFinalOI(ObjectInspector[] parameters) {
+ // nop
+ }
+
+ private static final class RowData {
+ private final List<Object> columnValues;
+
+ private RowData(List<Object> columnValues) {
+ this.columnValues = columnValues;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ RowData rowData = (RowData) o;
+ return Objects.equals(columnValues, rowData.columnValues);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(columnValues);
+ }
+ }
+
+ private static class HypotheticalSetDenseRankBuffer extends AbstractAggregationBuffer {
+ protected Set<RowData> elements = new HashSet<>();
+ private long rank = 0;
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new HypotheticalSetDenseRankBuffer();
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ HypotheticalSetDenseRankBuffer rankBuffer = (HypotheticalSetDenseRankBuffer) agg;
+ rankBuffer.elements.clear();
+ rankBuffer.rank = 0;
+ }
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ HypotheticalSetDenseRankBuffer rankBuffer = (HypotheticalSetDenseRankBuffer) agg;
+
+ CompareResult compareResult = compare(parameters);
+ if (compareResult.getCompareResult() == 0) {
+ return;
+ }
+
+ if (compareResult.getOrder() == ASCENDING_CODE && compareResult.getCompareResult() < 0 ||
+ compareResult.getOrder() == DESCENDING_CODE && compareResult.getCompareResult() > 0) {
+ List<Object> columnValues = new ArrayList<>(parameters.length / 4);
+ for (int i = 0; i < parameters.length / 4; ++i) {
+ columnValues.add(parameters[i * 4 + 1]);
+ }
+ RowData rowData = new RowData(columnValues);
+ if (!rankBuffer.elements.contains(rowData)) {
+ rankBuffer.elements.add(rowData);
+ rankBuffer.rank++;
+ }
+ }
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ HypotheticalSetDenseRankBuffer rankBuffer = (HypotheticalSetDenseRankBuffer) agg;
+ return new LongWritable(rankBuffer.rank + 1);
+ }
+
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ if (partial == null) {
+ return;
+ }
+
+ HypotheticalSetDenseRankBuffer rankBuffer = (HypotheticalSetDenseRankBuffer) agg;
+ rankBuffer.rank += ((LongWritable)partial).get() - 1;
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ HypotheticalSetDenseRankBuffer rankBuffer = (HypotheticalSetDenseRankBuffer) agg;
+ return new LongWritable(rankBuffer.rank + 1);
+ }
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java
index 64e9c8b..49fd037 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentRank.java
@@ -18,6 +18,9 @@
package org.apache.hadoop.hive.ql.udf.generic;
+import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.
+ writableDoubleObjectInspector;
+
import java.util.ArrayList;
import org.slf4j.Logger;
@@ -28,7 +31,6 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;
@Description(
@@ -40,23 +42,28 @@ import org.apache.hadoop.io.IntWritable;
supportsWindow = false,
pivotResult = true,
rankingFunction = true,
- impliesOrder = true)
+ orderedAggregate = true)
public class GenericUDAFPercentRank extends GenericUDAFRank {
static final Logger LOG = LoggerFactory.getLogger(GenericUDAFPercentRank.class.getName());
@Override
- protected GenericUDAFAbstractRankEvaluator createEvaluator() {
+ protected GenericUDAFAbstractRankEvaluator createWindowingEvaluator() {
return new GenericUDAFPercentRankEvaluator();
}
+ @Override
+ protected GenericUDAFHypotheticalSetRankEvaluator createHypotheticalSetEvaluator() {
+ return new GenericUDAFHypotheticalSetPercentRankEvaluator();
+ }
+
public static class GenericUDAFPercentRankEvaluator extends GenericUDAFAbstractRankEvaluator {
@Override
public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
super.init(m, parameters);
return ObjectInspectorFactory.getStandardListObjectInspector(
- PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ writableDoubleObjectInspector);
}
@Override
@@ -76,5 +83,23 @@ public class GenericUDAFPercentRank extends GenericUDAFRank {
return pranks;
}
}
+
+ /**
+ * Evaluator for calculating the percent rank.
+ * SELECT percent_rank(expression1[, expressionN]*) WITHIN GROUP (ORDER BY col1[, colN]*)
+ * Implementation is based on hypothetical rank calculation: (rank - 1) / count
+ */
+ public static class GenericUDAFHypotheticalSetPercentRankEvaluator extends GenericUDAFHypotheticalSetRankEvaluator {
+
+ public GenericUDAFHypotheticalSetPercentRankEvaluator() {
+ super(false, PARTIAL_RANK_OI, writableDoubleObjectInspector);
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ HypotheticalSetRankBuffer rankBuffer = (HypotheticalSetRankBuffer) agg;
+ return new DoubleWritable(((double)rankBuffer.rank) / rankBuffer.rowCount);
+ }
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java
index ad61410..f7fa280 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileCont.java
@@ -63,7 +63,7 @@ import org.apache.hadoop.io.LongWritable;
@WindowFunctionDescription(
supportsWindow = false,
pivotResult = true,
- supportsWithinGroup = true)
+ orderedAggregate = true)
public class GenericUDAFPercentileCont extends AbstractGenericUDAFResolver {
private static final Comparator<LongWritable> LONG_COMPARATOR;
@@ -81,21 +81,35 @@ public class GenericUDAFPercentileCont extends AbstractGenericUDAFResolver {
@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
- validateParameterTypes(parameters);
+ if (parameters.length == 2) { // column ref, expression (0 <= percentile <= 1)
+ return getGenericUDAFEvaluator(parameters[0], parameters[1]);
+ } else if (parameters.length == 4) {
+ // expression (0 <= percentile <= 1), order by column ref, order direction, null ordering
+ return getGenericUDAFEvaluator(parameters[1], parameters[0]);
+ } else {
+ throw new UDFArgumentTypeException(parameters.length - 1, "Only 1 argument and a single order column " +
+ "reference expected.");
+ }
+ }
+
+ private GenericUDAFEvaluator getGenericUDAFEvaluator(TypeInfo orderByColumn, TypeInfo percentile)
+ throws UDFArgumentTypeException {
+ if (orderByColumn.getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
+ + orderByColumn.getTypeName() + " is passed.");
+ }
- switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+ switch (((PrimitiveTypeInfo) orderByColumn).getPrimitiveCategory()) {
case BYTE:
case SHORT:
case INT:
case LONG:
case VOID:
- return parameters[1].getCategory() == ObjectInspector.Category.LIST ?
- new PercentileContLongArrayEvaluator() : new PercentileContLongEvaluator();
+ return createLongEvaluator(percentile);
case FLOAT:
case DOUBLE:
case DECIMAL:
- return parameters[1].getCategory() == ObjectInspector.Category.LIST ?
- new PercentileContDoubleArrayEvaluator() : new PercentileContDoubleEvaluator();
+ return createDoubleEvaluator(percentile);
case STRING:
case TIMESTAMP:
case VARCHAR:
@@ -104,22 +118,18 @@ public class GenericUDAFPercentileCont extends AbstractGenericUDAFResolver {
case DATE:
default:
throw new UDFArgumentTypeException(0,
- "Only numeric arguments are accepted but " + parameters[0].getTypeName() + " is passed.");
+ "Only numeric arguments are accepted but " + orderByColumn.getTypeName() + " is passed.");
}
}
- protected void validateParameterTypes(TypeInfo[] parameters) throws UDFArgumentTypeException {
- if (parameters.length < 2) {
- throw new UDFArgumentTypeException(parameters.length - 1, "Not enough arguments.");
- }
- if (parameters.length > 3) {
- throw new UDFArgumentTypeException(parameters.length - 1, "Too many arguments.");
- }
+ protected GenericUDAFEvaluator createLongEvaluator(TypeInfo percentile) {
+ return percentile.getCategory() == ObjectInspector.Category.LIST ?
+ new PercentileContLongArrayEvaluator() : new PercentileContLongEvaluator();
+ }
- if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
- throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
+ protected GenericUDAFEvaluator createDoubleEvaluator(TypeInfo percentile) {
+ return percentile.getCategory() == ObjectInspector.Category.LIST ?
+ new PercentileContDoubleArrayEvaluator() : new PercentileContDoubleEvaluator();
}
/**
@@ -196,7 +206,15 @@ public class GenericUDAFPercentileCont extends AbstractGenericUDAFResolver {
public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
super.init(m, parameters);
- initInspectors(parameters);
+ if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {// ...for real input data
+ if (parameters.length == 2) { // Order direction was not given, default to asc
+ initInspectors((PrimitiveObjectInspector) parameters[0]);
+ } else {
+ initInspectors((PrimitiveObjectInspector) parameters[1], (WritableConstantIntObjectInspector) parameters[2]);
+ }
+ } else { // ...for partial result as input
+ initPartialInspectors((StructObjectInspector) parameters[0]);
+ }
if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {// ...for partial result
partialResult = new Object[3];
@@ -229,25 +247,29 @@ public class GenericUDAFPercentileCont extends AbstractGenericUDAFResolver {
entriesList.sort(isAscending ? comparator : comparator.reversed());
}
- protected void initInspectors(ObjectInspector[] parameters) {
- if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {// ...for real input data
- inputOI = (PrimitiveObjectInspector) parameters[0];
- if (parameters.length == 2) { // Order direction was not given, default to asc
- isAscending = true;
- } else {
- isAscending = ((WritableConstantIntObjectInspector) parameters[2]).
- getWritableConstantValue().get() != DESCENDING_CODE;
- }
- } else { // ...for partial result as input
- soi = (StructObjectInspector) parameters[0];
+ // ...for real input data, no order direction
+ protected void initInspectors(PrimitiveObjectInspector orderByColumnOI) {
+ inputOI = orderByColumnOI;
+ isAscending = true;
+ }
- countsField = soi.getStructFieldRef("counts");
- percentilesField = soi.getStructFieldRef("percentiles");
- isAscendingField = soi.getStructFieldRef("isAscending");
+ // ...for real input data, with order direction
+ protected void initInspectors(
+ PrimitiveObjectInspector orderByColumnOI, WritableConstantIntObjectInspector orderDirectionOI) {
+ inputOI = orderByColumnOI;
+ isAscending = orderDirectionOI.getWritableConstantValue().get() != DESCENDING_CODE;
+ }
- countsOI = (MapObjectInspector) countsField.getFieldObjectInspector();
- percentilesOI = (ListObjectInspector) percentilesField.getFieldObjectInspector();
- }
+ // ...for partial result as input
+ protected void initPartialInspectors(StructObjectInspector objectInspector) {
+ soi = objectInspector;
+
+ countsField = soi.getStructFieldRef("counts");
+ percentilesField = soi.getStructFieldRef("percentiles");
+ isAscendingField = soi.getStructFieldRef("isAscending");
+
+ countsOI = (MapObjectInspector) countsField.getFieldObjectInspector();
+ percentilesOI = (ListObjectInspector) percentilesField.getFieldObjectInspector();
}
@Override
@@ -268,16 +290,23 @@ public class GenericUDAFPercentileCont extends AbstractGenericUDAFResolver {
@Override
public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
PercentileAgg percAgg = (PercentileAgg) agg;
+ if (parameters.length == 4) {
+ iterate(percAgg, parameters[0], parameters[1]);
+ } else {
+ iterate(percAgg, parameters[1], parameters[0]);
+ }
+ }
+ private void iterate(PercentileAgg percAgg, Object percentiles, Object oderByColumnValue) {
if (percAgg.percentiles == null) {
- percAgg.percentiles = converter.convertPercentileParameter(parameters[1]);
+ percAgg.percentiles = converter.convertPercentileParameter(percentiles);
}
- if (parameters[0] == null) {
+ if (oderByColumnValue == null) {
return;
}
- T input = getInput(parameters[0], inputOI);
+ T input = getInput(oderByColumnValue, inputOI);
if (input != null) {
increment(percAgg, wrapInput(input), 1);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java
index c8d3c12..8ae8ca2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileDisc.java
@@ -22,12 +22,9 @@ import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.exec.WindowFunctionDescription;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.LongWritable;
@@ -41,36 +38,19 @@ import org.apache.hadoop.io.LongWritable;
@WindowFunctionDescription(
supportsWindow = false,
pivotResult = true,
- supportsWithinGroup = true)
+ orderedAggregate = true)
public class GenericUDAFPercentileDisc extends GenericUDAFPercentileCont {
@Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
- validateParameterTypes(parameters);
-
- switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- case VOID:
- return parameters[1].getCategory() == ObjectInspector.Category.LIST ?
- new PercentileDiscLongArrayEvaluator() : new PercentileDiscLongEvaluator();
- case FLOAT:
- case DOUBLE:
- case DECIMAL:
- return parameters[1].getCategory() == ObjectInspector.Category.LIST ?
- new PercentileDiscDoubleArrayEvaluator() : new PercentileDiscDoubleEvaluator();
- case STRING:
- case TIMESTAMP:
- case VARCHAR:
- case CHAR:
- case BOOLEAN:
- case DATE:
- default:
- throw new UDFArgumentTypeException(0,
- "Only numeric arguments are accepted but " + parameters[0].getTypeName() + " is passed.");
- }
+ protected GenericUDAFEvaluator createLongEvaluator(TypeInfo percentile) {
+ return percentile.getCategory() == ObjectInspector.Category.LIST ?
+ new PercentileDiscLongArrayEvaluator() : new PercentileDiscLongEvaluator();
+ }
+
+ @Override
+ protected GenericUDAFEvaluator createDoubleEvaluator(TypeInfo percentile) {
+ return percentile.getCategory() == ObjectInspector.Category.LIST ?
+ new PercentileDiscDoubleArrayEvaluator() : new PercentileDiscDoubleEvaluator();
}
/**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java
index 13e2f53..644e728 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFRank.java
@@ -18,8 +18,26 @@
package org.apache.hadoop.hive.ql.udf.generic;
-import java.util.ArrayList;
+import static java.util.Arrays.asList;
+import static org.apache.hadoop.hive.ql.util.DirectionUtils.ASCENDING_CODE;
+import static org.apache.hadoop.hive.ql.util.DirectionUtils.DESCENDING_CODE;
+import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.
+ writableLongObjectInspector;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
+import org.apache.hadoop.hive.ql.util.NullOrdering;
+import org.apache.hadoop.hive.serde2.objectinspector.FullMapEqualComparer;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantIntObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.LongWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.Description;
@@ -34,42 +52,70 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.IntWritable;
@Description(
name = "rank",
value = "_FUNC_(x)")
@WindowFunctionDescription(
- supportsWindow = false,
- pivotResult = true,
- rankingFunction = true,
- impliesOrder = true)
+ supportsWindow = false,
+ pivotResult = true,
+ rankingFunction = true,
+ orderedAggregate = true)
public class GenericUDAFRank extends AbstractGenericUDAFResolver {
static final Logger LOG = LoggerFactory.getLogger(GenericUDAFRank.class.getName());
@Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info) throws SemanticException {
+ if (info.isWindowing()) {
+ return getWindowingEvaluator(info.getParameterObjectInspectors());
+ }
+ return getHypotheticalSetEvaluator(info.getParameterObjectInspectors());
+ }
+
+ private GenericUDAFEvaluator getWindowingEvaluator(ObjectInspector[] parameters) throws SemanticException {
if (parameters.length < 1) {
throw new UDFArgumentTypeException(parameters.length - 1,
"One or more arguments are expected.");
}
for (int i = 0; i < parameters.length; i++) {
- ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[i]);
- if (!ObjectInspectorUtils.compareSupported(oi)) {
- throw new UDFArgumentTypeException(i,
- "Cannot support comparison of map<> type or complex type containing map<>.");
- }
+ supportsCompare(parameters, i);
}
- return createEvaluator();
+ return createWindowingEvaluator();
}
- protected GenericUDAFAbstractRankEvaluator createEvaluator() {
+ protected GenericUDAFAbstractRankEvaluator createWindowingEvaluator() {
return new GenericUDAFRankEvaluator();
}
+ private GenericUDAFEvaluator getHypotheticalSetEvaluator(ObjectInspector[] parameters) throws SemanticException {
+ if (parameters.length % 4 != 0) {
+ throw new UDFArgumentTypeException(parameters.length,
+ "Invalid number of parameters: " +
+ "the number of hypothetical direct arguments must match the number of ordering columns");
+ }
+
+ for (int i = 0; i < parameters.length / 4; ++i) {
+ supportsCompare(parameters, 4 * i);
+ supportsCompare(parameters, 4 * i + 1);
+ }
+
+ return createHypotheticalSetEvaluator();
+ }
+
+ protected GenericUDAFHypotheticalSetRankEvaluator createHypotheticalSetEvaluator() {
+ return new GenericUDAFHypotheticalSetRankEvaluator();
+ }
+
+ private void supportsCompare(ObjectInspector[] parameters, int i2) throws UDFArgumentTypeException {
+ ObjectInspector oi = parameters[i2];
+ if (!ObjectInspectorUtils.compareSupported(oi)) {
+ throw new UDFArgumentTypeException(i2,
+ "Cannot support comparison of map<> type or complex type containing map<>.");
+ }
+ }
+
static class RankBuffer implements AggregationBuffer {
ArrayList<IntWritable> rowNums;
@@ -221,5 +267,210 @@ public class GenericUDAFRank extends AbstractGenericUDAFResolver {
return out;
}
+
+
+ /**
+ * Hypothetical rank calculation.
+ * Calculates the rank that a hypothetical row, given by the direct arguments of the
+ * function, would have within the group of rows specified by the ORDER BY clause:
+ * SELECT rank(expression_1[, expression_n]*) WITHIN GROUP (ORDER BY col_1[, col_n]*)
+ * The result is (the number of rows whose (col_1, ..., col_n) sort before (expression_1, ..., expression_n)) + 1.
+ */
+ public static class GenericUDAFHypotheticalSetRankEvaluator extends GenericUDAFEvaluator {
+ public static final String RANK_FIELD = "rank";
+ public static final String COUNT_FIELD = "count";
+ public static final ObjectInspector PARTIAL_RANK_OI = ObjectInspectorFactory.getStandardStructObjectInspector(
+ asList(RANK_FIELD, COUNT_FIELD),
+ asList(writableLongObjectInspector,
+ writableLongObjectInspector));
+
+ protected static class HypotheticalSetRankBuffer extends AbstractAggregationBuffer {
+ protected long rank = 0;
+ protected long rowCount = 0;
+
+ @Override
+ public int estimate() {
+ return JavaDataModel.PRIMITIVES2 * 2;
+ }
+ }
+
+ protected static class RankAssets {
+ private final ObjectInspector commonInputOI;
+ private final ObjectInspectorConverters.Converter directArgumentConverter;
+ private final ObjectInspectorConverters.Converter inputConverter;
+ protected final int order;
+ private final NullOrdering nullOrdering;
+
+ public RankAssets(ObjectInspector commonInputOI,
+ ObjectInspectorConverters.Converter directArgumentConverter,
+ ObjectInspectorConverters.Converter inputConverter,
+ int order, NullOrdering nullOrdering) {
+ this.commonInputOI = commonInputOI;
+ this.directArgumentConverter = directArgumentConverter;
+ this.inputConverter = inputConverter;
+ this.order = order;
+ this.nullOrdering = nullOrdering;
+ }
+
+ public int compare(Object inputValue, Object directArgumentValue) {
+ return ObjectInspectorUtils.compare(inputConverter.convert(inputValue), commonInputOI,
+ directArgumentConverter.convert(directArgumentValue), commonInputOI,
+ new FullMapEqualComparer(), nullOrdering.getNullValueOption());
+ }
+ }
+
+ public GenericUDAFHypotheticalSetRankEvaluator() {
+ this(false, PARTIAL_RANK_OI, writableLongObjectInspector);
+ }
+
+ public GenericUDAFHypotheticalSetRankEvaluator(
+ boolean allowEquality, ObjectInspector partialOutputOI, ObjectInspector finalOI) {
+ this.allowEquality = allowEquality;
+ this.partialOutputOI = partialOutputOI;
+ this.finalOI = finalOI;
+ }
+
+ private final transient boolean allowEquality;
+ private final transient ObjectInspector partialOutputOI;
+ private final transient ObjectInspector finalOI;
+ private transient List<RankAssets> rankAssetsList;
+ private transient StructObjectInspector partialInputOI;
+ private transient StructField partialInputRank;
+ private transient StructField partialInputCount;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ super.init(m, parameters);
+
+ if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
+ rankAssetsList = new ArrayList<>(parameters.length / 4);
+ for (int i = 0; i < parameters.length / 4; ++i) {
+ TypeInfo directArgumentType = TypeInfoUtils.getTypeInfoFromObjectInspector(parameters[4 * i]);
+ TypeInfo inputType = TypeInfoUtils.getTypeInfoFromObjectInspector(parameters[4 * i + 1]);
+ TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForComparison(inputType, directArgumentType);
+ ObjectInspector commonInputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(commonTypeInfo);
+ rankAssetsList.add(new RankAssets(
+ commonInputOI,
+ ObjectInspectorConverters.getConverter(parameters[4 * i], commonInputOI),
+ ObjectInspectorConverters.getConverter(parameters[4 * i + 1], commonInputOI),
+ ((WritableConstantIntObjectInspector) parameters[4 * i + 2]).
+ getWritableConstantValue().get(),
+ NullOrdering.fromCode(((WritableConstantIntObjectInspector) parameters[4 * i + 3]).
+ getWritableConstantValue().get())));
+ }
+ } else {
+ initPartial2AndFinalOI(parameters);
+ }
+
+ if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
+ return partialOutputOI;
+ }
+
+ return finalOI;
+ }
+
+ protected void initPartial2AndFinalOI(ObjectInspector[] parameters) {
+ partialInputOI = (StructObjectInspector) parameters[0];
+ partialInputRank = partialInputOI.getStructFieldRef(RANK_FIELD);
+ partialInputCount = partialInputOI.getStructFieldRef(COUNT_FIELD);
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ return new HypotheticalSetRankBuffer();
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ HypotheticalSetRankBuffer rankBuffer = (HypotheticalSetRankBuffer) agg;
+ rankBuffer.rank = 0;
+ rankBuffer.rowCount = 0;
+ }
+
+ protected static class CompareResult {
+ private final int compareResult;
+ private final int order;
+
+ public CompareResult(int compareResult, int order) {
+ this.compareResult = compareResult;
+ this.order = order;
+ }
+
+ public int getCompareResult() {
+ return compareResult;
+ }
+
+ public int getOrder() {
+ return order;
+ }
+ }
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ HypotheticalSetRankBuffer rankBuffer = (HypotheticalSetRankBuffer) agg;
+ rankBuffer.rowCount++;
+
+ CompareResult compareResult = compare(parameters);
+
+ if (compareResult.getCompareResult() == 0) {
+ if (allowEquality) {
+ rankBuffer.rank++;
+ }
+ return;
+ }
+
+ if (compareResult.getOrder() == ASCENDING_CODE && compareResult.getCompareResult() < 0 ||
+ compareResult.getOrder() == DESCENDING_CODE && compareResult.getCompareResult() > 0) {
+ rankBuffer.rank++;
+ }
+ }
+
+ protected CompareResult compare(Object[] parameters) {
+ int i = 0;
+ int c = 0;
+ for (RankAssets rankAssets : rankAssetsList) {
+ c = rankAssets.compare(parameters[4 * i + 1], parameters[4 * i]);
+ if (c != 0) {
+ break;
+ }
+ ++i;
+ }
+
+ if (c == 0) {
+ return new CompareResult(c, -1);
+ }
+
+ return new CompareResult(c, rankAssetsList.get(i).order);
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ HypotheticalSetRankBuffer rankBuffer = (HypotheticalSetRankBuffer) agg;
+ LongWritable[] result = new LongWritable[2];
+ result[0] = new LongWritable(rankBuffer.rank + 1);
+ result[1] = new LongWritable(rankBuffer.rowCount);
+ return result;
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ if (partial == null) {
+ return;
+ }
+
+ Object objRank = partialInputOI.getStructFieldData(partial, partialInputRank);
+ Object objCount = partialInputOI.getStructFieldData(partial, partialInputCount);
+
+ HypotheticalSetRankBuffer rankBuffer = (HypotheticalSetRankBuffer) agg;
+ rankBuffer.rank += ((LongWritable)objRank).get() - 1;
+ rankBuffer.rowCount += ((LongWritable)objCount).get();
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ HypotheticalSetRankBuffer rankBuffer = (HypotheticalSetRankBuffer) agg;
+ return new LongWritable(rankBuffer.rank + 1);
+ }
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/NullOrdering.java b/ql/src/java/org/apache/hadoop/hive/ql/util/NullOrdering.java
new file mode 100644
index 0000000..6bf1db2
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/util/NullOrdering.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.util;
+
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.NullValueOption;
+
+/**
+ * Enum describing the NULL ordering of a sort key; converts between its numeric code, HiveParser token and NullValueOption representations.
+ */
+public enum NullOrdering {
+ NULLS_FIRST(1, HiveParser.TOK_NULLS_FIRST, NullValueOption.MAXVALUE),
+ NULLS_LAST(0, HiveParser.TOK_NULLS_LAST, NullValueOption.MINVALUE);
+
+ NullOrdering(int code, int token, NullValueOption nullValueOption) {
+ this.code = code;
+ this.token = token;
+ this.nullValueOption = nullValueOption;
+ }
+
+ private final int code;
+ private final int token;
+ private final NullValueOption nullValueOption;
+
+ public static NullOrdering fromToken(int token) {
+ for (NullOrdering nullOrdering : NullOrdering.values()) {
+ if (nullOrdering.token == token) {
+ return nullOrdering;
+ }
+ }
+ throw new EnumConstantNotPresentException(NullOrdering.class, "No enum constant present with token " + token);
+ }
+
+ public static NullOrdering fromCode(int code) {
+ for (NullOrdering nullOrdering : NullOrdering.values()) {
+ if (nullOrdering.code == code) {
+ return nullOrdering;
+ }
+ }
+ throw new EnumConstantNotPresentException(NullOrdering.class, "No enum constant present with code " + code);
+ }
+
+ public int getCode() {
+ return code;
+ }
+
+ public int getToken() {
+ return token;
+ }
+
+ public NullValueOption getNullValueOption() {
+ return nullValueOption;
+ }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
index dead3ec..cd6c2ee 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
@@ -429,10 +429,6 @@ public class TestFunctionRegistry {
@Test
public void testImpliesOrder() throws Exception {
- Assert.assertTrue(FunctionRegistry.impliesOrder("rank"));
- Assert.assertTrue(FunctionRegistry.impliesOrder("dense_rank"));
- Assert.assertTrue(FunctionRegistry.impliesOrder("percent_rank"));
- Assert.assertTrue(FunctionRegistry.impliesOrder("cume_dist"));
Assert.assertTrue(FunctionRegistry.impliesOrder("first_value"));
Assert.assertTrue(FunctionRegistry.impliesOrder("last_value"));
Assert.assertTrue(FunctionRegistry.impliesOrder("lead"));
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseWithinGroupClause.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseWithinGroupClause.java
index 9d44ed8..fb7699e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseWithinGroupClause.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseWithinGroupClause.java
@@ -49,31 +49,22 @@ public class TestParseWithinGroupClause {
ASTNode withinGroupNode = (ASTNode) functionNode.getChild(2);
assertEquals(HiveParser.TOK_WITHIN_GROUP, withinGroupNode.getType());
- ASTNode tabSortColNameNode = (ASTNode) withinGroupNode.getChild(0);
- assertEquals(HiveParser.TOK_TABSORTCOLNAMEASC, tabSortColNameNode.getType());
- }
- @Test
- public void testParsePercentileContAsc() throws Exception {
- ASTNode tree = parseDriver.parseSelect(
- "SELECT percentile_cont(0.4) WITHIN GROUP (ORDER BY val ASC) FROM src", null);
- ASTNode selExprNode = (ASTNode) tree.getChild(0);
- ASTNode functionNode = (ASTNode) selExprNode.getChild(0);
- ASTNode withinGroupNode = (ASTNode) functionNode.getChild(2);
- ASTNode tabSortColNameNode = (ASTNode) withinGroupNode.getChild(0);
+ ASTNode orderByNode = (ASTNode) withinGroupNode.getChild(0);
+ assertEquals(HiveParser.TOK_ORDERBY, orderByNode.getType());
+
+ ASTNode tabSortColNameNode = (ASTNode) orderByNode.getChild(0);
assertEquals(HiveParser.TOK_TABSORTCOLNAMEASC, tabSortColNameNode.getType());
}
@Test
- public void testParsePercentileContDesc() throws Exception {
+ public void testParseMultipleColumnRefs() throws Exception {
ASTNode tree = parseDriver.parseSelect(
- "SELECT percentile_cont(0.4) WITHIN GROUP (ORDER BY val DESC) FROM src", null);
- ASTNode selExpr = (ASTNode) tree.getChild(0);
- ASTNode function = (ASTNode) selExpr.getChild(0);
+ "SELECT rank(3, 4) WITHIN GROUP (ORDER BY val, val2) FROM src", null);
ASTNode selExprNode = (ASTNode) tree.getChild(0);
ASTNode functionNode = (ASTNode) selExprNode.getChild(0);
- ASTNode withinGroupNode = (ASTNode) functionNode.getChild(2);
- ASTNode tabSortColNameNode = (ASTNode) withinGroupNode.getChild(0);
- assertEquals(HiveParser.TOK_TABSORTCOLNAMEDESC, tabSortColNameNode.getType());
+ ASTNode withinGroupNode = (ASTNode) functionNode.getChild(3);
+ ASTNode orderByNode = (ASTNode) withinGroupNode.getChild(0);
+ assertEquals(2, orderByNode.getChildCount());
}
}
diff --git a/ql/src/test/queries/clientpositive/hypothetical_set_aggregates.q b/ql/src/test/queries/clientpositive/hypothetical_set_aggregates.q
new file mode 100644
index 0000000..6b5f376
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/hypothetical_set_aggregates.q
@@ -0,0 +1,315 @@
+DESCRIBE FUNCTION rank;
+DESCRIBE FUNCTION EXTENDED rank;
+DESCRIBE FUNCTION dense_rank;
+DESCRIBE FUNCTION EXTENDED dense_rank;
+DESCRIBE FUNCTION percent_rank;
+DESCRIBE FUNCTION EXTENDED percent_rank;
+DESCRIBE FUNCTION cume_dist;
+DESCRIBE FUNCTION EXTENDED cume_dist;
+
+
+CREATE TABLE t_test (
+ col1 int,
+ col2 int
+);
+INSERT INTO t_test VALUES
+(NULL, NULL),
+(3, 0),
+(5, 1),
+(5, 1),
+(5, 2),
+(5, 3),
+(10, 20.0),
+(NULL, NULL),
+(NULL, NULL),
+(11, 10.0),
+(15, 7.0),
+(15, 15.0),
+(15, 16.0),
+(8, 8.0),
+(7, 7.0),
+(8, 8.0),
+(NULL, NULL);
+
+set hive.map.aggr = false;
+set hive.groupby.skewindata = false;
+
+select
+rank(1) WITHIN GROUP (ORDER BY col1),
+rank(2) WITHIN GROUP (ORDER BY col1),
+rank(3) WITHIN GROUP (ORDER BY col1),
+rank(4) WITHIN GROUP (ORDER BY col1),
+rank(5) WITHIN GROUP (ORDER BY col1),
+rank(6) WITHIN GROUP (ORDER BY col1),
+rank(7) WITHIN GROUP (ORDER BY col1),
+rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test;
+
+select
+rank(1, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(2, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(3, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(4, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(5, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(6, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(7, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(8, 3) WITHIN GROUP (ORDER BY col1, col2)
+from t_test;
+
+select
+dense_rank(1) WITHIN GROUP (ORDER BY col1),
+dense_rank(2) WITHIN GROUP (ORDER BY col1),
+dense_rank(3) WITHIN GROUP (ORDER BY col1),
+dense_rank(4) WITHIN GROUP (ORDER BY col1),
+dense_rank(5) WITHIN GROUP (ORDER BY col1),
+dense_rank(6) WITHIN GROUP (ORDER BY col1),
+dense_rank(7) WITHIN GROUP (ORDER BY col1),
+dense_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test;
+
+select
+dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2)
+from t_test;
+
+select
+percent_rank(1) WITHIN GROUP (ORDER BY col1),
+percent_rank(2) WITHIN GROUP (ORDER BY col1),
+percent_rank(3) WITHIN GROUP (ORDER BY col1),
+percent_rank(4) WITHIN GROUP (ORDER BY col1),
+percent_rank(5) WITHIN GROUP (ORDER BY col1),
+percent_rank(6) WITHIN GROUP (ORDER BY col1),
+percent_rank(7) WITHIN GROUP (ORDER BY col1),
+percent_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test;
+
+select
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(2) WITHIN GROUP (ORDER BY col1),
+cume_dist(3) WITHIN GROUP (ORDER BY col1),
+cume_dist(4) WITHIN GROUP (ORDER BY col1),
+cume_dist(5) WITHIN GROUP (ORDER BY col1),
+cume_dist(6) WITHIN GROUP (ORDER BY col1),
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(8) WITHIN GROUP (ORDER BY col1)
+from t_test;
+
+
+set hive.map.aggr = true;
+set hive.groupby.skewindata = false;
+
+select
+rank(1) WITHIN GROUP (ORDER BY col1),
+rank(2) WITHIN GROUP (ORDER BY col1),
+rank(3) WITHIN GROUP (ORDER BY col1),
+rank(4) WITHIN GROUP (ORDER BY col1),
+rank(5) WITHIN GROUP (ORDER BY col1),
+rank(6) WITHIN GROUP (ORDER BY col1),
+rank(7) WITHIN GROUP (ORDER BY col1),
+rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test;
+
+select
+rank(1, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(2, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(3, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(4, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(5, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(6, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(7, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(8, 3) WITHIN GROUP (ORDER BY col1, col2)
+from t_test;
+
+select
+dense_rank(1) WITHIN GROUP (ORDER BY col1),
+dense_rank(2) WITHIN GROUP (ORDER BY col1),
+dense_rank(3) WITHIN GROUP (ORDER BY col1),
+dense_rank(4) WITHIN GROUP (ORDER BY col1),
+dense_rank(5) WITHIN GROUP (ORDER BY col1),
+dense_rank(6) WITHIN GROUP (ORDER BY col1),
+dense_rank(7) WITHIN GROUP (ORDER BY col1),
+dense_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test;
+
+select
+dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2)
+from t_test;
+
+select
+percent_rank(1) WITHIN GROUP (ORDER BY col1),
+percent_rank(2) WITHIN GROUP (ORDER BY col1),
+percent_rank(3) WITHIN GROUP (ORDER BY col1),
+percent_rank(4) WITHIN GROUP (ORDER BY col1),
+percent_rank(5) WITHIN GROUP (ORDER BY col1),
+percent_rank(6) WITHIN GROUP (ORDER BY col1),
+percent_rank(7) WITHIN GROUP (ORDER BY col1),
+percent_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test;
+
+select
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(2) WITHIN GROUP (ORDER BY col1),
+cume_dist(3) WITHIN GROUP (ORDER BY col1),
+cume_dist(4) WITHIN GROUP (ORDER BY col1),
+cume_dist(5) WITHIN GROUP (ORDER BY col1),
+cume_dist(6) WITHIN GROUP (ORDER BY col1),
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(8) WITHIN GROUP (ORDER BY col1)
+from t_test;
+
+
+set hive.map.aggr = false;
+set hive.groupby.skewindata = true;
+
+
+select
+rank(1) WITHIN GROUP (ORDER BY col1),
+rank(2) WITHIN GROUP (ORDER BY col1),
+rank(3) WITHIN GROUP (ORDER BY col1),
+rank(4) WITHIN GROUP (ORDER BY col1),
+rank(5) WITHIN GROUP (ORDER BY col1),
+rank(6) WITHIN GROUP (ORDER BY col1),
+rank(7) WITHIN GROUP (ORDER BY col1),
+rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test;
+
+select
+rank(1, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(2, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(3, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(4, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(5, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(6, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(7, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(8, 3) WITHIN GROUP (ORDER BY col1, col2)
+from t_test;
+
+select
+dense_rank(1) WITHIN GROUP (ORDER BY col1),
+dense_rank(2) WITHIN GROUP (ORDER BY col1),
+dense_rank(3) WITHIN GROUP (ORDER BY col1),
+dense_rank(4) WITHIN GROUP (ORDER BY col1),
+dense_rank(5) WITHIN GROUP (ORDER BY col1),
+dense_rank(6) WITHIN GROUP (ORDER BY col1),
+dense_rank(7) WITHIN GROUP (ORDER BY col1),
+dense_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test;
+
+select
+dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2)
+from t_test;
+
+select
+percent_rank(1) WITHIN GROUP (ORDER BY col1),
+percent_rank(2) WITHIN GROUP (ORDER BY col1),
+percent_rank(3) WITHIN GROUP (ORDER BY col1),
+percent_rank(4) WITHIN GROUP (ORDER BY col1),
+percent_rank(5) WITHIN GROUP (ORDER BY col1),
+percent_rank(6) WITHIN GROUP (ORDER BY col1),
+percent_rank(7) WITHIN GROUP (ORDER BY col1),
+percent_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test;
+
+select
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(2) WITHIN GROUP (ORDER BY col1),
+cume_dist(3) WITHIN GROUP (ORDER BY col1),
+cume_dist(4) WITHIN GROUP (ORDER BY col1),
+cume_dist(5) WITHIN GROUP (ORDER BY col1),
+cume_dist(6) WITHIN GROUP (ORDER BY col1),
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(8) WITHIN GROUP (ORDER BY col1)
+from t_test;
+
+
+set hive.map.aggr = true;
+set hive.groupby.skewindata = true;
+
+
+select
+rank(1) WITHIN GROUP (ORDER BY col1),
+rank(2) WITHIN GROUP (ORDER BY col1),
+rank(3) WITHIN GROUP (ORDER BY col1),
+rank(4) WITHIN GROUP (ORDER BY col1),
+rank(5) WITHIN GROUP (ORDER BY col1),
+rank(6) WITHIN GROUP (ORDER BY col1),
+rank(7) WITHIN GROUP (ORDER BY col1),
+rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test;
+
+select
+rank(1, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(2, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(3, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(4, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(5, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(6, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(7, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(8, 3) WITHIN GROUP (ORDER BY col1, col2)
+from t_test;
+
+select
+dense_rank(1) WITHIN GROUP (ORDER BY col1),
+dense_rank(2) WITHIN GROUP (ORDER BY col1),
+dense_rank(3) WITHIN GROUP (ORDER BY col1),
+dense_rank(4) WITHIN GROUP (ORDER BY col1),
+dense_rank(5) WITHIN GROUP (ORDER BY col1),
+dense_rank(6) WITHIN GROUP (ORDER BY col1),
+dense_rank(7) WITHIN GROUP (ORDER BY col1),
+dense_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test;
+
+select
+dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2)
+from t_test;
+
+select
+percent_rank(1) WITHIN GROUP (ORDER BY col1),
+percent_rank(2) WITHIN GROUP (ORDER BY col1),
+percent_rank(3) WITHIN GROUP (ORDER BY col1),
+percent_rank(4) WITHIN GROUP (ORDER BY col1),
+percent_rank(5) WITHIN GROUP (ORDER BY col1),
+percent_rank(6) WITHIN GROUP (ORDER BY col1),
+percent_rank(7) WITHIN GROUP (ORDER BY col1),
+percent_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test;
+
+select
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(2) WITHIN GROUP (ORDER BY col1),
+cume_dist(3) WITHIN GROUP (ORDER BY col1),
+cume_dist(4) WITHIN GROUP (ORDER BY col1),
+cume_dist(5) WITHIN GROUP (ORDER BY col1),
+cume_dist(6) WITHIN GROUP (ORDER BY col1),
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(8) WITHIN GROUP (ORDER BY col1)
+from t_test;
+
+DROP TABLE t_test;
diff --git a/ql/src/test/results/clientpositive/hypothetical_set_aggregates.q.out b/ql/src/test/results/clientpositive/hypothetical_set_aggregates.q.out
new file mode 100644
index 0000000..3ea6f1f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/hypothetical_set_aggregates.q.out
@@ -0,0 +1,762 @@
+PREHOOK: query: DESCRIBE FUNCTION rank
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION rank
+POSTHOOK: type: DESCFUNCTION
+rank(x)
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED rank
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED rank
+POSTHOOK: type: DESCFUNCTION
+rank(x)
+Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRank
+Function type:BUILTIN
+PREHOOK: query: DESCRIBE FUNCTION dense_rank
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION dense_rank
+POSTHOOK: type: DESCFUNCTION
+dense_rank(x) The difference between RANK and DENSE_RANK is that DENSE_RANK leaves no gaps in ranking sequence when there are ties. That is, if you were ranking a competition using DENSE_RANK and had three people tie for second place, you would say that all three were in second place and that the next person came in third.
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED dense_rank
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED dense_rank
+POSTHOOK: type: DESCFUNCTION
+dense_rank(x) The difference between RANK and DENSE_RANK is that DENSE_RANK leaves no gaps in ranking sequence when there are ties. That is, if you were ranking a competition using DENSE_RANK and had three people tie for second place, you would say that all three were in second place and that the next person came in third.
+Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFDenseRank
+Function type:BUILTIN
+PREHOOK: query: DESCRIBE FUNCTION percent_rank
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION percent_rank
+POSTHOOK: type: DESCFUNCTION
+percent_rank(x) PERCENT_RANK is similar to CUME_DIST, but it uses rank values rather than row counts in its numerator. PERCENT_RANK of a row is calculated as: (rank of row in its partition - 1) / (number of rows in the partition - 1)
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED percent_rank
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percent_rank
+POSTHOOK: type: DESCFUNCTION
+percent_rank(x) PERCENT_RANK is similar to CUME_DIST, but it uses rank values rather than row counts in its numerator. PERCENT_RANK of a row is calculated as: (rank of row in its partition - 1) / (number of rows in the partition - 1)
+Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentRank
+Function type:BUILTIN
+PREHOOK: query: DESCRIBE FUNCTION cume_dist
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION cume_dist
+POSTHOOK: type: DESCFUNCTION
+cume_dist(x) - The CUME_DIST function (defined as the inverse of percentile in some statistical books) computes the position of a specified value relative to a set of values. To compute the CUME_DIST of a value x in a set S of size N, you use the formula: CUME_DIST(x) = number of values in S coming before and including x in the specified order/ N
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED cume_dist
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED cume_dist
+POSTHOOK: type: DESCFUNCTION
+cume_dist(x) - The CUME_DIST function (defined as the inverse of percentile in some statistical books) computes the position of a specified value relative to a set of values. To compute the CUME_DIST of a value x in a set S of size N, you use the formula: CUME_DIST(x) = number of values in S coming before and including x in the specified order/ N
+Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCumeDist
+Function type:BUILTIN
+PREHOOK: query: CREATE TABLE t_test (
+ col1 int,
+ col2 int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_test
+POSTHOOK: query: CREATE TABLE t_test (
+ col1 int,
+ col2 int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_test
+PREHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL),
+(3, 0),
+(5, 1),
+(5, 1),
+(5, 2),
+(5, 3),
+(10, 20.0),
+(NULL, NULL),
+(NULL, NULL),
+(11, 10.0),
+(15, 7.0),
+(15, 15.0),
+(15, 16.0),
+(8, 8.0),
+(7, 7.0),
+(8, 8.0),
+(NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t_test
+POSTHOOK: query: INSERT INTO t_test VALUES
+(NULL, NULL),
+(3, 0),
+(5, 1),
+(5, 1),
+(5, 2),
+(5, 3),
+(10, 20.0),
+(NULL, NULL),
+(NULL, NULL),
+(11, 10.0),
+(15, 7.0),
+(15, 15.0),
+(15, 16.0),
+(8, 8.0),
+(7, 7.0),
+(8, 8.0),
+(NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t_test
+POSTHOOK: Lineage: t_test.col1 SCRIPT []
+POSTHOOK: Lineage: t_test.col2 SCRIPT []
+PREHOOK: query: select
+rank(1) WITHIN GROUP (ORDER BY col1),
+rank(2) WITHIN GROUP (ORDER BY col1),
+rank(3) WITHIN GROUP (ORDER BY col1),
+rank(4) WITHIN GROUP (ORDER BY col1),
+rank(5) WITHIN GROUP (ORDER BY col1),
+rank(6) WITHIN GROUP (ORDER BY col1),
+rank(7) WITHIN GROUP (ORDER BY col1),
+rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+rank(1) WITHIN GROUP (ORDER BY col1),
+rank(2) WITHIN GROUP (ORDER BY col1),
+rank(3) WITHIN GROUP (ORDER BY col1),
+rank(4) WITHIN GROUP (ORDER BY col1),
+rank(5) WITHIN GROUP (ORDER BY col1),
+rank(6) WITHIN GROUP (ORDER BY col1),
+rank(7) WITHIN GROUP (ORDER BY col1),
+rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 5 5 6 6 10 10 11
+PREHOOK: query: select
+rank(1, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(2, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(3, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(4, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(5, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(6, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(7, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(8, 3) WITHIN GROUP (ORDER BY col1, col2)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+rank(1, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(2, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(3, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(4, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(5, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(6, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(7, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(8, 3) WITHIN GROUP (ORDER BY col1, col2)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 5 6 6 9 10 10 11
+PREHOOK: query: select
+dense_rank(1) WITHIN GROUP (ORDER BY col1),
+dense_rank(2) WITHIN GROUP (ORDER BY col1),
+dense_rank(3) WITHIN GROUP (ORDER BY col1),
+dense_rank(4) WITHIN GROUP (ORDER BY col1),
+dense_rank(5) WITHIN GROUP (ORDER BY col1),
+dense_rank(6) WITHIN GROUP (ORDER BY col1),
+dense_rank(7) WITHIN GROUP (ORDER BY col1),
+dense_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+dense_rank(1) WITHIN GROUP (ORDER BY col1),
+dense_rank(2) WITHIN GROUP (ORDER BY col1),
+dense_rank(3) WITHIN GROUP (ORDER BY col1),
+dense_rank(4) WITHIN GROUP (ORDER BY col1),
+dense_rank(5) WITHIN GROUP (ORDER BY col1),
+dense_rank(6) WITHIN GROUP (ORDER BY col1),
+dense_rank(7) WITHIN GROUP (ORDER BY col1),
+dense_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+2 2 2 3 3 4 4 5
+PREHOOK: query: select
+dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+2 2 3 3 3 6 6 7
+PREHOOK: query: select
+percent_rank(1) WITHIN GROUP (ORDER BY col1),
+percent_rank(2) WITHIN GROUP (ORDER BY col1),
+percent_rank(3) WITHIN GROUP (ORDER BY col1),
+percent_rank(4) WITHIN GROUP (ORDER BY col1),
+percent_rank(5) WITHIN GROUP (ORDER BY col1),
+percent_rank(6) WITHIN GROUP (ORDER BY col1),
+percent_rank(7) WITHIN GROUP (ORDER BY col1),
+percent_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+percent_rank(1) WITHIN GROUP (ORDER BY col1),
+percent_rank(2) WITHIN GROUP (ORDER BY col1),
+percent_rank(3) WITHIN GROUP (ORDER BY col1),
+percent_rank(4) WITHIN GROUP (ORDER BY col1),
+percent_rank(5) WITHIN GROUP (ORDER BY col1),
+percent_rank(6) WITHIN GROUP (ORDER BY col1),
+percent_rank(7) WITHIN GROUP (ORDER BY col1),
+percent_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+0.23529411764705882 0.23529411764705882 0.23529411764705882 0.29411764705882354 0.29411764705882354 0.5294117647058824 0.5294117647058824 0.5882352941176471
+PREHOOK: query: select
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(2) WITHIN GROUP (ORDER BY col1),
+cume_dist(3) WITHIN GROUP (ORDER BY col1),
+cume_dist(4) WITHIN GROUP (ORDER BY col1),
+cume_dist(5) WITHIN GROUP (ORDER BY col1),
+cume_dist(6) WITHIN GROUP (ORDER BY col1),
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(2) WITHIN GROUP (ORDER BY col1),
+cume_dist(3) WITHIN GROUP (ORDER BY col1),
+cume_dist(4) WITHIN GROUP (ORDER BY col1),
+cume_dist(5) WITHIN GROUP (ORDER BY col1),
+cume_dist(6) WITHIN GROUP (ORDER BY col1),
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+0.6111111111111112 0.2777777777777778 0.3333333333333333 0.3333333333333333 0.5555555555555556 0.5555555555555556 0.6111111111111112 0.7222222222222222
+PREHOOK: query: select
+rank(1) WITHIN GROUP (ORDER BY col1),
+rank(2) WITHIN GROUP (ORDER BY col1),
+rank(3) WITHIN GROUP (ORDER BY col1),
+rank(4) WITHIN GROUP (ORDER BY col1),
+rank(5) WITHIN GROUP (ORDER BY col1),
+rank(6) WITHIN GROUP (ORDER BY col1),
+rank(7) WITHIN GROUP (ORDER BY col1),
+rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+rank(1) WITHIN GROUP (ORDER BY col1),
+rank(2) WITHIN GROUP (ORDER BY col1),
+rank(3) WITHIN GROUP (ORDER BY col1),
+rank(4) WITHIN GROUP (ORDER BY col1),
+rank(5) WITHIN GROUP (ORDER BY col1),
+rank(6) WITHIN GROUP (ORDER BY col1),
+rank(7) WITHIN GROUP (ORDER BY col1),
+rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 5 5 6 6 10 10 11
+PREHOOK: query: select
+rank(1, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(2, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(3, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(4, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(5, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(6, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(7, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(8, 3) WITHIN GROUP (ORDER BY col1, col2)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+rank(1, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(2, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(3, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(4, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(5, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(6, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(7, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(8, 3) WITHIN GROUP (ORDER BY col1, col2)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 5 6 6 9 10 10 11
+PREHOOK: query: select
+dense_rank(1) WITHIN GROUP (ORDER BY col1),
+dense_rank(2) WITHIN GROUP (ORDER BY col1),
+dense_rank(3) WITHIN GROUP (ORDER BY col1),
+dense_rank(4) WITHIN GROUP (ORDER BY col1),
+dense_rank(5) WITHIN GROUP (ORDER BY col1),
+dense_rank(6) WITHIN GROUP (ORDER BY col1),
+dense_rank(7) WITHIN GROUP (ORDER BY col1),
+dense_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+dense_rank(1) WITHIN GROUP (ORDER BY col1),
+dense_rank(2) WITHIN GROUP (ORDER BY col1),
+dense_rank(3) WITHIN GROUP (ORDER BY col1),
+dense_rank(4) WITHIN GROUP (ORDER BY col1),
+dense_rank(5) WITHIN GROUP (ORDER BY col1),
+dense_rank(6) WITHIN GROUP (ORDER BY col1),
+dense_rank(7) WITHIN GROUP (ORDER BY col1),
+dense_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+2 2 2 3 3 4 4 5
+PREHOOK: query: select
+dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+2 2 3 3 3 6 6 7
+PREHOOK: query: select
+percent_rank(1) WITHIN GROUP (ORDER BY col1),
+percent_rank(2) WITHIN GROUP (ORDER BY col1),
+percent_rank(3) WITHIN GROUP (ORDER BY col1),
+percent_rank(4) WITHIN GROUP (ORDER BY col1),
+percent_rank(5) WITHIN GROUP (ORDER BY col1),
+percent_rank(6) WITHIN GROUP (ORDER BY col1),
+percent_rank(7) WITHIN GROUP (ORDER BY col1),
+percent_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+percent_rank(1) WITHIN GROUP (ORDER BY col1),
+percent_rank(2) WITHIN GROUP (ORDER BY col1),
+percent_rank(3) WITHIN GROUP (ORDER BY col1),
+percent_rank(4) WITHIN GROUP (ORDER BY col1),
+percent_rank(5) WITHIN GROUP (ORDER BY col1),
+percent_rank(6) WITHIN GROUP (ORDER BY col1),
+percent_rank(7) WITHIN GROUP (ORDER BY col1),
+percent_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+0.23529411764705882 0.23529411764705882 0.23529411764705882 0.29411764705882354 0.29411764705882354 0.5294117647058824 0.5294117647058824 0.5882352941176471
+PREHOOK: query: select
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(2) WITHIN GROUP (ORDER BY col1),
+cume_dist(3) WITHIN GROUP (ORDER BY col1),
+cume_dist(4) WITHIN GROUP (ORDER BY col1),
+cume_dist(5) WITHIN GROUP (ORDER BY col1),
+cume_dist(6) WITHIN GROUP (ORDER BY col1),
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(2) WITHIN GROUP (ORDER BY col1),
+cume_dist(3) WITHIN GROUP (ORDER BY col1),
+cume_dist(4) WITHIN GROUP (ORDER BY col1),
+cume_dist(5) WITHIN GROUP (ORDER BY col1),
+cume_dist(6) WITHIN GROUP (ORDER BY col1),
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+0.6111111111111112 0.2777777777777778 0.3333333333333333 0.3333333333333333 0.5555555555555556 0.5555555555555556 0.6111111111111112 0.7222222222222222
+PREHOOK: query: select
+rank(1) WITHIN GROUP (ORDER BY col1),
+rank(2) WITHIN GROUP (ORDER BY col1),
+rank(3) WITHIN GROUP (ORDER BY col1),
+rank(4) WITHIN GROUP (ORDER BY col1),
+rank(5) WITHIN GROUP (ORDER BY col1),
+rank(6) WITHIN GROUP (ORDER BY col1),
+rank(7) WITHIN GROUP (ORDER BY col1),
+rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+rank(1) WITHIN GROUP (ORDER BY col1),
+rank(2) WITHIN GROUP (ORDER BY col1),
+rank(3) WITHIN GROUP (ORDER BY col1),
+rank(4) WITHIN GROUP (ORDER BY col1),
+rank(5) WITHIN GROUP (ORDER BY col1),
+rank(6) WITHIN GROUP (ORDER BY col1),
+rank(7) WITHIN GROUP (ORDER BY col1),
+rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 5 5 6 6 10 10 11
+PREHOOK: query: select
+rank(1, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(2, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(3, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(4, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(5, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(6, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(7, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(8, 3) WITHIN GROUP (ORDER BY col1, col2)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+rank(1, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(2, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(3, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(4, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(5, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(6, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(7, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(8, 3) WITHIN GROUP (ORDER BY col1, col2)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 5 6 6 9 10 10 11
+PREHOOK: query: select
+dense_rank(1) WITHIN GROUP (ORDER BY col1),
+dense_rank(2) WITHIN GROUP (ORDER BY col1),
+dense_rank(3) WITHIN GROUP (ORDER BY col1),
+dense_rank(4) WITHIN GROUP (ORDER BY col1),
+dense_rank(5) WITHIN GROUP (ORDER BY col1),
+dense_rank(6) WITHIN GROUP (ORDER BY col1),
+dense_rank(7) WITHIN GROUP (ORDER BY col1),
+dense_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+dense_rank(1) WITHIN GROUP (ORDER BY col1),
+dense_rank(2) WITHIN GROUP (ORDER BY col1),
+dense_rank(3) WITHIN GROUP (ORDER BY col1),
+dense_rank(4) WITHIN GROUP (ORDER BY col1),
+dense_rank(5) WITHIN GROUP (ORDER BY col1),
+dense_rank(6) WITHIN GROUP (ORDER BY col1),
+dense_rank(7) WITHIN GROUP (ORDER BY col1),
+dense_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+2 2 2 3 3 4 4 5
+PREHOOK: query: select
+dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+2 2 3 3 3 6 6 7
+PREHOOK: query: select
+percent_rank(1) WITHIN GROUP (ORDER BY col1),
+percent_rank(2) WITHIN GROUP (ORDER BY col1),
+percent_rank(3) WITHIN GROUP (ORDER BY col1),
+percent_rank(4) WITHIN GROUP (ORDER BY col1),
+percent_rank(5) WITHIN GROUP (ORDER BY col1),
+percent_rank(6) WITHIN GROUP (ORDER BY col1),
+percent_rank(7) WITHIN GROUP (ORDER BY col1),
+percent_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+percent_rank(1) WITHIN GROUP (ORDER BY col1),
+percent_rank(2) WITHIN GROUP (ORDER BY col1),
+percent_rank(3) WITHIN GROUP (ORDER BY col1),
+percent_rank(4) WITHIN GROUP (ORDER BY col1),
+percent_rank(5) WITHIN GROUP (ORDER BY col1),
+percent_rank(6) WITHIN GROUP (ORDER BY col1),
+percent_rank(7) WITHIN GROUP (ORDER BY col1),
+percent_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+0.23529411764705882 0.23529411764705882 0.23529411764705882 0.29411764705882354 0.29411764705882354 0.5294117647058824 0.5294117647058824 0.5882352941176471
+PREHOOK: query: select
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(2) WITHIN GROUP (ORDER BY col1),
+cume_dist(3) WITHIN GROUP (ORDER BY col1),
+cume_dist(4) WITHIN GROUP (ORDER BY col1),
+cume_dist(5) WITHIN GROUP (ORDER BY col1),
+cume_dist(6) WITHIN GROUP (ORDER BY col1),
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(2) WITHIN GROUP (ORDER BY col1),
+cume_dist(3) WITHIN GROUP (ORDER BY col1),
+cume_dist(4) WITHIN GROUP (ORDER BY col1),
+cume_dist(5) WITHIN GROUP (ORDER BY col1),
+cume_dist(6) WITHIN GROUP (ORDER BY col1),
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+0.6111111111111112 0.2777777777777778 0.3333333333333333 0.3333333333333333 0.5555555555555556 0.5555555555555556 0.6111111111111112 0.7222222222222222
+PREHOOK: query: select
+rank(1) WITHIN GROUP (ORDER BY col1),
+rank(2) WITHIN GROUP (ORDER BY col1),
+rank(3) WITHIN GROUP (ORDER BY col1),
+rank(4) WITHIN GROUP (ORDER BY col1),
+rank(5) WITHIN GROUP (ORDER BY col1),
+rank(6) WITHIN GROUP (ORDER BY col1),
+rank(7) WITHIN GROUP (ORDER BY col1),
+rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+rank(1) WITHIN GROUP (ORDER BY col1),
+rank(2) WITHIN GROUP (ORDER BY col1),
+rank(3) WITHIN GROUP (ORDER BY col1),
+rank(4) WITHIN GROUP (ORDER BY col1),
+rank(5) WITHIN GROUP (ORDER BY col1),
+rank(6) WITHIN GROUP (ORDER BY col1),
+rank(7) WITHIN GROUP (ORDER BY col1),
+rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 5 5 6 6 10 10 11
+PREHOOK: query: select
+rank(1, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(2, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(3, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(4, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(5, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(6, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(7, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(8, 3) WITHIN GROUP (ORDER BY col1, col2)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+rank(1, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(2, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(3, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(4, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(5, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(6, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(7, 3) WITHIN GROUP (ORDER BY col1, col2),
+rank(8, 3) WITHIN GROUP (ORDER BY col1, col2)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+5 5 6 6 9 10 10 11
+PREHOOK: query: select
+dense_rank(1) WITHIN GROUP (ORDER BY col1),
+dense_rank(2) WITHIN GROUP (ORDER BY col1),
+dense_rank(3) WITHIN GROUP (ORDER BY col1),
+dense_rank(4) WITHIN GROUP (ORDER BY col1),
+dense_rank(5) WITHIN GROUP (ORDER BY col1),
+dense_rank(6) WITHIN GROUP (ORDER BY col1),
+dense_rank(7) WITHIN GROUP (ORDER BY col1),
+dense_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+dense_rank(1) WITHIN GROUP (ORDER BY col1),
+dense_rank(2) WITHIN GROUP (ORDER BY col1),
+dense_rank(3) WITHIN GROUP (ORDER BY col1),
+dense_rank(4) WITHIN GROUP (ORDER BY col1),
+dense_rank(5) WITHIN GROUP (ORDER BY col1),
+dense_rank(6) WITHIN GROUP (ORDER BY col1),
+dense_rank(7) WITHIN GROUP (ORDER BY col1),
+dense_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+2 2 2 3 3 4 4 5
+PREHOOK: query: select
+dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+dense_rank(1, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(2, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(3, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(4, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(5, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(6, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(7, 1) WITHIN GROUP (ORDER BY col1, col2),
+dense_rank(8, 1) WITHIN GROUP (ORDER BY col1, col2)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+2 2 3 3 3 6 6 7
+PREHOOK: query: select
+percent_rank(1) WITHIN GROUP (ORDER BY col1),
+percent_rank(2) WITHIN GROUP (ORDER BY col1),
+percent_rank(3) WITHIN GROUP (ORDER BY col1),
+percent_rank(4) WITHIN GROUP (ORDER BY col1),
+percent_rank(5) WITHIN GROUP (ORDER BY col1),
+percent_rank(6) WITHIN GROUP (ORDER BY col1),
+percent_rank(7) WITHIN GROUP (ORDER BY col1),
+percent_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+percent_rank(1) WITHIN GROUP (ORDER BY col1),
+percent_rank(2) WITHIN GROUP (ORDER BY col1),
+percent_rank(3) WITHIN GROUP (ORDER BY col1),
+percent_rank(4) WITHIN GROUP (ORDER BY col1),
+percent_rank(5) WITHIN GROUP (ORDER BY col1),
+percent_rank(6) WITHIN GROUP (ORDER BY col1),
+percent_rank(7) WITHIN GROUP (ORDER BY col1),
+percent_rank(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+0.23529411764705882 0.23529411764705882 0.23529411764705882 0.29411764705882354 0.29411764705882354 0.5294117647058824 0.5294117647058824 0.5882352941176471
+PREHOOK: query: select
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(2) WITHIN GROUP (ORDER BY col1),
+cume_dist(3) WITHIN GROUP (ORDER BY col1),
+cume_dist(4) WITHIN GROUP (ORDER BY col1),
+cume_dist(5) WITHIN GROUP (ORDER BY col1),
+cume_dist(6) WITHIN GROUP (ORDER BY col1),
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_test
+#### A masked pattern was here ####
+POSTHOOK: query: select
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(2) WITHIN GROUP (ORDER BY col1),
+cume_dist(3) WITHIN GROUP (ORDER BY col1),
+cume_dist(4) WITHIN GROUP (ORDER BY col1),
+cume_dist(5) WITHIN GROUP (ORDER BY col1),
+cume_dist(6) WITHIN GROUP (ORDER BY col1),
+cume_dist(7) WITHIN GROUP (ORDER BY col1),
+cume_dist(8) WITHIN GROUP (ORDER BY col1)
+from t_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_test
+#### A masked pattern was here ####
+0.6111111111111112 0.2777777777777778 0.3333333333333333 0.3333333333333333 0.5555555555555556 0.5555555555555556 0.6111111111111112 0.7222222222222222
+PREHOOK: query: DROP TABLE t_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t_test
+PREHOOK: Output: default@t_test
+POSTHOOK: query: DROP TABLE t_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t_test
+POSTHOOK: Output: default@t_test
diff --git a/ql/src/test/results/clientpositive/udaf_percentile_cont.q.out b/ql/src/test/results/clientpositive/udaf_percentile_cont.q.out
index f12cb6c..5b02d24 100644
--- a/ql/src/test/results/clientpositive/udaf_percentile_cont.q.out
+++ b/ql/src/test/results/clientpositive/udaf_percentile_cont.q.out
@@ -508,23 +508,23 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 17 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: percentile_cont(_col0, 0), percentile_cont(_col0, 0.2), percentile_cont(_col0, 0.2, 1), percentile_cont(_col0, 0.2, 0)
+ aggregations: percentile_cont(_col0, 0), percentile_cont(_col0, 0.2), percentile_cont(0.2, _col0, 1, 0), percentile_cont(0.2, _col0, 1, 1), percentile_cont(0.2, _col0, 0, 0), percentile_cont(0.2, _col0, 0, 1)
minReductionHashAggr: 0.99
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 3152 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 4728 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 3152 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: struct<counts:map<bigint,bigint>,percentiles:array<double>,isascending:boolean>), _col1 (type: struct<counts:map<bigint,bigint>,percentiles:array<double>,isascending:boolean>), _col2 (type: struct<counts:map<bigint,bigint>,percentiles:array<double>,isascending:boolean>), _col3 (type: struct<counts:map<bigint,bigint>,percentiles:array<double>,isascending:boolean>)
+ Statistics: Num rows: 1 Data size: 4728 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct<counts:map<bigint,bigint>,percentiles:array<double>,isascending:boolean>), _col1 (type: struct<counts:map<bigint,bigint>,percentiles:array<double>,isascending:boolean>), _col2 (type: struct<counts:map<bigint,bigint>,percentiles:array<double>,isascending:boolean>), _col3 (type: struct<counts:map<bigint,bigint>,percentiles:array<double>,isascending:boolean>), _col4 (type: struct<counts:map<bigint,bigint>,percentiles:array<double>,isa [...]
Reduce Operator Tree:
Group By Operator
- aggregations: percentile_cont(VALUE._col0), percentile_cont(VALUE._col1), percentile_cont(VALUE._col2), percentile_cont(VALUE._col3)
+ aggregations: percentile_cont(VALUE._col0), percentile_cont(VALUE._col1), percentile_cont(VALUE._col2), percentile_cont(VALUE._col3), percentile_cont(VALUE._col4), percentile_cont(VALUE._col5)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col2 (type: double), _col2 (type: double), (_col2 = _col1) (type: boolean), _col2 (type: double), (_col2 = _col1) (type: boolean), _col3 (type: double), _col3 (type: double), _col3 (type: double)
+ expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col2 (type: double), (_col2 = _col1) (type: boolean), _col2 (type: double), (_col2 = _col1) (type: boolean), _col4 (type: double), _col5 (type: double), _col4 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/udaf_percentile_disc.q.out b/ql/src/test/results/clientpositive/udaf_percentile_disc.q.out
index d10fee5..41de8a2 100644
--- a/ql/src/test/results/clientpositive/udaf_percentile_disc.q.out
+++ b/ql/src/test/results/clientpositive/udaf_percentile_disc.q.out
@@ -508,23 +508,23 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 17 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: percentile_disc(_col0, 0), percentile_disc(_col0, 0.2), percentile_disc(_col0, 0.2, 1), percentile_disc(_col0, 0.2, 0)
+ aggregations: percentile_disc(_col0, 0), percentile_disc(_col0, 0.2), percentile_disc(0.2, _col0, 1, 0), percentile_disc(0.2, _col0, 1, 1), percentile_disc(0.2, _col0, 0, 0), percentile_disc(0.2, _col0, 0, 1)
minReductionHashAggr: 0.99
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 3152 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 4728 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 3152 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: struct<counts:map<bigint,bigint>,percentiles:array<double>,isascending:boolean>), _col1 (type: struct<counts:map<bigint,bigint>,percentiles:array<double>,isascending:boolean>), _col2 (type: struct<counts:map<bigint,bigint>,percentiles:array<double>,isascending:boolean>), _col3 (type: struct<counts:map<bigint,bigint>,percentiles:array<double>,isascending:boolean>)
+ Statistics: Num rows: 1 Data size: 4728 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct<counts:map<bigint,bigint>,percentiles:array<double>,isascending:boolean>), _col1 (type: struct<counts:map<bigint,bigint>,percentiles:array<double>,isascending:boolean>), _col2 (type: struct<counts:map<bigint,bigint>,percentiles:array<double>,isascending:boolean>), _col3 (type: struct<counts:map<bigint,bigint>,percentiles:array<double>,isascending:boolean>), _col4 (type: struct<counts:map<bigint,bigint>,percentiles:array<double>,isa [...]
Reduce Operator Tree:
Group By Operator
- aggregations: percentile_disc(VALUE._col0), percentile_disc(VALUE._col1), percentile_disc(VALUE._col2), percentile_disc(VALUE._col3)
+ aggregations: percentile_disc(VALUE._col0), percentile_disc(VALUE._col1), percentile_disc(VALUE._col2), percentile_disc(VALUE._col3), percentile_disc(VALUE._col4), percentile_disc(VALUE._col5)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col2 (type: double), _col2 (type: double), (_col2 = _col1) (type: boolean), _col2 (type: double), (_col2 = _col1) (type: boolean), _col3 (type: double), _col3 (type: double), _col3 (type: double)
+ expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col2 (type: double), (_col2 = _col1) (type: boolean), _col2 (type: double), (_col2 = _col1) (type: boolean), _col4 (type: double), _col5 (type: double), _col4 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator