You are viewing a plain-text version of this content; the canonical link to the original message is available in the HTML version of this archive page.
Posted to commits@hive.apache.org by kr...@apache.org on 2023/02/09 07:32:27 UTC
[hive] branch master updated: HIVE-26054: Distinct + Groupby with column alias is failing (Krisztian Kasa, reviewed by Laszlo Vegh)
This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 7082fd1dfd0 HIVE-26054: Distinct + Groupby with column alias is failing (Krisztian Kasa, reviewed by Laszlo Vegh)
7082fd1dfd0 is described below
commit 7082fd1dfd087c99e6f00a7a0e95a30e198fede8
Author: Krisztian Kasa <ka...@gmail.com>
AuthorDate: Thu Feb 9 08:32:19 2023 +0100
HIVE-26054: Distinct + Groupby with column alias is failing (Krisztian Kasa, reviewed by Laszlo Vegh)
---
.../hadoop/hive/ql/parse/CalcitePlanner.java | 61 ++++++---
.../apache/hadoop/hive/ql/parse/RowResolver.java | 34 +++--
.../queries/clientpositive/distinct_order_by.q | 17 +++
.../queries/clientpositive/distinct_windowing_2.q | 35 +++++
.../test/queries/clientpositive/order_by_expr_2.q | 47 +++++++
.../clientpositive/llap/distinct_order_by.q.out | 62 +++++++++
.../clientpositive/llap/distinct_windowing_2.q.out | 123 +++++++++++++++++
.../clientpositive/llap/order_by_expr_1.q.out | 4 +-
.../clientpositive/llap/order_by_expr_2.q.out | 149 +++++++++++++++++++++
.../results/clientpositive/llap/subquery_in.q.out | 6 +-
.../perf/tpcds30tb/tez/cbo_query47.q.out | 2 +-
.../perf/tpcds30tb/tez/cbo_query57.q.out | 2 +-
.../perf/tpcds30tb/tez/cbo_query79.q.out | 4 +-
.../perf/tpcds30tb/tez/cbo_query89.q.out | 2 +-
.../perf/tpcds30tb/tez/query79.q.out | 30 ++---
15 files changed, 526 insertions(+), 52 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 7f274cadcbf..a500d2be957 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -17,7 +17,6 @@
*/
package org.apache.hadoop.hive.ql.parse;
-import com.google.common.base.Function;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableBiMap;
import com.google.common.collect.ImmutableList;
@@ -315,6 +314,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFInline;
import org.apache.hadoop.hive.ql.util.DirectionUtils;
import org.apache.hadoop.hive.ql.util.NullOrdering;
import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
@@ -4795,7 +4795,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
// columns, and in fact adding all columns would change the behavior of
// DISTINCT, so we bypass this logic.
if ((obAST != null || sbAST != null)
- && selExprList.getToken().getType() != HiveParser.TOK_SELECTDI
+ && !(selForWindow != null && selExprList.getToken().getType() == HiveParser.TOK_SELECTDI)
&& !isAllColRefRewrite) {
// 1. OB Expr sanity test
// in strict mode, in the presence of order by, limit must be
@@ -4807,7 +4807,34 @@ public class CalcitePlanner extends SemanticAnalyzer {
throw new SemanticException(SemanticAnalyzer.generateErrorMessage(obAST, error));
}
}
- originalRR = appendInputColumns(srcRel, columnList, outputRR, inputRR);
+
+ originalRR = outputRR.duplicate();
+ appendInputColumns(srcRel, columnList, outputRR, inputRR);
+
+ ASTNode obOrSbAST = obAST != null ? obAST : sbAST;
+ for (int i = 0; i < obOrSbAST.getChildCount(); ++i) {
+ ASTNode obExprAST = (ASTNode) obOrSbAST.getChild(i);
+ ASTNode nullObASTExpr = (ASTNode) obExprAST.getChild(0);
+ ASTNode ref = (ASTNode) nullObASTExpr.getChild(0);
+ RexNode obRex;
+ try {
+ Map<ASTNode, RexNode> astToExprNDescMap = genAllRexNode(ref, inputRR, cluster.getRexBuilder());
+ obRex = astToExprNDescMap.get(ref);
+ } catch (SemanticException ex) {
+ continue;
+ }
+ if (obRex instanceof RexInputRef) {
+ // Order by key is a projected column reference
+ continue;
+ }
+ columnList.add(obRex);
+
+ String field = getColumnInternalName(outputRR.getColumnInfos().size());
+ ObjectInspector oi = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
+ TypeConverter.convert(obRex.getType()));
+ outputRR.putExpression(ref, new ColumnInfo(field, oi, "", false));
+ }
+
outputRel = genSelectRelNode(columnList, outputRR, srcRel);
// outputRel is the generated augmented select with extra unselected
// columns, and originalRR is the original generated select
@@ -4815,7 +4842,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
} else {
if (qbp.getQualifyExprForClause(dest) != null) {
int originalColumnListSize = columnList.size();
- originalRR = appendInputColumns(srcRel, columnList, outputRR, inputRR);
+ originalRR = outputRR.duplicate();
+ appendInputColumns(srcRel, columnList, outputRR, inputRR);
RelNode combinedProject = genSelectRelNode(columnList, outputRR, srcRel);
RelNode qualifyRel = genQualifyLogicalPlan(qb, combinedProject);
List<RexNode> topProjectColumnList = new ArrayList<>(originalColumnListSize);
@@ -4836,11 +4864,15 @@ public class CalcitePlanner extends SemanticAnalyzer {
outputRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
outputRel, groupSet, null, new ArrayList<AggregateCall>());
RowResolver groupByOutputRowResolver = new RowResolver();
+ List<ASTNode> gbyKeyExpressions = getGroupByForClause(qbp, selClauseName);
for (int i = 0; i < outputRR.getColumnInfos().size(); i++) {
ColumnInfo colInfo = outputRR.getColumnInfos().get(i);
ColumnInfo newColInfo = new ColumnInfo(colInfo.getInternalName(),
colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
groupByOutputRowResolver.put(colInfo.getTabAlias(), colInfo.getAlias(), newColInfo);
+ if (gbyKeyExpressions != null && gbyKeyExpressions.size() == outputRR.getColumnInfos().size()) {
+ groupByOutputRowResolver.putExpression(gbyKeyExpressions.get(i), colInfo);
+ }
}
relToHiveColNameCalcitePosMap.put(outputRel, buildHiveToCalciteColumnMap(groupByOutputRowResolver));
this.relToHiveRR.put(outputRel, groupByOutputRowResolver);
@@ -4850,23 +4882,17 @@ public class CalcitePlanner extends SemanticAnalyzer {
return new Pair<>(outputRel, outputRR);
}
- private RowResolver appendInputColumns(RelNode srcRel, List<RexNode> columnList, RowResolver outputRR, RowResolver inputRR) throws SemanticException {
- RowResolver originalRR;
+ private void appendInputColumns(
+ RelNode srcRel, List<RexNode> columnList, RowResolver outputRR, RowResolver inputRR)
+ throws SemanticException {
List<RexNode> originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(),
- new Function<RelDataTypeField, RexNode>() {
- @Override
- public RexNode apply(RelDataTypeField input) {
- return new RexInputRef(input.getIndex(), input.getType());
- }
- });
- originalRR = outputRR.duplicate();
+ input -> new RexInputRef(input.getIndex(), input.getType()));
for (int i = 0; i < inputRR.getColumnInfos().size(); i++) {
ColumnInfo colInfo = new ColumnInfo(inputRR.getColumnInfos().get(i));
- String internalName = SemanticAnalyzer.getColumnInternalName(outputRR.getColumnInfos()
- .size() + i);
+ String internalName = SemanticAnalyzer.getColumnInternalName(outputRR.getColumnInfos().size());
colInfo.setInternalName(internalName);
- // if there is any confict, then we do not generate it in the new select
- // otherwise, we add it into the calciteColLst and generate the new select
+ // if there is any conflict, then we do not generate it in the new select
+ // otherwise we add it into the calciteColLst and generate the new select
if (!outputRR.putWithCheck(colInfo.getTabAlias(), colInfo.getAlias(), internalName,
colInfo)) {
LOG.trace("Column already present in RR. skipping.");
@@ -4874,7 +4900,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
columnList.add(originalInputRefs.get(i));
}
}
- return originalRR;
}
Integer genRexNodeRegex(String colRegex, String tabAlias, ASTNode sel,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
index c58a0a076f0..715e9bd6c88 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
@@ -81,7 +81,10 @@ public class RowResolver implements Serializable{
public void putExpression(ASTNode node, ColumnInfo colInfo) {
String treeAsString = node.toStringTree();
expressionMap.put(treeAsString, node);
- put("", treeAsString, colInfo);
+ if (!putInternal("", treeAsString, colInfo)) {
+ return;
+ }
+ colInfo.setAlias(treeAsString);
}
/**
@@ -100,18 +103,26 @@ public class RowResolver implements Serializable{
return expressionMap.get(node.toStringTree());
}
- public void put(String tab_alias, String col_alias, ColumnInfo colInfo) {
- if (!addMappingOnly(tab_alias, col_alias, colInfo)) {
+ public void put(String tabAlias, String colAlias, ColumnInfo colInfo) {
+ if (!putInternal(tabAlias, colAlias, colInfo)) {
+ return;
+ }
+ if (colAlias != null) {
+ colInfo.setAlias(colAlias.toLowerCase());
+ }
+ }
+
+ private boolean putInternal(String tabAlias, String colAlias, ColumnInfo colInfo) {
+ if (!addMappingOnly(tabAlias, colAlias, colInfo)) {
//Make sure that the table alias and column alias are stored
//in the column info
- if (tab_alias != null) {
- colInfo.setTabAlias(tab_alias.toLowerCase());
- }
- if (col_alias != null) {
- colInfo.setAlias(col_alias.toLowerCase());
+ if (tabAlias != null) {
+ colInfo.setTabAlias(tabAlias.toLowerCase());
}
rowSchema.getSignature().add(colInfo);
+ return true;
}
+ return false;
}
private void keepAmbiguousInfo(String col_alias, String tab_alias) {
@@ -481,11 +492,16 @@ public class RowResolver implements Serializable{
public RowResolver duplicate() {
RowResolver resolver = new RowResolver();
resolver.rowSchema = new RowSchema(rowSchema);
- resolver.rslvMap.putAll(rslvMap);
+ for (Map.Entry<String, Map<String, ColumnInfo>> entry : rslvMap.entrySet()) {
+ resolver.rslvMap.put(entry.getKey(), new LinkedHashMap<>(entry.getValue()));
+ }
resolver.invRslvMap.putAll(invRslvMap);
resolver.altInvRslvMap.putAll(altInvRslvMap);
resolver.expressionMap.putAll(expressionMap);
resolver.isExprResolver = isExprResolver;
+ for (Map.Entry<String, Map<String, String>> entry : ambiguousColumns.entrySet()) {
+ resolver.ambiguousColumns.put(entry.getKey(), new LinkedHashMap<>(entry.getValue()));
+ }
resolver.ambiguousColumns.putAll(ambiguousColumns);
resolver.checkForAmbiguity = checkForAmbiguity;
return resolver;
diff --git a/ql/src/test/queries/clientpositive/distinct_order_by.q b/ql/src/test/queries/clientpositive/distinct_order_by.q
new file mode 100644
index 00000000000..92f73e7ad3d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/distinct_order_by.q
@@ -0,0 +1,17 @@
+create table t1 (a bigint, b int);
+
+insert into t1(a, b) values
+(1, 1),
+(1, 2),
+(2, 2),
+(3, 2);
+
+explain cbo
+select distinct b + 2 as alias_b, b
+from t1
+order by b + 2, b;
+
+
+select distinct b + 2 as alias_b, b
+from t1
+order by b + 2, b;
diff --git a/ql/src/test/queries/clientpositive/distinct_windowing_2.q b/ql/src/test/queries/clientpositive/distinct_windowing_2.q
new file mode 100644
index 00000000000..8eb26fa908a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/distinct_windowing_2.q
@@ -0,0 +1,35 @@
+-- Test Order By clause has expressions when query has distinct + window func and select expressions has alias defined
+create table t1 (a bigint, b int, c int);
+create table t2 (c bigint, d int);
+
+insert into t1(a, b, c) values
+(1, 1, 1),
+(1, 2, 2),
+(2, 2, 2),
+(3, 2, 2);
+
+select distinct c as c_alias,
+ rank() over (order by b) as rank_alias
+from t1
+order by c;
+
+select distinct c as c_alias,
+ rank() over (order by b) as rank_alias
+from t1
+order by rank() over (order by b);
+
+
+
+
+explain cbo
+select distinct c as c_alias,
+ rank() over (order by b) as rank_alias
+from t1
+order by c;
+
+explain cbo
+select distinct b + d as b_plus_d,
+ rank() over (order by b) as rank_alias
+from t1
+join t2 on t1.a = t2.c
+order by b + d;
diff --git a/ql/src/test/queries/clientpositive/order_by_expr_2.q b/ql/src/test/queries/clientpositive/order_by_expr_2.q
index dcc2a668aff..123454114e3 100644
--- a/ql/src/test/queries/clientpositive/order_by_expr_2.q
+++ b/ql/src/test/queries/clientpositive/order_by_expr_2.q
@@ -9,3 +9,50 @@ select a as b, b as a from t_n14 order by t_n14.a, t_n14.b;
select a as b from t_n14 order by b;
select a as b from t_n14 order by 0-a;
select a,b,count(*),a+b from t_n14 group by a, b order by a+b, a;
+
+create table store(store_name string, store_sqft int);
+
+insert into store values ('HQ', 3), ('hq', 4);
+
+-- Order by expression has string literal
+-- literal is upper case both in project and order by clause
+explain cbo
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+ store.store_sqft as c2
+ from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'HQ' then null else store_name end ASC;
+
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+ store.store_sqft as c2
+ from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'HQ' then null else store_name end ASC;
+
+-- literal has different case in project and order by clause
+explain cbo
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+ store.store_sqft as c2
+ from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'hq' then null else store_name end ASC;
+
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+ store.store_sqft as c2
+ from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'hq' then null else store_name end ASC;
+
+-- expression is referenced by alias in Order by clause
+explain cbo
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+ store.store_sqft as c2
+ from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by c1 ASC;
+
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+ store.store_sqft as c2
+ from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by c1 ASC;
diff --git a/ql/src/test/results/clientpositive/llap/distinct_order_by.q.out b/ql/src/test/results/clientpositive/llap/distinct_order_by.q.out
new file mode 100644
index 00000000000..f8b3fb0c667
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/distinct_order_by.q.out
@@ -0,0 +1,62 @@
+PREHOOK: query: create table t1 (a bigint, b int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a bigint, b int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: insert into t1(a, b) values
+(1, 1),
+(1, 2),
+(2, 2),
+(3, 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1(a, b) values
+(1, 1),
+(1, 2),
+(2, 2),
+(3, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SCRIPT []
+POSTHOOK: Lineage: t1.b SCRIPT []
+PREHOOK: query: explain cbo
+select distinct b + 2 as alias_b, b
+from t1
+order by b + 2, b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select distinct b + 2 as alias_b, b
+from t1
+order by b + 2, b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(alias_b=[$0], b=[$1])
+ HiveSortLimit(sort0=[$2], sort1=[$1], dir0=[ASC], dir1=[ASC])
+ HiveProject(alias_b=[$0], b=[$1], (+ (tok_table_or_col b) 2)1=[+($1, 2)])
+ HiveAggregate(group=[{0, 1}])
+ HiveProject($f0=[+($1, 2)], $f1=[$1])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
+PREHOOK: query: select distinct b + 2 as alias_b, b
+from t1
+order by b + 2, b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct b + 2 as alias_b, b
+from t1
+order by b + 2, b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+3 1
+4 2
diff --git a/ql/src/test/results/clientpositive/llap/distinct_windowing_2.q.out b/ql/src/test/results/clientpositive/llap/distinct_windowing_2.q.out
new file mode 100644
index 00000000000..27c33b5681f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/distinct_windowing_2.q.out
@@ -0,0 +1,123 @@
+PREHOOK: query: create table t1 (a bigint, b int, c int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a bigint, b int, c int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 (c bigint, d int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2 (c bigint, d int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: insert into t1(a, b, c) values
+(1, 1, 1),
+(1, 2, 2),
+(2, 2, 2),
+(3, 2, 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1(a, b, c) values
+(1, 1, 1),
+(1, 2, 2),
+(2, 2, 2),
+(3, 2, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SCRIPT []
+POSTHOOK: Lineage: t1.b SCRIPT []
+POSTHOOK: Lineage: t1.c SCRIPT []
+PREHOOK: query: select distinct c as c_alias,
+ rank() over (order by b) as rank_alias
+from t1
+order by c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct c as c_alias,
+ rank() over (order by b) as rank_alias
+from t1
+order by c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 1
+2 2
+PREHOOK: query: select distinct c as c_alias,
+ rank() over (order by b) as rank_alias
+from t1
+order by rank() over (order by b)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct c as c_alias,
+ rank() over (order by b) as rank_alias
+from t1
+order by rank() over (order by b)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 1
+2 2
+PREHOOK: query: explain cbo
+select distinct c as c_alias,
+ rank() over (order by b) as rank_alias
+from t1
+order by c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select distinct c as c_alias,
+ rank() over (order by b) as rank_alias
+from t1
+order by c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveSortLimit(sort0=[$0], dir0=[ASC])
+ HiveProject(c_alias=[$0], rank_alias=[$1])
+ HiveAggregate(group=[{0, 1}])
+ HiveProject(c_alias=[$2], rank_alias=[rank() OVER (PARTITION BY 0 ORDER BY $1 NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
+PREHOOK: query: explain cbo
+select distinct b + d as b_plus_d,
+ rank() over (order by b) as rank_alias
+from t1
+join t2 on t1.a = t2.c
+order by b + d
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select distinct b + d as b_plus_d,
+ rank() over (order by b) as rank_alias
+from t1
+join t2 on t1.a = t2.c
+order by b + d
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveSortLimit(sort0=[$0], dir0=[ASC])
+ HiveProject(b_plus_d=[$0], rank_alias=[$1])
+ HiveAggregate(group=[{0, 1}])
+ HiveProject(b_plus_d=[+($1, $3)], rank_alias=[rank() OVER (PARTITION BY 0 ORDER BY $1 NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
+ HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(a=[$0], b=[$1])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+ HiveProject(c=[$0], d=[$1])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, t2]], table:alias=[t2])
+
diff --git a/ql/src/test/results/clientpositive/llap/order_by_expr_1.q.out b/ql/src/test/results/clientpositive/llap/order_by_expr_1.q.out
index edbdcf4b04c..22daeb575bf 100644
--- a/ql/src/test/results/clientpositive/llap/order_by_expr_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/order_by_expr_1.q.out
@@ -349,11 +349,11 @@ STAGE PLANS:
alias: t_n5
Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: b (type: int), (0 - b) (type: int), (a + 1) (type: int)
+ expressions: b (type: int), (a + 1) (type: int), (0 - b) (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 9 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col1 (type: int), _col2 (type: int)
+ key expressions: _col2 (type: int), _col1 (type: int)
null sort order: zz
sort order: ++
Statistics: Num rows: 9 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE
diff --git a/ql/src/test/results/clientpositive/llap/order_by_expr_2.q.out b/ql/src/test/results/clientpositive/llap/order_by_expr_2.q.out
index a76cc11943b..78bd0459bc3 100644
--- a/ql/src/test/results/clientpositive/llap/order_by_expr_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/order_by_expr_2.q.out
@@ -100,3 +100,152 @@ POSTHOOK: Input: default@t_n14
4 5 1 9
3 7 1 10
8 9 1 17
+PREHOOK: query: create table store(store_name string, store_sqft int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@store
+POSTHOOK: query: create table store(store_name string, store_sqft int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@store
+PREHOOK: query: insert into store values ('HQ', 3), ('hq', 4)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@store
+POSTHOOK: query: insert into store values ('HQ', 3), ('hq', 4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@store
+POSTHOOK: Lineage: store.store_name SCRIPT []
+POSTHOOK: Lineage: store.store_sqft SCRIPT []
+PREHOOK: query: explain cbo
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+ store.store_sqft as c2
+ from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'HQ' then null else store_name end ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+ store.store_sqft as c2
+ from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'HQ' then null else store_name end ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(c0=[$0], c1=[$1], c2=[$2])
+ HiveSortLimit(sort0=[$3], dir0=[ASC])
+ HiveProject(c0=[$0], c1=[$1], c2=[$2], (tok_function case (tok_table_or_col store_name) 'HQ' tok_null (tok_table_or_col store_name))=[$1])
+ HiveAggregate(group=[{0, 1, 2}])
+ HiveProject($f0=[$0], $f1=[CASE(=($0, _UTF-16LE'HQ'), null:VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], $f2=[$1])
+ HiveTableScan(table=[[default, store]], table:alias=[store])
+
+PREHOOK: query: select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+ store.store_sqft as c2
+ from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'HQ' then null else store_name end ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store
+#### A masked pattern was here ####
+POSTHOOK: query: select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+ store.store_sqft as c2
+ from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'HQ' then null else store_name end ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store
+#### A masked pattern was here ####
+hq hq 4
+HQ NULL 3
+PREHOOK: query: explain cbo
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+ store.store_sqft as c2
+ from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'hq' then null else store_name end ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+ store.store_sqft as c2
+ from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'hq' then null else store_name end ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(c0=[$0], c1=[$1], c2=[$2])
+ HiveSortLimit(sort0=[$3], dir0=[ASC])
+ HiveProject(c0=[$0], c1=[$1], c2=[$2], (tok_function case (tok_table_or_col store_name) 'hq' tok_null (tok_table_or_col store_name))0=[CASE(=($0, _UTF-16LE'hq'), null:VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)])
+ HiveAggregate(group=[{0, 1, 2}])
+ HiveProject($f0=[$0], $f1=[CASE(=($0, _UTF-16LE'HQ'), null:VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], $f2=[$1])
+ HiveTableScan(table=[[default, store]], table:alias=[store])
+
+PREHOOK: query: select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+ store.store_sqft as c2
+ from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'hq' then null else store_name end ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store
+#### A masked pattern was here ####
+POSTHOOK: query: select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+ store.store_sqft as c2
+ from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'hq' then null else store_name end ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store
+#### A masked pattern was here ####
+HQ NULL 3
+hq hq 4
+PREHOOK: query: explain cbo
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+ store.store_sqft as c2
+ from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by c1 ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+ store.store_sqft as c2
+ from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by c1 ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store
+#### A masked pattern was here ####
+CBO PLAN:
+HiveSortLimit(sort0=[$1], dir0=[ASC])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
+ HiveAggregate(group=[{0, 1, 2}])
+ HiveProject($f0=[$0], $f1=[CASE(=($0, _UTF-16LE'HQ'), null:VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], $f2=[$1])
+ HiveTableScan(table=[[default, store]], table:alias=[store])
+
+PREHOOK: query: select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+ store.store_sqft as c2
+ from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by c1 ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store
+#### A masked pattern was here ####
+POSTHOOK: query: select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+ store.store_sqft as c2
+ from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by c1 ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store
+#### A masked pattern was here ####
+hq hq 4
+HQ NULL 3
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
index a1abcfa8be5..340785d8916 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
@@ -3589,11 +3589,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 5 Data size: 1115 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
- outputColumnNames: _col1, _col3, _col4
+ expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 5 Data size: 1115 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col3 (type: string), _col4 (type: int)
+ key expressions: _col0 (type: string), _col2 (type: int)
null sort order: zz
sort order: ++
Statistics: Num rows: 5 Data size: 1115 Basic stats: COMPLETE Column stats: COMPLETE
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query47.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query47.q.out
index 98599b4a2f6..5a04302e5ee 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query47.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query47.q.out
@@ -1,7 +1,7 @@
CBO PLAN:
HiveProject(i_category=[$0], d_year=[$1], d_moy=[$2], avg_monthly_sales=[$3], sum_sales=[$4], psum=[$5], nsum=[$6])
HiveSortLimit(sort0=[$7], sort1=[$2], dir0=[ASC], dir1=[ASC], fetch=[100])
- HiveProject(i_category=[$12], d_year=[$16], d_moy=[$17], avg_monthly_sales=[$19], sum_sales=[$18], psum=[$10], nsum=[$4], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))=[-($18, $19)])
+ HiveProject(i_category=[$12], d_year=[$16], d_moy=[$17], avg_monthly_sales=[$19], sum_sales=[$18], psum=[$10], nsum=[$4], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))1=[-($18, $19)])
HiveJoin(condition=[AND(=($12, $0), =($13, $1), =($14, $2), =($15, $3), =($20, $5))], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$4], -=[-($5, 1)])
HiveFilter(condition=[IS NOT NULL($5)])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query57.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query57.q.out
index fb6ce66e84d..5f639c2b148 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query57.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query57.q.out
@@ -1,7 +1,7 @@
CBO PLAN:
HiveProject(i_category=[$0], i_brand=[$1], d_year=[$2], d_moy=[$3], avg_monthly_sales=[$4], sum_sales=[$5], psum=[$6], nsum=[$7])
HiveSortLimit(sort0=[$8], sort1=[$2], dir0=[ASC], dir1=[ASC], fetch=[100])
- HiveProject(i_category=[$10], i_brand=[$11], d_year=[$13], d_moy=[$14], avg_monthly_sales=[$16], sum_sales=[$15], psum=[$8], nsum=[$3], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))=[-($15, $16)])
+ HiveProject(i_category=[$10], i_brand=[$11], d_year=[$13], d_moy=[$14], avg_monthly_sales=[$16], sum_sales=[$15], psum=[$8], nsum=[$3], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))1=[-($15, $16)])
HiveJoin(condition=[AND(=($10, $0), =($11, $1), =($12, $2), =($17, $4))], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_function sum (tok_table_or_col cs_sales_price))=[$3], -=[-($4, 1)])
HiveFilter(condition=[IS NOT NULL($4)])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query79.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query79.q.out
index 2cf9a6d816f..9aaa7c2a016 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query79.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query79.q.out
@@ -1,11 +1,11 @@
CBO PLAN:
HiveProject(c_last_name=[$0], c_first_name=[$1], _o__c2=[$2], ss_ticket_number=[$3], amt=[$4], profit=[$5])
HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$6], sort3=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100])
- HiveProject(c_last_name=[$2], c_first_name=[$1], _o__c2=[$8], ss_ticket_number=[$3], amt=[$6], profit=[$7], (tok_function substr (tok_table_or_col s_city) 1 30)=[substr($5, 1, 30)])
+ HiveProject(c_last_name=[$2], c_first_name=[$1], _o__c2=[$7], ss_ticket_number=[$3], amt=[$5], profit=[$6], (tok_function substr (tok_table_or_col s_city) 1 30)=[$7])
HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9])
HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveProject(ss_ticket_number=[$2], ss_customer_sk=[$0], s_city=[$3], amt=[$4], profit=[$5], substr=[substr($3, 1, 30)])
+ HiveProject(ss_ticket_number=[$2], ss_customer_sk=[$0], amt=[$4], profit=[$5], substr=[substr($3, 1, 30)])
HiveAggregate(group=[{0, 2, 4, 10}], agg#0=[sum($5)], agg#1=[sum($6)])
HiveJoin(condition=[=($1, $11)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($3, $9)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query89.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query89.q.out
index e8ec73f72c3..89fe555e64f 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query89.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query89.q.out
@@ -1,7 +1,7 @@
CBO PLAN:
HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], s_store_name=[$3], s_company_name=[$4], d_moy=[$5], sum_sales=[$6], avg_monthly_sales=[$7])
HiveSortLimit(sort0=[$8], sort1=[$3], dir0=[ASC], dir1=[ASC], fetch=[100])
- HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], s_store_name=[$3], s_company_name=[$4], d_moy=[$5], sum_sales=[$6], avg_monthly_sales=[$7], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))=[-($6, $7)])
+ HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], s_store_name=[$3], s_company_name=[$4], d_moy=[$5], sum_sales=[$6], avg_monthly_sales=[$7], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))1=[-($6, $7)])
HiveFilter(condition=[CASE(<>($7, 0:DECIMAL(1, 0)), >(/(ABS(-($6, $7)), $7), 0.1:DECIMAL(1, 1)), false)])
HiveProject((tok_table_or_col i_category)=[$2], (tok_table_or_col i_class)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col s_store_name)=[$4], (tok_table_or_col s_company_name)=[$5], (tok_table_or_col d_moy)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[avg($6) OVER (PARTITION BY $2, $0, $4, $5 ORDER BY $2 NULLS FIRST, $0 NULLS FIRST, $4 NULLS FIRST, $5 NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
HiveProject(i_brand=[$0], i_class=[$1], i_category=[$2], d_moy=[$3], s_store_name=[$4], s_company_name=[$5], $f6=[$6])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query79.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query79.q.out
index 332b0362261..3ae0064cc2d 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query79.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query79.q.out
@@ -181,16 +181,16 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 5713516612 Data size: 1919324915684 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col2 (type: bigint), _col0 (type: bigint), _col3 (type: varchar(60)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), substr(_col3, 1, 30) (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 5713516612 Data size: 2413759812276 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: _col2 (type: bigint), _col0 (type: bigint), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), substr(_col3, 1, 30) (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 5713516612 Data size: 1882402767360 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: bigint)
null sort order: z
sort order: +
Map-reduce partition columns: _col1 (type: bigint)
- Statistics: Num rows: 5713516612 Data size: 2413759812276 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint), _col2 (type: varchar(60)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: string)
+ Statistics: Num rows: 5713516612 Data size: 1882402767360 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: string)
Reducer 4
Execution mode: vectorized, llap
Reduce Operator Tree:
@@ -200,32 +200,32 @@ STAGE PLANS:
keys:
0 KEY.reducesinkkey0 (type: bigint)
1 KEY.reducesinkkey0 (type: bigint)
- outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7, _col8
+ outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7
input vertices:
0 Map 1
- Statistics: Num rows: 5713516612 Data size: 3410969417364 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5713516612 Data size: 2879612372448 Basic stats: COMPLETE Column stats: COMPLETE
DynamicPartitionHashJoin: true
Top N Key Operator
sort order: ++++
- keys: _col2 (type: char(30)), _col1 (type: char(20)), substr(_col5, 1, 30) (type: string), _col7 (type: decimal(17,2))
+ keys: _col2 (type: char(30)), _col1 (type: char(20)), _col7 (type: string), _col6 (type: decimal(17,2))
null sort order: zzzz
- Statistics: Num rows: 5713516612 Data size: 3410969417364 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5713516612 Data size: 2879612372448 Basic stats: COMPLETE Column stats: COMPLETE
top n: 100
Select Operator
- expressions: _col2 (type: char(30)), _col1 (type: char(20)), _col8 (type: string), _col3 (type: bigint), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), substr(_col5, 1, 30) (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 5713516612 Data size: 3405255900752 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: _col2 (type: char(30)), _col1 (type: char(20)), _col3 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 5713516612 Data size: 2879612372448 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col6 (type: string), _col5 (type: decimal(17,2))
null sort order: zzzz
sort order: ++++
- Statistics: Num rows: 5713516612 Data size: 3405255900752 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: string), _col3 (type: bigint), _col4 (type: decimal(17,2))
+ Statistics: Num rows: 5713516612 Data size: 2879612372448 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col3 (type: bigint), _col4 (type: decimal(17,2))
Reducer 5
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: char(30)), KEY.reducesinkkey1 (type: char(20)), VALUE._col0 (type: string), VALUE._col1 (type: bigint), VALUE._col2 (type: decimal(17,2)), KEY.reducesinkkey3 (type: decimal(17,2))
+ expressions: KEY.reducesinkkey0 (type: char(30)), KEY.reducesinkkey1 (type: char(20)), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(17,2)), KEY.reducesinkkey3 (type: decimal(17,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 5713516612 Data size: 2879612372448 Basic stats: COMPLETE Column stats: COMPLETE
Limit