Posted to commits@hive.apache.org by kr...@apache.org on 2023/02/09 07:32:27 UTC

[hive] branch master updated: HIVE-26054: Distinct + Groupby with column alias is failing (Krisztian Kasa, reviewed by Laszlo Vegh)

This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 7082fd1dfd0 HIVE-26054: Distinct + Groupby with column alias is failing (Krisztian Kasa, reviewed by Laszlo Vegh)
7082fd1dfd0 is described below

commit 7082fd1dfd087c99e6f00a7a0e95a30e198fede8
Author: Krisztian Kasa <ka...@gmail.com>
AuthorDate: Thu Feb 9 08:32:19 2023 +0100

    HIVE-26054: Distinct + Groupby with column alias is failing (Krisztian Kasa, reviewed by Laszlo Vegh)
---
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |  61 ++++++---
 .../apache/hadoop/hive/ql/parse/RowResolver.java   |  34 +++--
 .../queries/clientpositive/distinct_order_by.q     |  17 +++
 .../queries/clientpositive/distinct_windowing_2.q  |  35 +++++
 .../test/queries/clientpositive/order_by_expr_2.q  |  47 +++++++
 .../clientpositive/llap/distinct_order_by.q.out    |  62 +++++++++
 .../clientpositive/llap/distinct_windowing_2.q.out | 123 +++++++++++++++++
 .../clientpositive/llap/order_by_expr_1.q.out      |   4 +-
 .../clientpositive/llap/order_by_expr_2.q.out      | 149 +++++++++++++++++++++
 .../results/clientpositive/llap/subquery_in.q.out  |   6 +-
 .../perf/tpcds30tb/tez/cbo_query47.q.out           |   2 +-
 .../perf/tpcds30tb/tez/cbo_query57.q.out           |   2 +-
 .../perf/tpcds30tb/tez/cbo_query79.q.out           |   4 +-
 .../perf/tpcds30tb/tez/cbo_query89.q.out           |   2 +-
 .../perf/tpcds30tb/tez/query79.q.out               |  30 ++---
 15 files changed, 526 insertions(+), 52 deletions(-)
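
For context, the failure this commit addresses can be reproduced with the query
added in distinct_order_by.q (included in this diff): a DISTINCT select list
that aliases a computed expression, combined with an ORDER BY on that same
expression. Per the subject line, queries of this shape were failing before
the fix:

    create table t1 (a bigint, b int);
    insert into t1(a, b) values (1, 1), (1, 2), (2, 2), (3, 2);

    select distinct b + 2 as alias_b, b
    from t1
    order by b + 2, b;

The ORDER BY key "b + 2" is a computed expression rather than a plain
reference to a projected column, which is the case the CalcitePlanner change
below handles.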

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 7f274cadcbf..a500d2be957 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hive.ql.parse;
 
-import com.google.common.base.Function;
 import com.google.common.collect.ArrayListMultimap;
 import com.google.common.collect.ImmutableBiMap;
 import com.google.common.collect.ImmutableList;
@@ -315,6 +314,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFInline;
 import org.apache.hadoop.hive.ql.util.DirectionUtils;
 import org.apache.hadoop.hive.ql.util.NullOrdering;
 import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
@@ -4795,7 +4795,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
         // columns, and in fact adding all columns would change the behavior of
         // DISTINCT, so we bypass this logic.
         if ((obAST != null || sbAST != null)
-            && selExprList.getToken().getType() != HiveParser.TOK_SELECTDI
+            && !(selForWindow != null && selExprList.getToken().getType() == HiveParser.TOK_SELECTDI)
             && !isAllColRefRewrite) {
           // 1. OB Expr sanity test
           // in strict mode, in the presence of order by, limit must be
@@ -4807,7 +4807,34 @@ public class CalcitePlanner extends SemanticAnalyzer {
               throw new SemanticException(SemanticAnalyzer.generateErrorMessage(obAST, error));
             }
           }
-          originalRR = appendInputColumns(srcRel, columnList, outputRR, inputRR);
+
+          originalRR = outputRR.duplicate();
+          appendInputColumns(srcRel, columnList, outputRR, inputRR);
+
+          ASTNode obOrSbAST = obAST != null ? obAST : sbAST;
+          for (int i = 0; i < obOrSbAST.getChildCount(); ++i) {
+            ASTNode obExprAST = (ASTNode) obOrSbAST.getChild(i);
+            ASTNode nullObASTExpr = (ASTNode) obExprAST.getChild(0);
+            ASTNode ref = (ASTNode) nullObASTExpr.getChild(0);
+            RexNode obRex;
+            try {
+              Map<ASTNode, RexNode> astToExprNDescMap = genAllRexNode(ref, inputRR, cluster.getRexBuilder());
+              obRex = astToExprNDescMap.get(ref);
+            } catch (SemanticException ex) {
+              continue;
+            }
+            if (obRex instanceof RexInputRef) {
+              // Order by key is a projected column reference
+              continue;
+            }
+            columnList.add(obRex);
+
+            String field = getColumnInternalName(outputRR.getColumnInfos().size());
+            ObjectInspector oi = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
+                    TypeConverter.convert(obRex.getType()));
+            outputRR.putExpression(ref, new ColumnInfo(field, oi, "", false));
+          }
+
           outputRel = genSelectRelNode(columnList, outputRR, srcRel);
           // outputRel is the generated augmented select with extra unselected
           // columns, and originalRR is the original generated select
@@ -4815,7 +4842,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
         } else {
           if (qbp.getQualifyExprForClause(dest) != null) {
             int originalColumnListSize = columnList.size();
-            originalRR = appendInputColumns(srcRel, columnList, outputRR, inputRR);
+            originalRR = outputRR.duplicate();
+            appendInputColumns(srcRel, columnList, outputRR, inputRR);
             RelNode combinedProject = genSelectRelNode(columnList, outputRR, srcRel);
             RelNode qualifyRel = genQualifyLogicalPlan(qb, combinedProject);
             List<RexNode> topProjectColumnList = new ArrayList<>(originalColumnListSize);
@@ -4836,11 +4864,15 @@ public class CalcitePlanner extends SemanticAnalyzer {
         outputRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
               outputRel, groupSet, null, new ArrayList<AggregateCall>());
         RowResolver groupByOutputRowResolver = new RowResolver();
+        List<ASTNode> gbyKeyExpressions = getGroupByForClause(qbp, selClauseName);
         for (int i = 0; i < outputRR.getColumnInfos().size(); i++) {
           ColumnInfo colInfo = outputRR.getColumnInfos().get(i);
           ColumnInfo newColInfo = new ColumnInfo(colInfo.getInternalName(),
               colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
           groupByOutputRowResolver.put(colInfo.getTabAlias(), colInfo.getAlias(), newColInfo);
+          if (gbyKeyExpressions != null && gbyKeyExpressions.size() == outputRR.getColumnInfos().size()) {
+            groupByOutputRowResolver.putExpression(gbyKeyExpressions.get(i), colInfo);
+          }
         }
         relToHiveColNameCalcitePosMap.put(outputRel, buildHiveToCalciteColumnMap(groupByOutputRowResolver));
         this.relToHiveRR.put(outputRel, groupByOutputRowResolver);
@@ -4850,23 +4882,17 @@ public class CalcitePlanner extends SemanticAnalyzer {
       return new Pair<>(outputRel, outputRR);
     }
 
-    private RowResolver appendInputColumns(RelNode srcRel, List<RexNode> columnList, RowResolver outputRR, RowResolver inputRR) throws SemanticException {
-      RowResolver originalRR;
+    private void appendInputColumns(
+            RelNode srcRel, List<RexNode> columnList, RowResolver outputRR, RowResolver inputRR)
+            throws SemanticException {
       List<RexNode> originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(),
-          new Function<RelDataTypeField, RexNode>() {
-            @Override
-            public RexNode apply(RelDataTypeField input) {
-              return new RexInputRef(input.getIndex(), input.getType());
-            }
-          });
-      originalRR = outputRR.duplicate();
+              input -> new RexInputRef(input.getIndex(), input.getType()));
       for (int i = 0; i < inputRR.getColumnInfos().size(); i++) {
         ColumnInfo colInfo = new ColumnInfo(inputRR.getColumnInfos().get(i));
-        String internalName = SemanticAnalyzer.getColumnInternalName(outputRR.getColumnInfos()
-            .size() + i);
+        String internalName = SemanticAnalyzer.getColumnInternalName(outputRR.getColumnInfos().size());
         colInfo.setInternalName(internalName);
-        // if there is any confict, then we do not generate it in the new select
-        // otherwise, we add it into the calciteColLst and generate the new select
+        // if there is any conflict, then we do not generate it in the new select
+        // otherwise we add it into the calciteColLst and generate the new select
         if (!outputRR.putWithCheck(colInfo.getTabAlias(), colInfo.getAlias(), internalName,
             colInfo)) {
           LOG.trace("Column already present in RR. skipping.");
@@ -4874,7 +4900,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
           columnList.add(originalInputRefs.get(i));
         }
       }
-      return originalRR;
     }
 
     Integer genRexNodeRegex(String colRegex, String tabAlias, ASTNode sel,
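
The new loop above resolves each ORDER BY / SORT BY key against the input row:
keys that come back as a plain RexInputRef are already projected and are
skipped, while computed keys are appended to columnList under a generated
internal column name and registered in outputRR via putExpression, so the
later sort can reference them by position. A rough standalone sketch of that
idea, using hypothetical stand-in types rather than the real Hive/Calcite
classes:

    import java.util.ArrayList;
    import java.util.List;

    // Stand-in sketch (not the real Hive/Calcite API) of the ORDER BY
    // handling above: plain column references are skipped, computed keys
    // are projected under a generated internal name.
    public class OrderByKeySketch {

      // A resolved ORDER BY key: either a plain reference to a projected
      // column (columnRef != null) or a computed expression.
      record Key(String text, Integer columnRef) {}

      static List<String> appendComputedKeys(List<String> selectList, List<Key> orderByKeys) {
        List<String> augmented = new ArrayList<>(selectList);
        for (Key key : orderByKeys) {
          if (key.columnRef() != null) {
            continue; // already projected; nothing to add
          }
          // mirrors getColumnInternalName(outputRR.getColumnInfos().size())
          String internalName = "_col" + augmented.size();
          augmented.add(internalName + "=" + key.text());
        }
        return augmented;
      }

      public static void main(String[] args) {
        // select distinct b + 2 as alias_b, b from t1 order by b + 2, b
        List<String> select = List.of("alias_b=b + 2", "b");
        List<Key> keys = List.of(new Key("b + 2", null), new Key("b", 1));
        System.out.println(appendComputedKeys(select, keys));
        // -> [alias_b=b + 2, b, _col2=b + 2]; compare the augmented
        //    HiveProject in distinct_order_by.q.out further down
      }
    }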
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
index c58a0a076f0..715e9bd6c88 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
@@ -81,7 +81,10 @@ public class RowResolver implements Serializable{
   public void putExpression(ASTNode node, ColumnInfo colInfo) {
     String treeAsString = node.toStringTree();
     expressionMap.put(treeAsString, node);
-    put("", treeAsString, colInfo);
+    if (!putInternal("", treeAsString, colInfo)) {
+      return;
+    }
+    colInfo.setAlias(treeAsString);
   }
 
   /**
@@ -100,18 +103,26 @@ public class RowResolver implements Serializable{
     return expressionMap.get(node.toStringTree());
   }
 
-  public void put(String tab_alias, String col_alias, ColumnInfo colInfo) {
-    if (!addMappingOnly(tab_alias, col_alias, colInfo)) {
+  public void put(String tabAlias, String colAlias, ColumnInfo colInfo) {
+    if (!putInternal(tabAlias, colAlias, colInfo)) {
+      return;
+    }
+    if (colAlias != null) {
+      colInfo.setAlias(colAlias.toLowerCase());
+    }
+  }
+
+  private boolean putInternal(String tabAlias, String colAlias, ColumnInfo colInfo) {
+    if (!addMappingOnly(tabAlias, colAlias, colInfo)) {
       //Make sure that the table alias and column alias are stored
       //in the column info
-      if (tab_alias != null) {
-        colInfo.setTabAlias(tab_alias.toLowerCase());
-      }
-      if (col_alias != null) {
-        colInfo.setAlias(col_alias.toLowerCase());
+      if (tabAlias != null) {
+        colInfo.setTabAlias(tabAlias.toLowerCase());
       }
       rowSchema.getSignature().add(colInfo);
+      return true;
     }
+    return false;
   }
 
   private void keepAmbiguousInfo(String col_alias, String tab_alias) {
@@ -481,11 +492,15 @@ public class RowResolver implements Serializable{
   public RowResolver duplicate() {
     RowResolver resolver = new RowResolver();
     resolver.rowSchema = new RowSchema(rowSchema);
-    resolver.rslvMap.putAll(rslvMap);
+    for (Map.Entry<String, Map<String, ColumnInfo>> entry : rslvMap.entrySet()) {
+      resolver.rslvMap.put(entry.getKey(), new LinkedHashMap<>(entry.getValue()));
+    }
     resolver.invRslvMap.putAll(invRslvMap);
     resolver.altInvRslvMap.putAll(altInvRslvMap);
     resolver.expressionMap.putAll(expressionMap);
     resolver.isExprResolver = isExprResolver;
+    for (Map.Entry<String, Map<String, String>> entry : ambiguousColumns.entrySet()) {
+      resolver.ambiguousColumns.put(entry.getKey(), new LinkedHashMap<>(entry.getValue()));
+    }
-    resolver.ambiguousColumns.putAll(ambiguousColumns);
     resolver.checkForAmbiguity = checkForAmbiguity;
     return resolver;
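
The per-entry copies above matter because putAll alone copies only the outer
map entries: the original and the duplicated resolver would then share the
same inner LinkedHashMap instances, and entries registered later on one (for
example via putExpression for an ORDER BY key) would silently appear in the
other. A minimal standalone illustration, not Hive code:

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class ShallowCopyDemo {
      public static void main(String[] args) {
        Map<String, Map<String, String>> original = new LinkedHashMap<>();
        original.put("t1", new LinkedHashMap<>());
        original.get("t1").put("b", "_col1");

        // Shallow copy: new outer map, but the inner map is shared.
        Map<String, Map<String, String>> shallow = new LinkedHashMap<>(original);
        shallow.get("t1").put("alias_b", "_col2");
        System.out.println(original.get("t1").keySet()); // [b, alias_b] -> leaked

        // Per-entry copy, as in duplicate() above: inner maps are copied too.
        Map<String, Map<String, String>> isolated = new LinkedHashMap<>();
        for (Map.Entry<String, Map<String, String>> e : original.entrySet()) {
          isolated.put(e.getKey(), new LinkedHashMap<>(e.getValue()));
        }
        isolated.get("t1").put("c", "_col3");
        System.out.println(original.get("t1").keySet()); // [b, alias_b] -> unchanged
      }
    }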
diff --git a/ql/src/test/queries/clientpositive/distinct_order_by.q b/ql/src/test/queries/clientpositive/distinct_order_by.q
new file mode 100644
index 00000000000..92f73e7ad3d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/distinct_order_by.q
@@ -0,0 +1,17 @@
+create table t1 (a bigint, b int);
+
+insert into t1(a, b) values
+(1, 1),
+(1, 2),
+(2, 2),
+(3, 2);
+
+explain cbo
+select distinct b + 2 as alias_b, b
+from t1
+order by b + 2, b;
+
+
+select distinct b + 2 as alias_b, b
+from t1
+order by b + 2, b;
diff --git a/ql/src/test/queries/clientpositive/distinct_windowing_2.q b/ql/src/test/queries/clientpositive/distinct_windowing_2.q
new file mode 100644
index 00000000000..8eb26fa908a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/distinct_windowing_2.q
@@ -0,0 +1,35 @@
+-- Test Order By clause has expressions when query has distinct + window func and select expressions has alias defined
+create table t1 (a bigint, b int, c int);
+create table t2 (c bigint, d int);
+
+insert into t1(a, b, c) values
+(1, 1, 1),
+(1, 2, 2),
+(2, 2, 2),
+(3, 2, 2);
+
+select distinct c as c_alias,
+       rank() over (order by b) as rank_alias
+from t1
+order by c;
+
+select distinct c as c_alias,
+       rank() over (order by b) as rank_alias
+from t1
+order by rank() over (order by b);
+
+
+
+
+explain cbo
+select distinct c as c_alias,
+       rank() over (order by b) as rank_alias
+from t1
+order by c;
+
+explain cbo
+select distinct b + d as b_plus_d,
+       rank() over (order by b) as rank_alias
+from t1
+join t2 on t1.a = t2.c
+order by b + d;
diff --git a/ql/src/test/queries/clientpositive/order_by_expr_2.q b/ql/src/test/queries/clientpositive/order_by_expr_2.q
index dcc2a668aff..123454114e3 100644
--- a/ql/src/test/queries/clientpositive/order_by_expr_2.q
+++ b/ql/src/test/queries/clientpositive/order_by_expr_2.q
@@ -9,3 +9,50 @@ select a as b, b as a from t_n14 order by t_n14.a, t_n14.b;
 select a as b from t_n14 order by b;
 select a as b from t_n14 order by 0-a;
 select a,b,count(*),a+b from t_n14 group by a, b order by a+b, a;
+
+create table store(store_name string, store_sqft int);
+
+insert into store values ('HQ', 3), ('hq', 4);
+
+-- Order by expression has string literal
+-- literal is upper case both in project and order by clause
+explain cbo
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+       store.store_sqft as c2
+  from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'HQ' then null else store_name end ASC;
+
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+       store.store_sqft as c2
+  from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'HQ' then null else store_name end ASC;
+
+-- literal has different case in project and order by clause
+explain cbo
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+       store.store_sqft as c2
+  from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'hq' then null else store_name end ASC;
+
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+       store.store_sqft as c2
+  from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'hq' then null else store_name end ASC;
+
+-- expression is referenced by alias in Order by clause
+explain cbo
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+       store.store_sqft as c2
+  from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by c1 ASC;
+
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+       store.store_sqft as c2
+  from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by c1 ASC;
diff --git a/ql/src/test/results/clientpositive/llap/distinct_order_by.q.out b/ql/src/test/results/clientpositive/llap/distinct_order_by.q.out
new file mode 100644
index 00000000000..f8b3fb0c667
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/distinct_order_by.q.out
@@ -0,0 +1,62 @@
+PREHOOK: query: create table t1 (a bigint, b int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a bigint, b int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: insert into t1(a, b) values
+(1, 1),
+(1, 2),
+(2, 2),
+(3, 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1(a, b) values
+(1, 1),
+(1, 2),
+(2, 2),
+(3, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SCRIPT []
+POSTHOOK: Lineage: t1.b SCRIPT []
+PREHOOK: query: explain cbo
+select distinct b + 2 as alias_b, b
+from t1
+order by b + 2, b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select distinct b + 2 as alias_b, b
+from t1
+order by b + 2, b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(alias_b=[$0], b=[$1])
+  HiveSortLimit(sort0=[$2], sort1=[$1], dir0=[ASC], dir1=[ASC])
+    HiveProject(alias_b=[$0], b=[$1], (+ (tok_table_or_col b) 2)1=[+($1, 2)])
+      HiveAggregate(group=[{0, 1}])
+        HiveProject($f0=[+($1, 2)], $f1=[$1])
+          HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
+PREHOOK: query: select distinct b + 2 as alias_b, b
+from t1
+order by b + 2, b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct b + 2 as alias_b, b
+from t1
+order by b + 2, b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+3	1
+4	2
diff --git a/ql/src/test/results/clientpositive/llap/distinct_windowing_2.q.out b/ql/src/test/results/clientpositive/llap/distinct_windowing_2.q.out
new file mode 100644
index 00000000000..27c33b5681f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/distinct_windowing_2.q.out
@@ -0,0 +1,123 @@
+PREHOOK: query: create table t1 (a bigint, b int, c int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a bigint, b int, c int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 (c bigint, d int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2 (c bigint, d int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: insert into t1(a, b, c) values
+(1, 1, 1),
+(1, 2, 2),
+(2, 2, 2),
+(3, 2, 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1(a, b, c) values
+(1, 1, 1),
+(1, 2, 2),
+(2, 2, 2),
+(3, 2, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SCRIPT []
+POSTHOOK: Lineage: t1.b SCRIPT []
+POSTHOOK: Lineage: t1.c SCRIPT []
+PREHOOK: query: select distinct c as c_alias,
+       rank() over (order by b) as rank_alias
+from t1
+order by c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct c as c_alias,
+       rank() over (order by b) as rank_alias
+from t1
+order by c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1	1
+2	2
+PREHOOK: query: select distinct c as c_alias,
+       rank() over (order by b) as rank_alias
+from t1
+order by rank() over (order by b)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct c as c_alias,
+       rank() over (order by b) as rank_alias
+from t1
+order by rank() over (order by b)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1	1
+2	2
+PREHOOK: query: explain cbo
+select distinct c as c_alias,
+       rank() over (order by b) as rank_alias
+from t1
+order by c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select distinct c as c_alias,
+       rank() over (order by b) as rank_alias
+from t1
+order by c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveSortLimit(sort0=[$0], dir0=[ASC])
+  HiveProject(c_alias=[$0], rank_alias=[$1])
+    HiveAggregate(group=[{0, 1}])
+      HiveProject(c_alias=[$2], rank_alias=[rank() OVER (PARTITION BY 0 ORDER BY $1 NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
+        HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
+PREHOOK: query: explain cbo
+select distinct b + d as b_plus_d,
+       rank() over (order by b) as rank_alias
+from t1
+join t2 on t1.a = t2.c
+order by b + d
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select distinct b + d as b_plus_d,
+       rank() over (order by b) as rank_alias
+from t1
+join t2 on t1.a = t2.c
+order by b + d
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveSortLimit(sort0=[$0], dir0=[ASC])
+  HiveProject(b_plus_d=[$0], rank_alias=[$1])
+    HiveAggregate(group=[{0, 1}])
+      HiveProject(b_plus_d=[+($1, $3)], rank_alias=[rank() OVER (PARTITION BY 0 ORDER BY $1 NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
+        HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available])
+          HiveProject(a=[$0], b=[$1])
+            HiveFilter(condition=[IS NOT NULL($0)])
+              HiveTableScan(table=[[default, t1]], table:alias=[t1])
+          HiveProject(c=[$0], d=[$1])
+            HiveFilter(condition=[IS NOT NULL($0)])
+              HiveTableScan(table=[[default, t2]], table:alias=[t2])
+
diff --git a/ql/src/test/results/clientpositive/llap/order_by_expr_1.q.out b/ql/src/test/results/clientpositive/llap/order_by_expr_1.q.out
index edbdcf4b04c..22daeb575bf 100644
--- a/ql/src/test/results/clientpositive/llap/order_by_expr_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/order_by_expr_1.q.out
@@ -349,11 +349,11 @@ STAGE PLANS:
                   alias: t_n5
                   Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
-                    expressions: b (type: int), (0 - b) (type: int), (a + 1) (type: int)
+                    expressions: b (type: int), (a + 1) (type: int), (0 - b) (type: int)
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 9 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
-                      key expressions: _col1 (type: int), _col2 (type: int)
+                      key expressions: _col2 (type: int), _col1 (type: int)
                       null sort order: zz
                       sort order: ++
                       Statistics: Num rows: 9 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE
diff --git a/ql/src/test/results/clientpositive/llap/order_by_expr_2.q.out b/ql/src/test/results/clientpositive/llap/order_by_expr_2.q.out
index a76cc11943b..78bd0459bc3 100644
--- a/ql/src/test/results/clientpositive/llap/order_by_expr_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/order_by_expr_2.q.out
@@ -100,3 +100,152 @@ POSTHOOK: Input: default@t_n14
 4	5	1	9
 3	7	1	10
 8	9	1	17
+PREHOOK: query: create table store(store_name string, store_sqft int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@store
+POSTHOOK: query: create table store(store_name string, store_sqft int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@store
+PREHOOK: query: insert into store values ('HQ', 3), ('hq', 4)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@store
+POSTHOOK: query: insert into store values ('HQ', 3), ('hq', 4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@store
+POSTHOOK: Lineage: store.store_name SCRIPT []
+POSTHOOK: Lineage: store.store_sqft SCRIPT []
+PREHOOK: query: explain cbo
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+       store.store_sqft as c2
+  from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'HQ' then null else store_name end ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+       store.store_sqft as c2
+  from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'HQ' then null else store_name end ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(c0=[$0], c1=[$1], c2=[$2])
+  HiveSortLimit(sort0=[$3], dir0=[ASC])
+    HiveProject(c0=[$0], c1=[$1], c2=[$2], (tok_function case (tok_table_or_col store_name) 'HQ' tok_null (tok_table_or_col store_name))=[$1])
+      HiveAggregate(group=[{0, 1, 2}])
+        HiveProject($f0=[$0], $f1=[CASE(=($0, _UTF-16LE'HQ'), null:VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], $f2=[$1])
+          HiveTableScan(table=[[default, store]], table:alias=[store])
+
+PREHOOK: query: select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+       store.store_sqft as c2
+  from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'HQ' then null else store_name end ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store
+#### A masked pattern was here ####
+POSTHOOK: query: select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+       store.store_sqft as c2
+  from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'HQ' then null else store_name end ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store
+#### A masked pattern was here ####
+hq	hq	4
+HQ	NULL	3
+PREHOOK: query: explain cbo
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+       store.store_sqft as c2
+  from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'hq' then null else store_name end ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+       store.store_sqft as c2
+  from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'hq' then null else store_name end ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(c0=[$0], c1=[$1], c2=[$2])
+  HiveSortLimit(sort0=[$3], dir0=[ASC])
+    HiveProject(c0=[$0], c1=[$1], c2=[$2], (tok_function case (tok_table_or_col store_name) 'hq' tok_null (tok_table_or_col store_name))0=[CASE(=($0, _UTF-16LE'hq'), null:VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)])
+      HiveAggregate(group=[{0, 1, 2}])
+        HiveProject($f0=[$0], $f1=[CASE(=($0, _UTF-16LE'HQ'), null:VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], $f2=[$1])
+          HiveTableScan(table=[[default, store]], table:alias=[store])
+
+PREHOOK: query: select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+       store.store_sqft as c2
+  from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'hq' then null else store_name end ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store
+#### A masked pattern was here ####
+POSTHOOK: query: select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+       store.store_sqft as c2
+  from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by case store_name when 'hq' then null else store_name end ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store
+#### A masked pattern was here ####
+HQ	NULL	3
+hq	hq	4
+PREHOOK: query: explain cbo
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+       store.store_sqft as c2
+  from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by c1 ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+       store.store_sqft as c2
+  from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by c1 ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store
+#### A masked pattern was here ####
+CBO PLAN:
+HiveSortLimit(sort0=[$1], dir0=[ASC])
+  HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
+    HiveAggregate(group=[{0, 1, 2}])
+      HiveProject($f0=[$0], $f1=[CASE(=($0, _UTF-16LE'HQ'), null:VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], $f2=[$1])
+        HiveTableScan(table=[[default, store]], table:alias=[store])
+
+PREHOOK: query: select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+       store.store_sqft as c2
+  from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by c1 ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store
+#### A masked pattern was here ####
+POSTHOOK: query: select store.store_name as c0, case store_name when 'HQ' then null else store_name end as c1,
+       store.store_sqft as c2
+  from store as store
+group by store.store_name, case store_name when 'HQ' then null else store_name end, store.store_sqft
+order by c1 ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store
+#### A masked pattern was here ####
+hq	hq	4
+HQ	NULL	3
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
index a1abcfa8be5..340785d8916 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
@@ -3589,11 +3589,11 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 5 Data size: 1115 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
-                  expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
-                  outputColumnNames: _col1, _col3, _col4
+                  expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int)
+                  outputColumnNames: _col0, _col1, _col2
                   Statistics: Num rows: 5 Data size: 1115 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
-                    key expressions: _col3 (type: string), _col4 (type: int)
+                    key expressions: _col0 (type: string), _col2 (type: int)
                     null sort order: zz
                     sort order: ++
                     Statistics: Num rows: 5 Data size: 1115 Basic stats: COMPLETE Column stats: COMPLETE
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query47.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query47.q.out
index 98599b4a2f6..5a04302e5ee 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query47.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query47.q.out
@@ -1,7 +1,7 @@
 CBO PLAN:
 HiveProject(i_category=[$0], d_year=[$1], d_moy=[$2], avg_monthly_sales=[$3], sum_sales=[$4], psum=[$5], nsum=[$6])
   HiveSortLimit(sort0=[$7], sort1=[$2], dir0=[ASC], dir1=[ASC], fetch=[100])
-    HiveProject(i_category=[$12], d_year=[$16], d_moy=[$17], avg_monthly_sales=[$19], sum_sales=[$18], psum=[$10], nsum=[$4], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))=[-($18, $19)])
+    HiveProject(i_category=[$12], d_year=[$16], d_moy=[$17], avg_monthly_sales=[$19], sum_sales=[$18], psum=[$10], nsum=[$4], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))1=[-($18, $19)])
       HiveJoin(condition=[AND(=($12, $0), =($13, $1), =($14, $2), =($15, $3), =($20, $5))], joinType=[inner], algorithm=[none], cost=[not available])
         HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$4], -=[-($5, 1)])
           HiveFilter(condition=[IS NOT NULL($5)])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query57.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query57.q.out
index fb6ce66e84d..5f639c2b148 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query57.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query57.q.out
@@ -1,7 +1,7 @@
 CBO PLAN:
 HiveProject(i_category=[$0], i_brand=[$1], d_year=[$2], d_moy=[$3], avg_monthly_sales=[$4], sum_sales=[$5], psum=[$6], nsum=[$7])
   HiveSortLimit(sort0=[$8], sort1=[$2], dir0=[ASC], dir1=[ASC], fetch=[100])
-    HiveProject(i_category=[$10], i_brand=[$11], d_year=[$13], d_moy=[$14], avg_monthly_sales=[$16], sum_sales=[$15], psum=[$8], nsum=[$3], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))=[-($15, $16)])
+    HiveProject(i_category=[$10], i_brand=[$11], d_year=[$13], d_moy=[$14], avg_monthly_sales=[$16], sum_sales=[$15], psum=[$8], nsum=[$3], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))1=[-($15, $16)])
       HiveJoin(condition=[AND(=($10, $0), =($11, $1), =($12, $2), =($17, $4))], joinType=[inner], algorithm=[none], cost=[not available])
         HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_function sum (tok_table_or_col cs_sales_price))=[$3], -=[-($4, 1)])
           HiveFilter(condition=[IS NOT NULL($4)])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query79.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query79.q.out
index 2cf9a6d816f..9aaa7c2a016 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query79.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query79.q.out
@@ -1,11 +1,11 @@
 CBO PLAN:
 HiveProject(c_last_name=[$0], c_first_name=[$1], _o__c2=[$2], ss_ticket_number=[$3], amt=[$4], profit=[$5])
   HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$6], sort3=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100])
-    HiveProject(c_last_name=[$2], c_first_name=[$1], _o__c2=[$8], ss_ticket_number=[$3], amt=[$6], profit=[$7], (tok_function substr (tok_table_or_col s_city) 1 30)=[substr($5, 1, 30)])
+    HiveProject(c_last_name=[$2], c_first_name=[$1], _o__c2=[$7], ss_ticket_number=[$3], amt=[$5], profit=[$6], (tok_function substr (tok_table_or_col s_city) 1 30)=[$7])
       HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
         HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9])
           HiveTableScan(table=[[default, customer]], table:alias=[customer])
-        HiveProject(ss_ticket_number=[$2], ss_customer_sk=[$0], s_city=[$3], amt=[$4], profit=[$5], substr=[substr($3, 1, 30)])
+        HiveProject(ss_ticket_number=[$2], ss_customer_sk=[$0], amt=[$4], profit=[$5], substr=[substr($3, 1, 30)])
           HiveAggregate(group=[{0, 2, 4, 10}], agg#0=[sum($5)], agg#1=[sum($6)])
             HiveJoin(condition=[=($1, $11)], joinType=[inner], algorithm=[none], cost=[not available])
               HiveJoin(condition=[=($3, $9)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query89.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query89.q.out
index e8ec73f72c3..89fe555e64f 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query89.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query89.q.out
@@ -1,7 +1,7 @@
 CBO PLAN:
 HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], s_store_name=[$3], s_company_name=[$4], d_moy=[$5], sum_sales=[$6], avg_monthly_sales=[$7])
   HiveSortLimit(sort0=[$8], sort1=[$3], dir0=[ASC], dir1=[ASC], fetch=[100])
-    HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], s_store_name=[$3], s_company_name=[$4], d_moy=[$5], sum_sales=[$6], avg_monthly_sales=[$7], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))=[-($6, $7)])
+    HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], s_store_name=[$3], s_company_name=[$4], d_moy=[$5], sum_sales=[$6], avg_monthly_sales=[$7], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))1=[-($6, $7)])
       HiveFilter(condition=[CASE(<>($7, 0:DECIMAL(1, 0)), >(/(ABS(-($6, $7)), $7), 0.1:DECIMAL(1, 1)), false)])
         HiveProject((tok_table_or_col i_category)=[$2], (tok_table_or_col i_class)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col s_store_name)=[$4], (tok_table_or_col s_company_name)=[$5], (tok_table_or_col d_moy)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[avg($6) OVER (PARTITION BY $2, $0, $4, $5 ORDER BY $2 NULLS FIRST, $0 NULLS FIRST, $4 NULLS FIRST, $5 NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
           HiveProject(i_brand=[$0], i_class=[$1], i_category=[$2], d_moy=[$3], s_store_name=[$4], s_company_name=[$5], $f6=[$6])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query79.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query79.q.out
index 332b0362261..3ae0064cc2d 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query79.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query79.q.out
@@ -181,16 +181,16 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 5713516612 Data size: 1919324915684 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
-                  expressions: _col2 (type: bigint), _col0 (type: bigint), _col3 (type: varchar(60)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), substr(_col3, 1, 30) (type: string)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                  Statistics: Num rows: 5713516612 Data size: 2413759812276 Basic stats: COMPLETE Column stats: COMPLETE
+                  expressions: _col2 (type: bigint), _col0 (type: bigint), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), substr(_col3, 1, 30) (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 5713516612 Data size: 1882402767360 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col1 (type: bigint)
                     null sort order: z
                     sort order: +
                     Map-reduce partition columns: _col1 (type: bigint)
-                    Statistics: Num rows: 5713516612 Data size: 2413759812276 Basic stats: COMPLETE Column stats: COMPLETE
-                    value expressions: _col0 (type: bigint), _col2 (type: varchar(60)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: string)
+                    Statistics: Num rows: 5713516612 Data size: 1882402767360 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: bigint), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: string)
         Reducer 4 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
@@ -200,32 +200,32 @@ STAGE PLANS:
                 keys:
                   0 KEY.reducesinkkey0 (type: bigint)
                   1 KEY.reducesinkkey0 (type: bigint)
-                outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7, _col8
+                outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7
                 input vertices:
                   0 Map 1
-                Statistics: Num rows: 5713516612 Data size: 3410969417364 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 5713516612 Data size: 2879612372448 Basic stats: COMPLETE Column stats: COMPLETE
                 DynamicPartitionHashJoin: true
                 Top N Key Operator
                   sort order: ++++
-                  keys: _col2 (type: char(30)), _col1 (type: char(20)), substr(_col5, 1, 30) (type: string), _col7 (type: decimal(17,2))
+                  keys: _col2 (type: char(30)), _col1 (type: char(20)), _col7 (type: string), _col6 (type: decimal(17,2))
                   null sort order: zzzz
-                  Statistics: Num rows: 5713516612 Data size: 3410969417364 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 5713516612 Data size: 2879612372448 Basic stats: COMPLETE Column stats: COMPLETE
                   top n: 100
                   Select Operator
-                    expressions: _col2 (type: char(30)), _col1 (type: char(20)), _col8 (type: string), _col3 (type: bigint), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), substr(_col5, 1, 30) (type: string)
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                    Statistics: Num rows: 5713516612 Data size: 3405255900752 Basic stats: COMPLETE Column stats: COMPLETE
+                    expressions: _col2 (type: char(30)), _col1 (type: char(20)), _col3 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: string)
+                    outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6
+                    Statistics: Num rows: 5713516612 Data size: 2879612372448 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col6 (type: string), _col5 (type: decimal(17,2))
                       null sort order: zzzz
                       sort order: ++++
-                      Statistics: Num rows: 5713516612 Data size: 3405255900752 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col2 (type: string), _col3 (type: bigint), _col4 (type: decimal(17,2))
+                      Statistics: Num rows: 5713516612 Data size: 2879612372448 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col3 (type: bigint), _col4 (type: decimal(17,2))
         Reducer 5 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Select Operator
-                expressions: KEY.reducesinkkey0 (type: char(30)), KEY.reducesinkkey1 (type: char(20)), VALUE._col0 (type: string), VALUE._col1 (type: bigint), VALUE._col2 (type: decimal(17,2)), KEY.reducesinkkey3 (type: decimal(17,2))
+                expressions: KEY.reducesinkkey0 (type: char(30)), KEY.reducesinkkey1 (type: char(20)), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(17,2)), KEY.reducesinkkey3 (type: decimal(17,2))
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 5713516612 Data size: 2879612372448 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit