You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ai...@apache.org on 2016/09/14 18:52:03 UTC
[2/2] hive git commit: HIVE-14251: Union All of different types
resolves to incorrect data (Aihua Xu, reviewed by Mohit Sabharwal)
HIVE-14251: Union All of different types resolves to incorrect data (Aihua Xu, reviewed by Mohit Sabharwal)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/76fe9e78
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/76fe9e78
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/76fe9e78
Branch: refs/heads/master
Commit: 76fe9e783643a59c3e92c1c47d4a39ad744771cc
Parents: 0b62e6f
Author: Aihua Xu <ai...@apache.org>
Authored: Fri Jul 22 13:26:18 2016 -0400
Committer: Aihua Xu <ai...@apache.org>
Committed: Wed Sep 14 14:50:33 2016 -0400
----------------------------------------------------------------------
.../hadoop/hive/ql/exec/FunctionRegistry.java | 33 ++---
.../hive/ql/exec/TestFunctionRegistry.java | 15 +-
.../clientpositive/alter_partition_change_col.q | 2 +-
.../clientpositive/alter_table_cascade.q | 4 +-
.../queries/clientpositive/groupby_sort_1_23.q | 4 +-
.../clientpositive/groupby_sort_skew_1_23.q | 8 +-
ql/src/test/queries/clientpositive/union32.q | 8 +-
ql/src/test/queries/clientpositive/union33.q | 10 +-
ql/src/test/queries/clientpositive/union36.q | 4 +-
.../queries/clientpositive/unionDistinct_1.q | 16 +--
.../queries/clientpositive/union_date_trim.q | 2 +-
ql/src/test/queries/clientpositive/union_null.q | 8 +-
.../queries/clientpositive/union_remove_12.q | 4 +-
.../queries/clientpositive/union_remove_13.q | 4 +-
.../queries/clientpositive/union_remove_14.q | 4 +-
.../queries/clientpositive/union_type_chk.q | 7 -
.../clientpositive/unionall_join_nullconstant.q | 2 +-
.../alter_partition_change_col.q.out | 4 +-
.../clientpositive/alter_table_cascade.q.out | 8 +-
.../clientpositive/groupby_sort_1_23.q.out | 122 ++++++++--------
.../clientpositive/groupby_sort_skew_1_23.q.out | 132 ++++++++---------
.../clientpositive/llap/unionDistinct_1.q.out | 70 +++++----
.../clientpositive/llap/union_type_chk.q.out | 30 ----
.../spark/groupby_sort_1_23.q.out | 134 ++++++++---------
.../spark/groupby_sort_skew_1_23.q.out | 144 +++++++++----------
.../results/clientpositive/spark/union32.q.out | 130 ++++++++---------
.../results/clientpositive/spark/union33.q.out | 16 +--
.../clientpositive/spark/union_date_trim.q.out | 4 +-
.../clientpositive/spark/union_null.q.out | 12 +-
.../clientpositive/spark/union_remove_12.q.out | 48 +++----
.../clientpositive/spark/union_remove_13.q.out | 52 +++----
.../clientpositive/spark/union_remove_14.q.out | 48 +++----
.../test/results/clientpositive/union32.q.out | 130 ++++++++---------
.../test/results/clientpositive/union33.q.out | 16 +--
.../test/results/clientpositive/union36.q.out | 28 ++--
.../clientpositive/unionDistinct_1.q.out | 70 +++++----
.../clientpositive/union_date_trim.q.out | 4 +-
.../results/clientpositive/union_null.q.out | 12 +-
.../clientpositive/union_remove_12.q.out | 8 +-
.../clientpositive/union_remove_13.q.out | 26 ++--
.../clientpositive/union_remove_14.q.out | 8 +-
.../results/clientpositive/union_type_chk.q.out | 30 ----
.../unionall_join_nullconstant.q.out | 4 +-
.../hive/serde2/typeinfo/TypeInfoUtils.java | 3 +
44 files changed, 660 insertions(+), 768 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 4710b8f..a854f9f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -697,7 +697,9 @@ public final class FunctionRegistry {
}
/**
- * Find a common class for union-all operator
+ * Find a common type for union-all operator. Only the common types for the same
+ * type group will resolve to a common type. No implicit conversion across different
+ * type groups will be done.
*/
public static TypeInfo getCommonClassForUnionAll(TypeInfo a, TypeInfo b) {
if (a.equals(b)) {
@@ -716,26 +718,21 @@ public final class FunctionRegistry {
PrimitiveGrouping pgA = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcA);
PrimitiveGrouping pgB = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcB);
- // handle string types properly
- if (pgA == PrimitiveGrouping.STRING_GROUP && pgB == PrimitiveGrouping.STRING_GROUP) {
- return getTypeInfoForPrimitiveCategory(
- (PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b,PrimitiveCategory.STRING);
+ if (pgA != pgB) {
+ return null;
}
- if (TypeInfoUtils.implicitConvertible(a, b)) {
- return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcB);
- }
- if (TypeInfoUtils.implicitConvertible(b, a)) {
- return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcA);
- }
- for (PrimitiveCategory t : TypeInfoUtils.numericTypeList) {
- if (TypeInfoUtils.implicitConvertible(pcA, t)
- && TypeInfoUtils.implicitConvertible(pcB, t)) {
- return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, t);
- }
+ switch(pgA) {
+ case STRING_GROUP:
+ return getTypeInfoForPrimitiveCategory(
+ (PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b,PrimitiveCategory.STRING);
+ case NUMERIC_GROUP:
+ return TypeInfoUtils.implicitConvertible(a, b) ? b : a;
+ case DATE_GROUP:
+ return TypeInfoFactory.timestampTypeInfo;
+ default:
+ return null;
}
-
- return null;
}
/**
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
index 59ecd1e..d2d5a1b 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
@@ -327,14 +327,12 @@ public class TestFunctionRegistry extends TestCase {
}
public void testCommonClassUnionAll() {
+ unionAll(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.intTypeInfo,
+ TypeInfoFactory.doubleTypeInfo);
unionAll(TypeInfoFactory.intTypeInfo, TypeInfoFactory.decimalTypeInfo,
TypeInfoFactory.decimalTypeInfo);
- unionAll(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.decimalTypeInfo,
- TypeInfoFactory.stringTypeInfo);
unionAll(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.decimalTypeInfo,
TypeInfoFactory.doubleTypeInfo);
- unionAll(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo,
- TypeInfoFactory.stringTypeInfo);
unionAll(varchar5, varchar10, varchar10);
unionAll(varchar10, varchar5, varchar10);
@@ -346,8 +344,13 @@ public class TestFunctionRegistry extends TestCase {
unionAll(char10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
unionAll(TypeInfoFactory.stringTypeInfo, char10, TypeInfoFactory.stringTypeInfo);
- // common class for char/varchar is string?
- comparison(char10, varchar5, TypeInfoFactory.stringTypeInfo);
+ unionAll(TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.dateTypeInfo,
+ TypeInfoFactory.timestampTypeInfo);
+
+ // Invalid cases
+ unionAll(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.decimalTypeInfo, null);
+ unionAll(TypeInfoFactory.doubleTypeInfo, varchar10, null);
+
}
public void testGetTypeInfoForPrimitiveCategory() {
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/alter_partition_change_col.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/alter_partition_change_col.q b/ql/src/test/queries/clientpositive/alter_partition_change_col.q
index 360f4d2..23de3d7 100644
--- a/ql/src/test/queries/clientpositive/alter_partition_change_col.q
+++ b/ql/src/test/queries/clientpositive/alter_partition_change_col.q
@@ -12,7 +12,7 @@ create table alter_partition_change_col1 (c1 string, c2 string) partitioned by (
insert overwrite table alter_partition_change_col1 partition (p1, p2)
select c1, c2, 'abc', '123' from alter_partition_change_col0
union all
- select c1, c2, null, '123' from alter_partition_change_col0;
+ select c1, c2, cast(null as string), '123' from alter_partition_change_col0;
show partitions alter_partition_change_col1;
select * from alter_partition_change_col1 where p1='abc';
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/alter_table_cascade.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/alter_table_cascade.q b/ql/src/test/queries/clientpositive/alter_table_cascade.q
index acca4e8..288fe4a 100644
--- a/ql/src/test/queries/clientpositive/alter_table_cascade.q
+++ b/ql/src/test/queries/clientpositive/alter_table_cascade.q
@@ -15,7 +15,7 @@ create table alter_table_cascade (c1 string) partitioned by (p1 string, p2 strin
insert overwrite table alter_table_cascade partition (p1, p2)
select c1, 'abc', '123' from alter_table_src
union all
- select c1, null, '123' from alter_table_src;
+ select c1, cast(null as string), '123' from alter_table_src;
show partitions alter_table_cascade;
describe alter_table_cascade;
@@ -92,7 +92,7 @@ create table alter_table_restrict (c1 string) partitioned by (p1 string, p2 stri
insert overwrite table alter_table_restrict partition (p1, p2)
select c1, 'abc', '123' from alter_table_src
union all
- select c1, null, '123' from alter_table_src;
+ select c1, cast(null as string), '123' from alter_table_src;
show partitions alter_table_restrict;
describe alter_table_restrict;
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/groupby_sort_1_23.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_sort_1_23.q b/ql/src/test/queries/clientpositive/groupby_sort_1_23.q
index 67fdd23..f0a00fb 100644
--- a/ql/src/test/queries/clientpositive/groupby_sort_1_23.q
+++ b/ql/src/test/queries/clientpositive/groupby_sort_1_23.q
@@ -134,14 +134,14 @@ INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key
) subq1;
INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) as cnt FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key
) subq1;
SELECT * FROM outputTbl1;
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q b/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q
index 39b9420..38384dc 100644
--- a/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q
+++ b/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q
@@ -100,12 +100,12 @@ SELECT * FROM outputTbl3;
-- group by followed by another group by
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputTbl1
-SELECT key + key, sum(cnt) from
+SELECT cast(key + key as string), sum(cnt) from
(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
group by key + key;
INSERT OVERWRITE TABLE outputTbl1
-SELECT key + key, sum(cnt) from
+SELECT cast(key + key as string), sum(cnt) from
(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
group by key + key;
@@ -135,14 +135,14 @@ INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key
) subq1;
INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) as cnt FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key
) subq1;
SELECT * FROM outputTbl1;
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/union32.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union32.q b/ql/src/test/queries/clientpositive/union32.q
index b18d484..4d9de6a 100644
--- a/ql/src/test/queries/clientpositive/union32.q
+++ b/ql/src/test/queries/clientpositive/union32.q
@@ -51,13 +51,13 @@ SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key) a
-- Test union with join on the left selecting multiple columns
EXPLAIN
SELECT * FROM
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key
UNION ALL
SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
;
SELECT * FROM
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key
UNION ALL
SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
;
@@ -67,11 +67,11 @@ EXPLAIN
SELECT * FROM
(SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
UNION ALL
-SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
+SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
;
SELECT * FROM
(SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
UNION ALL
-SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
+SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
;
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/union33.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union33.q b/ql/src/test/queries/clientpositive/union33.q
index 017442e..994060a 100644
--- a/ql/src/test/queries/clientpositive/union33.q
+++ b/ql/src/test/queries/clientpositive/union33.q
@@ -11,7 +11,7 @@ SELECT key, value FROM (
SELECT key, value FROM src
WHERE key = 0
UNION ALL
- SELECT key, COUNT(*) AS value FROM src
+ SELECT key, cast(COUNT(*) as string) AS value FROM src
GROUP BY key
)a;
@@ -20,7 +20,7 @@ SELECT key, value FROM (
SELECT key, value FROM src
WHERE key = 0
UNION ALL
- SELECT key, COUNT(*) AS value FROM src
+ SELECT key, cast(COUNT(*) as string) AS value FROM src
GROUP BY key
)a;
@@ -28,7 +28,7 @@ SELECT COUNT(*) FROM test_src;
EXPLAIN INSERT OVERWRITE TABLE test_src
SELECT key, value FROM (
- SELECT key, COUNT(*) AS value FROM src
+ SELECT key, cast(COUNT(*) as string) AS value FROM src
GROUP BY key
UNION ALL
SELECT key, value FROM src
@@ -37,7 +37,7 @@ UNION ALL
INSERT OVERWRITE TABLE test_src
SELECT key, value FROM (
- SELECT key, COUNT(*) AS value FROM src
+ SELECT key, cast(COUNT(*) as string) AS value FROM src
GROUP BY key
UNION ALL
SELECT key, value FROM src
@@ -45,4 +45,4 @@ UNION ALL
)a;
SELECT COUNT(*) FROM test_src;
-
\ No newline at end of file
+
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/union36.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union36.q b/ql/src/test/queries/clientpositive/union36.q
index b79ff0f..f050e1a 100644
--- a/ql/src/test/queries/clientpositive/union36.q
+++ b/ql/src/test/queries/clientpositive/union36.q
@@ -2,9 +2,9 @@ set hive.mapred.mode=nonstrict;
set hive.cbo.enable=false;
-- SORT_QUERY_RESULTS
-select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u;
+select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast('100000000' as decimal(10,0)) x from (select * from src limit 2) s3)u;
-select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u;
+select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as decimal(10,0)) x from (select * from src limit 2) s3)u;
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/unionDistinct_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/unionDistinct_1.q b/ql/src/test/queries/clientpositive/unionDistinct_1.q
index 5c52d9b..9792267 100644
--- a/ql/src/test/queries/clientpositive/unionDistinct_1.q
+++ b/ql/src/test/queries/clientpositive/unionDistinct_1.q
@@ -911,13 +911,13 @@ SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key) a
-- Test union with join on the left selecting multiple columns
EXPLAIN
SELECT * FROM
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS STRING) AS value FROM t1 a JOIN t2 b ON a.key = b.key
UNION DISTINCT
SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
;
SELECT * FROM
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key
UNION DISTINCT
SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
;
@@ -927,13 +927,13 @@ EXPLAIN
SELECT * FROM
(SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
UNION DISTINCT
-SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
+SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
;
SELECT * FROM
(SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
UNION DISTINCT
-SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
+SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
;
-- union33.q
@@ -950,7 +950,7 @@ SELECT key, value FROM (
SELECT key, value FROM src
WHERE key = 0
UNION DISTINCT
- SELECT key, COUNT(*) AS value FROM src
+ SELECT key, cast(COUNT(*) as string) AS value FROM src
GROUP BY key
)a;
@@ -959,7 +959,7 @@ SELECT key, value FROM (
SELECT key, value FROM src
WHERE key = 0
UNION DISTINCT
- SELECT key, COUNT(*) AS value FROM src
+ SELECT key, cast(COUNT(*) as string) AS value FROM src
GROUP BY key
)a;
@@ -967,7 +967,7 @@ SELECT COUNT(*) FROM test_src;
EXPLAIN INSERT OVERWRITE TABLE test_src
SELECT key, value FROM (
- SELECT key, COUNT(*) AS value FROM src
+ SELECT key, cast(COUNT(*) as string) AS value FROM src
GROUP BY key
UNION DISTINCT
SELECT key, value FROM src
@@ -976,7 +976,7 @@ UNION DISTINCT
INSERT OVERWRITE TABLE test_src
SELECT key, value FROM (
- SELECT key, COUNT(*) AS value FROM src
+ SELECT key, cast(COUNT(*) as string) AS value FROM src
GROUP BY key
UNION DISTINCT
SELECT key, value FROM src
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/union_date_trim.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_date_trim.q b/ql/src/test/queries/clientpositive/union_date_trim.q
index 6842e56..51f5997 100644
--- a/ql/src/test/queries/clientpositive/union_date_trim.q
+++ b/ql/src/test/queries/clientpositive/union_date_trim.q
@@ -4,4 +4,4 @@ insert into table testDate select 1, '2014-04-07' from src where key=100 limit 1
insert into table testDate select 2, '2014-04-08' from src where key=100 limit 1;
insert into table testDate select 3, '2014-04-09' from src where key=100 limit 1;
--- without the fix following query will throw HiveException: Incompatible types for union operator
-insert into table testDate select id, tm from (select id, dt as tm from testDate where id = 1 union all select id, dt as tm from testDate where id = 2 union all select id, trim(Cast (dt as string)) as tm from testDate where id = 3 ) a;
+insert into table testDate select id, tm from (select id, dt as tm from testDate where id = 1 union all select id, dt as tm from testDate where id = 2 union all select id, cast(trim(Cast (dt as string)) as date) as tm from testDate where id = 3 ) a;
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/union_null.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_null.q b/ql/src/test/queries/clientpositive/union_null.q
index 23da07a..45448b4 100644
--- a/ql/src/test/queries/clientpositive/union_null.q
+++ b/ql/src/test/queries/clientpositive/union_null.q
@@ -1,10 +1,10 @@
-- SORT_BEFORE_DIFF
-- HIVE-2901
-select x from (select * from (select value as x from src order by x limit 5)a union all select * from (select NULL as x from src limit 5)b )a;
-set hive.cbo.returnpath.hiveop=true;
-select x from (select * from (select value as x from src order by x limit 5)a union all select * from (select NULL as x from src limit 5)b )a;
+select x from (select * from (select value as x from src order by x limit 5)a union all select * from (select cast(NULL as string) as x from src limit 5)b )a;
+set hive.cbo.returnpath.hiveop=true;
+select x from (select * from (select value as x from src order by x limit 5)a union all select * from (select cast(NULL as string) as x from src limit 5)b )a;
set hive.cbo.returnpath.hiveop=false;
-- HIVE-4837
-select * from (select * from (select null as N from src1 group by key)a UNION ALL select * from (select null as N from src1 group by key)b ) a;
+select * from (select * from (select cast(null as string) as N from src1 group by key)a UNION ALL select * from (select cast(null as string) as N from src1 group by key)b ) a;
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/union_remove_12.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_remove_12.q b/ql/src/test/queries/clientpositive/union_remove_12.q
index b665666..6bfb991 100644
--- a/ql/src/test/queries/clientpositive/union_remove_12.q
+++ b/ql/src/test/queries/clientpositive/union_remove_12.q
@@ -34,7 +34,7 @@ SELECT * FROM
(
select key, 1 as `values` from inputTbl1
union all
-select a.key as key, b.val as `values`
+select a.key as key, cast(b.val as bigint) as `values`
FROM inputTbl1 a join inputTbl1 b on a.key=b.key
)c;
@@ -43,7 +43,7 @@ SELECT * FROM
(
select key, 1 as `values` from inputTbl1
union all
-select a.key as key, b.val as `values`
+select a.key as key, cast(b.val as bigint) as `values`
FROM inputTbl1 a join inputTbl1 b on a.key=b.key
)c;
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/union_remove_13.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_remove_13.q b/ql/src/test/queries/clientpositive/union_remove_13.q
index 11077fd..4d59b6b 100644
--- a/ql/src/test/queries/clientpositive/union_remove_13.q
+++ b/ql/src/test/queries/clientpositive/union_remove_13.q
@@ -34,7 +34,7 @@ SELECT * FROM
(
select key, count(1) as `values` from inputTbl1 group by key
union all
-select a.key as key, b.val as `values`
+select a.key as key, cast(b.val as bigint) as `values`
FROM inputTbl1 a join inputTbl1 b on a.key=b.key
)c;
@@ -43,7 +43,7 @@ SELECT * FROM
(
select key, count(1) as `values` from inputTbl1 group by key
union all
-select a.key as key, b.val as `values`
+select a.key as key, cast(b.val as bigint) as `values`
FROM inputTbl1 a join inputTbl1 b on a.key=b.key
)c;
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/union_remove_14.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_remove_14.q b/ql/src/test/queries/clientpositive/union_remove_14.q
index b559b35..3ffb985 100644
--- a/ql/src/test/queries/clientpositive/union_remove_14.q
+++ b/ql/src/test/queries/clientpositive/union_remove_14.q
@@ -35,7 +35,7 @@ SELECT * FROM
(
select key, 1 as `values` from inputTbl1
union all
-select a.key as key, b.val as `values`
+select a.key as key, cast(b.val as bigint) as `values`
FROM inputTbl1 a join inputTbl1 b on a.key=b.key
)c;
@@ -44,7 +44,7 @@ SELECT * FROM
(
select key, 1 as `values` from inputTbl1
union all
-select a.key as key, b.val as `values`
+select a.key as key, cast(b.val as bigint) as `values`
FROM inputTbl1 a join inputTbl1 b on a.key=b.key
)c;
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/union_type_chk.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_type_chk.q b/ql/src/test/queries/clientpositive/union_type_chk.q
deleted file mode 100644
index ff2e7cf..0000000
--- a/ql/src/test/queries/clientpositive/union_type_chk.q
+++ /dev/null
@@ -1,7 +0,0 @@
-set hive.mapred.mode=nonstrict;
-set hive.cbo.enable=false;
-
--- SORT_QUERY_RESULTS
-select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u;
-
-select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u;
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/unionall_join_nullconstant.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/unionall_join_nullconstant.q b/ql/src/test/queries/clientpositive/unionall_join_nullconstant.q
index 4f0ffa6..6d6fa66 100644
--- a/ql/src/test/queries/clientpositive/unionall_join_nullconstant.q
+++ b/ql/src/test/queries/clientpositive/unionall_join_nullconstant.q
@@ -21,7 +21,7 @@ CREATE TABLE table_b2
CREATE VIEW a_view AS
SELECT
substring(a1.composite_key, 1, locate('|',a1.composite_key) - 1) AS autoname,
-NULL AS col1
+cast(NULL as string) AS col1
FROM table_a1 a1
FULL OUTER JOIN table_a2 a2
ON a1.composite_key = a2.composite_key
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/alter_partition_change_col.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_partition_change_col.q.out b/ql/src/test/results/clientpositive/alter_partition_change_col.q.out
index 23febee..fff987c 100644
--- a/ql/src/test/results/clientpositive/alter_partition_change_col.q.out
+++ b/ql/src/test/results/clientpositive/alter_partition_change_col.q.out
@@ -29,14 +29,14 @@ POSTHOOK: Output: default@alter_partition_change_col1
PREHOOK: query: insert overwrite table alter_partition_change_col1 partition (p1, p2)
select c1, c2, 'abc', '123' from alter_partition_change_col0
union all
- select c1, c2, null, '123' from alter_partition_change_col0
+ select c1, c2, cast(null as string), '123' from alter_partition_change_col0
PREHOOK: type: QUERY
PREHOOK: Input: default@alter_partition_change_col0
PREHOOK: Output: default@alter_partition_change_col1
POSTHOOK: query: insert overwrite table alter_partition_change_col1 partition (p1, p2)
select c1, c2, 'abc', '123' from alter_partition_change_col0
union all
- select c1, c2, null, '123' from alter_partition_change_col0
+ select c1, c2, cast(null as string), '123' from alter_partition_change_col0
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alter_partition_change_col0
POSTHOOK: Output: default@alter_partition_change_col1@p1=__HIVE_DEFAULT_PARTITION__/p2=123
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/alter_table_cascade.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_table_cascade.q.out b/ql/src/test/results/clientpositive/alter_table_cascade.q.out
index 1d8204c..5b9c9ee 100644
--- a/ql/src/test/results/clientpositive/alter_table_cascade.q.out
+++ b/ql/src/test/results/clientpositive/alter_table_cascade.q.out
@@ -37,14 +37,14 @@ POSTHOOK: Output: default@alter_table_cascade
PREHOOK: query: insert overwrite table alter_table_cascade partition (p1, p2)
select c1, 'abc', '123' from alter_table_src
union all
- select c1, null, '123' from alter_table_src
+ select c1, cast(null as string), '123' from alter_table_src
PREHOOK: type: QUERY
PREHOOK: Input: default@alter_table_src
PREHOOK: Output: default@alter_table_cascade
POSTHOOK: query: insert overwrite table alter_table_cascade partition (p1, p2)
select c1, 'abc', '123' from alter_table_src
union all
- select c1, null, '123' from alter_table_src
+ select c1, cast(null as string), '123' from alter_table_src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alter_table_src
POSTHOOK: Output: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123
@@ -902,14 +902,14 @@ POSTHOOK: Output: default@alter_table_restrict
PREHOOK: query: insert overwrite table alter_table_restrict partition (p1, p2)
select c1, 'abc', '123' from alter_table_src
union all
- select c1, null, '123' from alter_table_src
+ select c1, cast(null as string), '123' from alter_table_src
PREHOOK: type: QUERY
PREHOOK: Input: default@alter_table_src
PREHOOK: Output: default@alter_table_restrict
POSTHOOK: query: insert overwrite table alter_table_restrict partition (p1, p2)
select c1, 'abc', '123' from alter_table_src
union all
- select c1, null, '123' from alter_table_src
+ select c1, cast(null as string), '123' from alter_table_src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alter_table_src
POSTHOOK: Output: default@alter_table_restrict@p1=__HIVE_DEFAULT_PARTITION__/p2=123
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
index 81fe0d9..e70f912 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
@@ -2791,7 +2791,7 @@ INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key
) subq1
PREHOOK: type: QUERY
POSTHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by
@@ -2800,7 +2800,7 @@ INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key
) subq1
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -2902,23 +2902,27 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 0
+ Select Operator
+ expressions: UDFToString(_col0) (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1
- columns.types double,bigint
- escape.delim \
- serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string,bigint
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Map Reduce
@@ -2937,47 +2941,43 @@ STAGE PLANS:
mode: final
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Union
+ Union
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,cnt
- columns.comments
- columns.types int:int
-#### A masked pattern was here ####
- name default.outputtbl1
- numFiles 1
- numRows 10
- rawDataSize 30
- serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 40
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key,cnt
+ columns.comments
+ columns.types int:int
+#### A masked pattern was here ####
+ name default.outputtbl1
+ numFiles 1
+ numRows 10
+ rawDataSize 30
+ serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 40
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
TableScan
GatherStats: false
Union
@@ -3027,7 +3027,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
columns _col0,_col1
- columns.types double,bigint
+ columns.types string,bigint
escape.delim \
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -3036,7 +3036,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
columns _col0,_col1
- columns.types double,bigint
+ columns.types string,bigint
escape.delim \
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -3310,7 +3310,7 @@ PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) as cnt FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key
) subq1
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
@@ -3319,7 +3319,7 @@ POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) as cnt FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key
) subq1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
index 5cf0ea2..fc52984 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
@@ -2330,7 +2330,7 @@ PREHOOK: query: -- it should not matter what follows the group by
-- group by followed by another group by
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputTbl1
-SELECT key + key, sum(cnt) from
+SELECT cast(key + key as string), sum(cnt) from
(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
group by key + key
PREHOOK: type: QUERY
@@ -2340,7 +2340,7 @@ POSTHOOK: query: -- it should not matter what follows the group by
-- group by followed by another group by
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputTbl1
-SELECT key + key, sum(cnt) from
+SELECT cast(key + key as string), sum(cnt) from
(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
group by key + key
POSTHOOK: type: QUERY
@@ -2514,7 +2514,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+ expressions: UDFToInteger(UDFToString(_col0)) (type: int), UDFToInteger(_col1) (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -2581,14 +2581,14 @@ STAGE PLANS:
#### A masked pattern was here ####
PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
-SELECT key + key, sum(cnt) from
+SELECT cast(key + key as string), sum(cnt) from
(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
group by key + key
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Output: default@outputtbl1
POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
-SELECT key + key, sum(cnt) from
+SELECT cast(key + key as string), sum(cnt) from
(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
group by key + key
POSTHOOK: type: QUERY
@@ -3055,7 +3055,7 @@ INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key
) subq1
PREHOOK: type: QUERY
POSTHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by
@@ -3064,7 +3064,7 @@ INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key
) subq1
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -3232,23 +3232,27 @@ STAGE PLANS:
mode: final
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 0
+ Select Operator
+ expressions: UDFToString(_col0) (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1
- columns.types double,bigint
- escape.delim \
- serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string,bigint
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Map Reduce
@@ -3267,47 +3271,43 @@ STAGE PLANS:
mode: final
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Union
+ Union
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,cnt
- columns.comments
- columns.types int:int
-#### A masked pattern was here ####
- name default.outputtbl1
- numFiles 1
- numRows 10
- rawDataSize 30
- serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 40
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key,cnt
+ columns.comments
+ columns.types int:int
+#### A masked pattern was here ####
+ name default.outputtbl1
+ numFiles 1
+ numRows 10
+ rawDataSize 30
+ serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 40
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
TableScan
GatherStats: false
Union
@@ -3357,7 +3357,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
columns _col0,_col1
- columns.types double,bigint
+ columns.types string,bigint
escape.delim \
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -3366,7 +3366,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
properties:
columns _col0,_col1
- columns.types double,bigint
+ columns.types string,bigint
escape.delim \
serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
@@ -3640,7 +3640,7 @@ PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) as cnt FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key
) subq1
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
@@ -3649,7 +3649,7 @@ POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) as cnt FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key
) subq1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
index 52af8fd..624d886 100644
--- a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out
@@ -13532,14 +13532,14 @@ POSTHOOK: Input: default@t2
PREHOOK: query: -- Test union with join on the left selecting multiple columns
EXPLAIN
SELECT * FROM
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS STRING) AS value FROM t1 a JOIN t2 b ON a.key = b.key
UNION DISTINCT
SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
PREHOOK: type: QUERY
POSTHOOK: query: -- Test union with join on the left selecting multiple columns
EXPLAIN
SELECT * FROM
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS STRING) AS value FROM t1 a JOIN t2 b ON a.key = b.key
UNION DISTINCT
SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
POSTHOOK: type: QUERY
@@ -13580,7 +13580,7 @@ STAGE PLANS:
1 Map 4
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToString(UDFToDouble(_col1)) (type: string)
+ expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToString(CAST( _col1 AS varchar(20))) (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
Group By Operator
@@ -13660,7 +13660,7 @@ STAGE PLANS:
ListSink
PREHOOK: query: SELECT * FROM
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key
UNION DISTINCT
SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
PREHOOK: type: QUERY
@@ -13668,7 +13668,7 @@ PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
#### A masked pattern was here ####
POSTHOOK: query: SELECT * FROM
-(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
+(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key
UNION DISTINCT
SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
POSTHOOK: type: QUERY
@@ -13676,30 +13676,24 @@ POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
#### A masked pattern was here ####
0.0 0
-0.0 0.0
2.0 2
-2.0 2.0
4.0 4
-4.0 4.0
5.0 5
-5.0 5.0
8.0 8
-8.0 8.0
9.0 9
-9.0 9.0
PREHOOK: query: -- Test union with join on the right selecting multiple columns
EXPLAIN
SELECT * FROM
(SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
UNION DISTINCT
-SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
+SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
PREHOOK: type: QUERY
POSTHOOK: query: -- Test union with join on the right selecting multiple columns
EXPLAIN
SELECT * FROM
(SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
UNION DISTINCT
-SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
+SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -13721,18 +13715,18 @@ STAGE PLANS:
alias: t2
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double)
+ expressions: UDFToDouble(key) (type: double), key (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: double), _col1 (type: double)
+ keys: _col0 (type: double), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type: double)
+ key expressions: _col0 (type: double), _col1 (type: string)
sort order: ++
- Map-reduce partition columns: _col0 (type: double), _col1 (type: double)
+ Map-reduce partition columns: _col0 (type: double), _col1 (type: string)
Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: no inputs
@@ -13759,18 +13753,18 @@ STAGE PLANS:
1 Map 5
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToDouble(_col1) (type: double)
+ expressions: UDFToDouble(UDFToLong(_col0)) (type: double), _col1 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- keys: _col0 (type: double), _col1 (type: double)
+ keys: _col0 (type: double), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type: double)
+ key expressions: _col0 (type: double), _col1 (type: string)
sort order: ++
- Map-reduce partition columns: _col0 (type: double), _col1 (type: double)
+ Map-reduce partition columns: _col0 (type: double), _col1 (type: string)
Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: no inputs
@@ -13797,7 +13791,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- keys: KEY._col0 (type: double), KEY._col1 (type: double)
+ keys: KEY._col0 (type: double), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
@@ -13820,7 +13814,7 @@ STAGE PLANS:
PREHOOK: query: SELECT * FROM
(SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
UNION DISTINCT
-SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
+SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
@@ -13828,17 +13822,17 @@ PREHOOK: Input: default@t2
POSTHOOK: query: SELECT * FROM
(SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
UNION DISTINCT
-SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
+SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
#### A masked pattern was here ####
-0.0 0.0
-2.0 2.0
-4.0 4.0
-5.0 5.0
-8.0 8.0
-9.0 9.0
+0.0 0
+2.0 2
+4.0 4
+5.0 5
+8.0 8
+9.0 9
PREHOOK: query: -- union33.q
-- SORT_BEFORE_DIFF
@@ -13868,7 +13862,7 @@ SELECT key, value FROM (
SELECT key, value FROM src
WHERE key = 0
UNION DISTINCT
- SELECT key, COUNT(*) AS value FROM src
+ SELECT key, cast(COUNT(*) as string) AS value FROM src
GROUP BY key
)a
PREHOOK: type: QUERY
@@ -13877,7 +13871,7 @@ SELECT key, value FROM (
SELECT key, value FROM src
WHERE key = 0
UNION DISTINCT
- SELECT key, COUNT(*) AS value FROM src
+ SELECT key, cast(COUNT(*) as string) AS value FROM src
GROUP BY key
)a
POSTHOOK: type: QUERY
@@ -14007,7 +14001,7 @@ SELECT key, value FROM (
SELECT key, value FROM src
WHERE key = 0
UNION DISTINCT
- SELECT key, COUNT(*) AS value FROM src
+ SELECT key, cast(COUNT(*) as string) AS value FROM src
GROUP BY key
)a
PREHOOK: type: QUERY
@@ -14018,7 +14012,7 @@ SELECT key, value FROM (
SELECT key, value FROM src
WHERE key = 0
UNION DISTINCT
- SELECT key, COUNT(*) AS value FROM src
+ SELECT key, cast(COUNT(*) as string) AS value FROM src
GROUP BY key
)a
POSTHOOK: type: QUERY
@@ -14037,7 +14031,7 @@ POSTHOOK: Input: default@test_src
310
PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_src
SELECT key, value FROM (
- SELECT key, COUNT(*) AS value FROM src
+ SELECT key, cast(COUNT(*) as string) AS value FROM src
GROUP BY key
UNION DISTINCT
SELECT key, value FROM src
@@ -14046,7 +14040,7 @@ UNION DISTINCT
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_src
SELECT key, value FROM (
- SELECT key, COUNT(*) AS value FROM src
+ SELECT key, cast(COUNT(*) as string) AS value FROM src
GROUP BY key
UNION DISTINCT
SELECT key, value FROM src
@@ -14176,7 +14170,7 @@ STAGE PLANS:
PREHOOK: query: INSERT OVERWRITE TABLE test_src
SELECT key, value FROM (
- SELECT key, COUNT(*) AS value FROM src
+ SELECT key, cast(COUNT(*) as string) AS value FROM src
GROUP BY key
UNION DISTINCT
SELECT key, value FROM src
@@ -14187,7 +14181,7 @@ PREHOOK: Input: default@src
PREHOOK: Output: default@test_src
POSTHOOK: query: INSERT OVERWRITE TABLE test_src
SELECT key, value FROM (
- SELECT key, COUNT(*) AS value FROM src
+ SELECT key, cast(COUNT(*) as string) AS value FROM src
GROUP BY key
UNION DISTINCT
SELECT key, value FROM src
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/llap/union_type_chk.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/union_type_chk.q.out b/ql/src/test/results/clientpositive/llap/union_type_chk.q.out
deleted file mode 100644
index 1eb0182..0000000
--- a/ql/src/test/results/clientpositive/llap/union_type_chk.q.out
+++ /dev/null
@@ -1,30 +0,0 @@
-PREHOOK: query: -- SORT_QUERY_RESULTS
-select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-#### A masked pattern was here ####
-POSTHOOK: query: -- SORT_QUERY_RESULTS
-select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-#### A masked pattern was here ####
-0.4999999900000002
-0.4999999900000002
-4.999999900000002E-9
-4.999999900000002E-9
-4.999999900000002E-9
-4.999999900000002E-9
-PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-#### A masked pattern was here ####
-POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-#### A masked pattern was here ####
-0.25
-0.25
-0.25
-0.25
-NULL
-NULL
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
index 408c1b9..c6a7982 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
@@ -1941,7 +1941,7 @@ INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key
) subq1
PREHOOK: type: QUERY
POSTHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by
@@ -1950,7 +1950,7 @@ INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key
) subq1
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -1982,44 +1982,40 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint)
+ expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
- outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key,cnt
+ columns.comments
+ columns.types int:int
#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,cnt
- columns.comments
- columns.types int:int
-#### A masked pattern was here ####
- name default.outputtbl1
- numFiles 4
- numRows 10
- rawDataSize 30
- serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 40
+ name default.outputtbl1
+ numFiles 4
+ numRows 10
+ rawDataSize 30
+ serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 40
#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -2161,40 +2157,44 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+ expressions: UDFToString(_col0) (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,cnt
- columns.comments
- columns.types int:int
-#### A masked pattern was here ####
- name default.outputtbl1
- numFiles 4
- numRows 10
- rawDataSize 30
- serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 40
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key,cnt
+ columns.comments
+ columns.types int:int
+#### A masked pattern was here ####
+ name default.outputtbl1
+ numFiles 4
+ numRows 10
+ rawDataSize 30
+ serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 40
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -2231,7 +2231,7 @@ PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) as cnt FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key
) subq1
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
@@ -2240,7 +2240,7 @@ POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) as cnt FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key
) subq1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
index 6325889..a438124 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
@@ -1467,7 +1467,7 @@ PREHOOK: query: -- it should not matter what follows the group by
-- group by followed by another group by
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputTbl1
-SELECT key + key, sum(cnt) from
+SELECT cast(key + key as string), sum(cnt) from
(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
group by key + key
PREHOOK: type: QUERY
@@ -1477,7 +1477,7 @@ POSTHOOK: query: -- it should not matter what follows the group by
-- group by followed by another group by
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputTbl1
-SELECT key + key, sum(cnt) from
+SELECT cast(key + key as string), sum(cnt) from
(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
group by key + key
POSTHOOK: type: QUERY
@@ -1610,7 +1610,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+ expressions: UDFToInteger(UDFToString(_col0)) (type: int), UDFToInteger(_col1) (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1677,14 +1677,14 @@ STAGE PLANS:
#### A masked pattern was here ####
PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
-SELECT key + key, sum(cnt) from
+SELECT cast(key + key as string), sum(cnt) from
(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
group by key + key
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Output: default@outputtbl1
POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
-SELECT key + key, sum(cnt) from
+SELECT cast(key + key as string), sum(cnt) from
(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
group by key + key
POSTHOOK: type: QUERY
@@ -2017,7 +2017,7 @@ INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key
) subq1
PREHOOK: type: QUERY
POSTHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by
@@ -2026,7 +2026,7 @@ INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key
) subq1
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -2059,44 +2059,40 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint)
+ expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
- outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key,cnt
+ columns.comments
+ columns.types int:int
#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,cnt
- columns.comments
- columns.types int:int
-#### A masked pattern was here ####
- name default.outputtbl1
- numFiles 4
- numRows 10
- rawDataSize 30
- serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 40
+ name default.outputtbl1
+ numFiles 4
+ numRows 10
+ rawDataSize 30
+ serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 40
#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -2256,40 +2252,44 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+ expressions: UDFToString(_col0) (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
- bucket_count -1
- columns key,cnt
- columns.comments
- columns.types int:int
-#### A masked pattern was here ####
- name default.outputtbl1
- numFiles 4
- numRows 10
- rawDataSize 30
- serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 40
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.outputtbl1
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns key,cnt
+ columns.comments
+ columns.types int:int
+#### A masked pattern was here ####
+ name default.outputtbl1
+ numFiles 4
+ numRows 10
+ rawDataSize 30
+ serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 40
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -2326,7 +2326,7 @@ PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) as cnt FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key
) subq1
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
@@ -2335,7 +2335,7 @@ POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT * FROM (
SELECT key, count(1) as cnt FROM T1 GROUP BY key
UNION ALL
-SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key
) subq1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1