Posted to commits@hive.apache.org by se...@apache.org on 2015/10/02 04:37:36 UTC
[01/22] hive git commit: HIVE-11910: TestHCatLoaderEncryption should shutdown created MiniDFS instance (Jason Dere, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/llap f272ccb25 -> c5ccf6694
HIVE-11910: TestHCatLoaderEncryption should shutdown created MiniDFS instance (Jason Dere, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b36cb379
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b36cb379
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b36cb379
Branch: refs/heads/llap
Commit: b36cb37963eb7f69621543f89eaa21ef1458e031
Parents: 064e37c
Author: Jason Dere <jd...@hortonworks.com>
Authored: Wed Sep 30 12:12:47 2015 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Wed Sep 30 12:12:47 2015 -0700
----------------------------------------------------------------------
.../org/apache/hive/hcatalog/pig/TestHCatLoaderEncryption.java | 3 +++
1 file changed, 3 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/b36cb379/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderEncryption.java
----------------------------------------------------------------------
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderEncryption.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderEncryption.java
index 3b8076b..df3b72a 100644
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderEncryption.java
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderEncryption.java
@@ -426,6 +426,9 @@ public class TestHCatLoaderEncryption {
       }
     } finally {
       FileUtils.deleteDirectory(new File(TEST_DATA_DIR));
+      if (dfs != null) {
+        dfs.shutdown();
+      }
     }
   }
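For context, the pattern this commit applies is the standard MiniDFS lifecycle guard: any test that builds a MiniDFSCluster should shut it down in a finally block or @After method, so later tests do not inherit a running cluster. A minimal sketch, assuming Hadoop's MiniDFSCluster test API; the class and method names are illustrative, not from the commit:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.MiniDFSCluster;
    import org.junit.After;
    import org.junit.Before;

    public class MiniDfsLifecycleSketch {
      private MiniDFSCluster dfs;

      @Before
      public void setUp() throws Exception {
        // Start a single-node in-process HDFS for the test.
        dfs = new MiniDFSCluster.Builder(new Configuration()).numDataNodes(1).build();
      }

      @After
      public void tearDown() {
        // Same guard as the fix above: setUp may have failed before dfs was assigned.
        if (dfs != null) {
          dfs.shutdown();
        }
      }
    }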
[02/22] hive git commit: HIVE-11962 : Improve windowing_windowspec2.q tests to return consistent results (Aihua Xu via Szehon)
Posted by se...@apache.org.
HIVE-11962 : Improve windowing_windowspec2.q tests to return consistent results (Aihua Xu via Szehon)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/274847e2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/274847e2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/274847e2
Branch: refs/heads/llap
Commit: 274847e2a38164637701ac4c62802ac91cca4432
Parents: b36cb37
Author: Szehon Ho <sz...@cloudera.com>
Authored: Wed Sep 30 12:17:06 2015 -0700
Committer: Szehon Ho <sz...@cloudera.com>
Committed: Wed Sep 30 12:18:27 2015 -0700
----------------------------------------------------------------------
.../clientpositive/windowing_windowspec2.q | 16 +-
.../clientpositive/windowing_windowspec2.q.out | 198 +++++++++----------
2 files changed, 107 insertions(+), 107 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/274847e2/ql/src/test/queries/clientpositive/windowing_windowspec2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/windowing_windowspec2.q b/ql/src/test/queries/clientpositive/windowing_windowspec2.q
index 3c5bc3d..0ec1e99 100644
--- a/ql/src/test/queries/clientpositive/windowing_windowspec2.q
+++ b/ql/src/test/queries/clientpositive/windowing_windowspec2.q
@@ -36,16 +36,16 @@ select ts, f, count(f) over (partition by ts order by f rows between 1 following
select ts, f, count(f) over (partition by ts order by f rows between unbounded preceding and 1 following) from over10k limit 100;
-- max
-select ts, f, max(f) over (partition by ts order by t rows between 2 preceding and 1 preceding) from over10k limit 100;
-select ts, f, max(f) over (partition by ts order by t rows between unbounded preceding and 1 preceding) from over10k limit 100;
-select ts, f, max(f) over (partition by ts order by t rows between 1 following and 2 following) from over10k limit 100;
-select ts, f, max(f) over (partition by ts order by t rows between unbounded preceding and 1 following) from over10k limit 100;
+select ts, f, max(f) over (partition by ts order by t,f rows between 2 preceding and 1 preceding) from over10k limit 100;
+select ts, f, max(f) over (partition by ts order by t,f rows between unbounded preceding and 1 preceding) from over10k limit 100;
+select ts, f, max(f) over (partition by ts order by t,f rows between 1 following and 2 following) from over10k limit 100;
+select ts, f, max(f) over (partition by ts order by t,f rows between unbounded preceding and 1 following) from over10k limit 100;
-- min
-select ts, f, min(f) over (partition by ts order by t rows between 2 preceding and 1 preceding) from over10k limit 100;
-select ts, f, min(f) over (partition by ts order by t rows between unbounded preceding and 1 preceding) from over10k limit 100;
-select ts, f, min(f) over (partition by ts order by t rows between 1 following and 2 following) from over10k limit 100;
-select ts, f, min(f) over (partition by ts order by t rows between unbounded preceding and 1 following) from over10k limit 100;
+select ts, f, min(f) over (partition by ts order by t,f rows between 2 preceding and 1 preceding) from over10k limit 100;
+select ts, f, min(f) over (partition by ts order by t,f rows between unbounded preceding and 1 preceding) from over10k limit 100;
+select ts, f, min(f) over (partition by ts order by t,f rows between 1 following and 2 following) from over10k limit 100;
+select ts, f, min(f) over (partition by ts order by t,f rows between unbounded preceding and 1 following) from over10k limit 100;
-- first_value
select ts, f, first_value(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100;
http://git-wip-us.apache.org/repos/asf/hive/blob/274847e2/ql/src/test/results/clientpositive/windowing_windowspec2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/windowing_windowspec2.q.out b/ql/src/test/results/clientpositive/windowing_windowspec2.q.out
index e71a03f..a88eb6c 100644
--- a/ql/src/test/results/clientpositive/windowing_windowspec2.q.out
+++ b/ql/src/test/results/clientpositive/windowing_windowspec2.q.out
@@ -1347,12 +1347,12 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703072 79.46 24
2013-03-01 09:11:58.703072 80.02 25
PREHOOK: query: -- max
-select ts, f, max(f) over (partition by ts order by t rows between 2 preceding and 1 preceding) from over10k limit 100
+select ts, f, max(f) over (partition by ts order by t,f rows between 2 preceding and 1 preceding) from over10k limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k
#### A masked pattern was here ####
POSTHOOK: query: -- max
-select ts, f, max(f) over (partition by ts order by t rows between 2 preceding and 1 preceding) from over10k limit 100
+select ts, f, max(f) over (partition by ts order by t,f rows between 2 preceding and 1 preceding) from over10k limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k
#### A masked pattern was here ####
@@ -1363,8 +1363,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.70307 31.17 39.48
2013-03-01 09:11:58.70307 56.94 31.17
2013-03-01 09:11:58.70307 78.58 56.94
-2013-03-01 09:11:58.70307 38.61 78.58
2013-03-01 09:11:58.70307 14.78 78.58
+2013-03-01 09:11:58.70307 38.61 78.58
2013-03-01 09:11:58.70307 91.36 38.61
2013-03-01 09:11:58.70307 28.69 91.36
2013-03-01 09:11:58.70307 73.52 91.36
@@ -1397,10 +1397,10 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 80.43 68.62
2013-03-01 09:11:58.703071 54.09 80.43
2013-03-01 09:11:58.703071 42.08 80.43
-2013-03-01 09:11:58.703071 64.55 54.09
-2013-03-01 09:11:58.703071 48.89 64.55
+2013-03-01 09:11:58.703071 48.89 54.09
+2013-03-01 09:11:58.703071 64.55 48.89
2013-03-01 09:11:58.703071 56.45 64.55
-2013-03-01 09:11:58.703071 1.99 56.45
+2013-03-01 09:11:58.703071 1.99 64.55
2013-03-01 09:11:58.703071 94.27 56.45
2013-03-01 09:11:58.703071 35.32 94.27
2013-03-01 09:11:58.703071 10.62 94.27
@@ -1419,23 +1419,23 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 31.84 83.98
2013-03-01 09:11:58.703071 50.28 83.98
2013-03-01 09:11:58.703071 3.73 50.28
-2013-03-01 09:11:58.703071 53.26 50.28
-2013-03-01 09:11:58.703071 29.71 53.26
+2013-03-01 09:11:58.703071 29.71 50.28
+2013-03-01 09:11:58.703071 53.26 29.71
2013-03-01 09:11:58.703071 8.86 53.26
-2013-03-01 09:11:58.703071 21.01 29.71
+2013-03-01 09:11:58.703071 21.01 53.26
2013-03-01 09:11:58.703071 84.21 21.01
2013-03-01 09:11:58.703071 19.1 84.21
2013-03-01 09:11:58.703071 31.94 84.21
2013-03-01 09:11:58.703071 88.93 31.94
2013-03-01 09:11:58.703071 12.83 88.93
2013-03-01 09:11:58.703071 29.07 88.93
-2013-03-01 09:11:58.703071 61.88 29.07
-2013-03-01 09:11:58.703071 61.41 61.88
+2013-03-01 09:11:58.703071 61.41 29.07
+2013-03-01 09:11:58.703071 61.88 61.41
2013-03-01 09:11:58.703071 46.84 61.88
-2013-03-01 09:11:58.703072 95.01 NULL
-2013-03-01 09:11:58.703072 62.09 95.01
+2013-03-01 09:11:58.703072 62.09 NULL
+2013-03-01 09:11:58.703072 95.01 62.09
2013-03-01 09:11:58.703072 29.01 95.01
-2013-03-01 09:11:58.703072 79.46 62.09
+2013-03-01 09:11:58.703072 79.46 95.01
2013-03-01 09:11:58.703072 4.48 79.46
2013-03-01 09:11:58.703072 99.26 79.46
2013-03-01 09:11:58.703072 58.77 99.26
@@ -1453,14 +1453,14 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703072 25.91 56.7
2013-03-01 09:11:58.703072 88.08 39.3
2013-03-01 09:11:58.703072 0.48 88.08
-2013-03-01 09:11:58.703072 88.83 88.08
-2013-03-01 09:11:58.703072 9.0 88.83
-2013-03-01 09:11:58.703072 54.1 88.83
-PREHOOK: query: select ts, f, max(f) over (partition by ts order by t rows between unbounded preceding and 1 preceding) from over10k limit 100
+2013-03-01 09:11:58.703072 9.0 88.08
+2013-03-01 09:11:58.703072 88.83 9.0
+2013-03-01 09:11:58.703072 45.91 88.83
+PREHOOK: query: select ts, f, max(f) over (partition by ts order by t,f rows between unbounded preceding and 1 preceding) from over10k limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k
#### A masked pattern was here ####
-POSTHOOK: query: select ts, f, max(f) over (partition by ts order by t rows between unbounded preceding and 1 preceding) from over10k limit 100
+POSTHOOK: query: select ts, f, max(f) over (partition by ts order by t,f rows between unbounded preceding and 1 preceding) from over10k limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k
#### A masked pattern was here ####
@@ -1471,8 +1471,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.70307 31.17 39.48
2013-03-01 09:11:58.70307 56.94 39.48
2013-03-01 09:11:58.70307 78.58 56.94
-2013-03-01 09:11:58.70307 38.61 78.58
2013-03-01 09:11:58.70307 14.78 78.58
+2013-03-01 09:11:58.70307 38.61 78.58
2013-03-01 09:11:58.70307 91.36 78.58
2013-03-01 09:11:58.70307 28.69 91.36
2013-03-01 09:11:58.70307 73.52 91.36
@@ -1505,8 +1505,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 80.43 99.45
2013-03-01 09:11:58.703071 54.09 99.45
2013-03-01 09:11:58.703071 42.08 99.45
-2013-03-01 09:11:58.703071 64.55 99.45
2013-03-01 09:11:58.703071 48.89 99.45
+2013-03-01 09:11:58.703071 64.55 99.45
2013-03-01 09:11:58.703071 56.45 99.45
2013-03-01 09:11:58.703071 1.99 99.45
2013-03-01 09:11:58.703071 94.27 99.45
@@ -1527,8 +1527,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 31.84 99.45
2013-03-01 09:11:58.703071 50.28 99.45
2013-03-01 09:11:58.703071 3.73 99.45
-2013-03-01 09:11:58.703071 53.26 99.45
2013-03-01 09:11:58.703071 29.71 99.45
+2013-03-01 09:11:58.703071 53.26 99.45
2013-03-01 09:11:58.703071 8.86 99.45
2013-03-01 09:11:58.703071 21.01 99.45
2013-03-01 09:11:58.703071 84.21 99.45
@@ -1537,11 +1537,11 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 88.93 99.45
2013-03-01 09:11:58.703071 12.83 99.45
2013-03-01 09:11:58.703071 29.07 99.45
-2013-03-01 09:11:58.703071 61.88 99.45
2013-03-01 09:11:58.703071 61.41 99.45
+2013-03-01 09:11:58.703071 61.88 99.45
2013-03-01 09:11:58.703071 46.84 99.45
-2013-03-01 09:11:58.703072 95.01 NULL
-2013-03-01 09:11:58.703072 62.09 95.01
+2013-03-01 09:11:58.703072 62.09 NULL
+2013-03-01 09:11:58.703072 95.01 62.09
2013-03-01 09:11:58.703072 29.01 95.01
2013-03-01 09:11:58.703072 79.46 95.01
2013-03-01 09:11:58.703072 4.48 95.01
@@ -1561,14 +1561,14 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703072 25.91 99.26
2013-03-01 09:11:58.703072 88.08 99.26
2013-03-01 09:11:58.703072 0.48 99.26
-2013-03-01 09:11:58.703072 88.83 99.26
2013-03-01 09:11:58.703072 9.0 99.26
-2013-03-01 09:11:58.703072 54.1 99.26
-PREHOOK: query: select ts, f, max(f) over (partition by ts order by t rows between 1 following and 2 following) from over10k limit 100
+2013-03-01 09:11:58.703072 88.83 99.26
+2013-03-01 09:11:58.703072 45.91 99.26
+PREHOOK: query: select ts, f, max(f) over (partition by ts order by t,f rows between 1 following and 2 following) from over10k limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k
#### A masked pattern was here ####
-POSTHOOK: query: select ts, f, max(f) over (partition by ts order by t rows between 1 following and 2 following) from over10k limit 100
+POSTHOOK: query: select ts, f, max(f) over (partition by ts order by t,f rows between 1 following and 2 following) from over10k limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k
#### A masked pattern was here ####
@@ -1579,8 +1579,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.70307 31.17 78.58
2013-03-01 09:11:58.70307 56.94 78.58
2013-03-01 09:11:58.70307 78.58 38.61
-2013-03-01 09:11:58.70307 38.61 91.36
2013-03-01 09:11:58.70307 14.78 91.36
+2013-03-01 09:11:58.70307 38.61 91.36
2013-03-01 09:11:58.70307 91.36 73.52
2013-03-01 09:11:58.70307 28.69 92.96
2013-03-01 09:11:58.70307 73.52 95.04
@@ -1611,10 +1611,10 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 37.32 80.43
2013-03-01 09:11:58.703071 68.62 80.43
2013-03-01 09:11:58.703071 80.43 54.09
-2013-03-01 09:11:58.703071 54.09 64.55
+2013-03-01 09:11:58.703071 54.09 48.89
2013-03-01 09:11:58.703071 42.08 64.55
+2013-03-01 09:11:58.703071 48.89 64.55
2013-03-01 09:11:58.703071 64.55 56.45
-2013-03-01 09:11:58.703071 48.89 56.45
2013-03-01 09:11:58.703071 56.45 94.27
2013-03-01 09:11:58.703071 1.99 94.27
2013-03-01 09:11:58.703071 94.27 35.32
@@ -1633,23 +1633,23 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 27.23 83.98
2013-03-01 09:11:58.703071 83.98 50.28
2013-03-01 09:11:58.703071 31.84 50.28
-2013-03-01 09:11:58.703071 50.28 53.26
+2013-03-01 09:11:58.703071 50.28 29.71
2013-03-01 09:11:58.703071 3.73 53.26
-2013-03-01 09:11:58.703071 53.26 29.71
-2013-03-01 09:11:58.703071 29.71 21.01
+2013-03-01 09:11:58.703071 29.71 53.26
+2013-03-01 09:11:58.703071 53.26 21.01
2013-03-01 09:11:58.703071 8.86 84.21
2013-03-01 09:11:58.703071 21.01 84.21
2013-03-01 09:11:58.703071 84.21 31.94
2013-03-01 09:11:58.703071 19.1 88.93
2013-03-01 09:11:58.703071 31.94 88.93
2013-03-01 09:11:58.703071 88.93 29.07
-2013-03-01 09:11:58.703071 12.83 61.88
+2013-03-01 09:11:58.703071 12.83 61.41
2013-03-01 09:11:58.703071 29.07 61.88
-2013-03-01 09:11:58.703071 61.88 61.41
-2013-03-01 09:11:58.703071 61.41 46.84
+2013-03-01 09:11:58.703071 61.41 61.88
+2013-03-01 09:11:58.703071 61.88 46.84
2013-03-01 09:11:58.703071 46.84 NULL
-2013-03-01 09:11:58.703072 95.01 62.09
-2013-03-01 09:11:58.703072 62.09 79.46
+2013-03-01 09:11:58.703072 62.09 95.01
+2013-03-01 09:11:58.703072 95.01 79.46
2013-03-01 09:11:58.703072 29.01 79.46
2013-03-01 09:11:58.703072 79.46 99.26
2013-03-01 09:11:58.703072 4.48 99.26
@@ -1667,16 +1667,16 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703072 56.7 39.3
2013-03-01 09:11:58.703072 39.3 88.08
2013-03-01 09:11:58.703072 25.91 88.08
-2013-03-01 09:11:58.703072 88.08 88.83
+2013-03-01 09:11:58.703072 88.08 9.0
2013-03-01 09:11:58.703072 0.48 88.83
+2013-03-01 09:11:58.703072 9.0 88.83
2013-03-01 09:11:58.703072 88.83 54.1
-2013-03-01 09:11:58.703072 9.0 54.1
-2013-03-01 09:11:58.703072 54.1 45.91
-PREHOOK: query: select ts, f, max(f) over (partition by ts order by t rows between unbounded preceding and 1 following) from over10k limit 100
+2013-03-01 09:11:58.703072 45.91 54.1
+PREHOOK: query: select ts, f, max(f) over (partition by ts order by t,f rows between unbounded preceding and 1 following) from over10k limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k
#### A masked pattern was here ####
-POSTHOOK: query: select ts, f, max(f) over (partition by ts order by t rows between unbounded preceding and 1 following) from over10k limit 100
+POSTHOOK: query: select ts, f, max(f) over (partition by ts order by t,f rows between unbounded preceding and 1 following) from over10k limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k
#### A masked pattern was here ####
@@ -1687,8 +1687,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.70307 31.17 56.94
2013-03-01 09:11:58.70307 56.94 78.58
2013-03-01 09:11:58.70307 78.58 78.58
-2013-03-01 09:11:58.70307 38.61 78.58
-2013-03-01 09:11:58.70307 14.78 91.36
+2013-03-01 09:11:58.70307 14.78 78.58
+2013-03-01 09:11:58.70307 38.61 91.36
2013-03-01 09:11:58.70307 91.36 91.36
2013-03-01 09:11:58.70307 28.69 91.36
2013-03-01 09:11:58.70307 73.52 92.96
@@ -1721,8 +1721,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 80.43 99.45
2013-03-01 09:11:58.703071 54.09 99.45
2013-03-01 09:11:58.703071 42.08 99.45
-2013-03-01 09:11:58.703071 64.55 99.45
2013-03-01 09:11:58.703071 48.89 99.45
+2013-03-01 09:11:58.703071 64.55 99.45
2013-03-01 09:11:58.703071 56.45 99.45
2013-03-01 09:11:58.703071 1.99 99.45
2013-03-01 09:11:58.703071 94.27 99.45
@@ -1743,8 +1743,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 31.84 99.45
2013-03-01 09:11:58.703071 50.28 99.45
2013-03-01 09:11:58.703071 3.73 99.45
-2013-03-01 09:11:58.703071 53.26 99.45
2013-03-01 09:11:58.703071 29.71 99.45
+2013-03-01 09:11:58.703071 53.26 99.45
2013-03-01 09:11:58.703071 8.86 99.45
2013-03-01 09:11:58.703071 21.01 99.45
2013-03-01 09:11:58.703071 84.21 99.45
@@ -1753,11 +1753,11 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 88.93 99.45
2013-03-01 09:11:58.703071 12.83 99.45
2013-03-01 09:11:58.703071 29.07 99.45
-2013-03-01 09:11:58.703071 61.88 99.45
2013-03-01 09:11:58.703071 61.41 99.45
+2013-03-01 09:11:58.703071 61.88 99.45
2013-03-01 09:11:58.703071 46.84 99.45
-2013-03-01 09:11:58.703072 95.01 95.01
2013-03-01 09:11:58.703072 62.09 95.01
+2013-03-01 09:11:58.703072 95.01 95.01
2013-03-01 09:11:58.703072 29.01 95.01
2013-03-01 09:11:58.703072 79.46 95.01
2013-03-01 09:11:58.703072 4.48 99.26
@@ -1777,16 +1777,16 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703072 25.91 99.26
2013-03-01 09:11:58.703072 88.08 99.26
2013-03-01 09:11:58.703072 0.48 99.26
-2013-03-01 09:11:58.703072 88.83 99.26
2013-03-01 09:11:58.703072 9.0 99.26
-2013-03-01 09:11:58.703072 54.1 99.26
+2013-03-01 09:11:58.703072 88.83 99.26
+2013-03-01 09:11:58.703072 45.91 99.26
PREHOOK: query: -- min
-select ts, f, min(f) over (partition by ts order by t rows between 2 preceding and 1 preceding) from over10k limit 100
+select ts, f, min(f) over (partition by ts order by t,f rows between 2 preceding and 1 preceding) from over10k limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k
#### A masked pattern was here ####
POSTHOOK: query: -- min
-select ts, f, min(f) over (partition by ts order by t rows between 2 preceding and 1 preceding) from over10k limit 100
+select ts, f, min(f) over (partition by ts order by t,f rows between 2 preceding and 1 preceding) from over10k limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k
#### A masked pattern was here ####
@@ -1797,10 +1797,10 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.70307 31.17 17.85
2013-03-01 09:11:58.70307 56.94 17.85
2013-03-01 09:11:58.70307 78.58 31.17
-2013-03-01 09:11:58.70307 38.61 56.94
-2013-03-01 09:11:58.70307 14.78 38.61
+2013-03-01 09:11:58.70307 14.78 56.94
+2013-03-01 09:11:58.70307 38.61 14.78
2013-03-01 09:11:58.70307 91.36 14.78
-2013-03-01 09:11:58.70307 28.69 14.78
+2013-03-01 09:11:58.70307 28.69 38.61
2013-03-01 09:11:58.70307 73.52 28.69
2013-03-01 09:11:58.70307 92.96 28.69
2013-03-01 09:11:58.70307 95.04 73.52
@@ -1831,10 +1831,10 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 80.43 37.32
2013-03-01 09:11:58.703071 54.09 68.62
2013-03-01 09:11:58.703071 42.08 54.09
-2013-03-01 09:11:58.703071 64.55 42.08
2013-03-01 09:11:58.703071 48.89 42.08
+2013-03-01 09:11:58.703071 64.55 42.08
2013-03-01 09:11:58.703071 56.45 48.89
-2013-03-01 09:11:58.703071 1.99 48.89
+2013-03-01 09:11:58.703071 1.99 56.45
2013-03-01 09:11:58.703071 94.27 1.99
2013-03-01 09:11:58.703071 35.32 1.99
2013-03-01 09:11:58.703071 10.62 35.32
@@ -1853,8 +1853,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 31.84 27.23
2013-03-01 09:11:58.703071 50.28 31.84
2013-03-01 09:11:58.703071 3.73 31.84
-2013-03-01 09:11:58.703071 53.26 3.73
2013-03-01 09:11:58.703071 29.71 3.73
+2013-03-01 09:11:58.703071 53.26 3.73
2013-03-01 09:11:58.703071 8.86 29.71
2013-03-01 09:11:58.703071 21.01 8.86
2013-03-01 09:11:58.703071 84.21 8.86
@@ -1863,11 +1863,11 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 88.93 19.1
2013-03-01 09:11:58.703071 12.83 31.94
2013-03-01 09:11:58.703071 29.07 12.83
-2013-03-01 09:11:58.703071 61.88 12.83
-2013-03-01 09:11:58.703071 61.41 29.07
+2013-03-01 09:11:58.703071 61.41 12.83
+2013-03-01 09:11:58.703071 61.88 29.07
2013-03-01 09:11:58.703071 46.84 61.41
-2013-03-01 09:11:58.703072 95.01 NULL
-2013-03-01 09:11:58.703072 62.09 95.01
+2013-03-01 09:11:58.703072 62.09 NULL
+2013-03-01 09:11:58.703072 95.01 62.09
2013-03-01 09:11:58.703072 29.01 62.09
2013-03-01 09:11:58.703072 79.46 29.01
2013-03-01 09:11:58.703072 4.48 29.01
@@ -1887,14 +1887,14 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703072 25.91 39.3
2013-03-01 09:11:58.703072 88.08 25.91
2013-03-01 09:11:58.703072 0.48 25.91
-2013-03-01 09:11:58.703072 88.83 0.48
2013-03-01 09:11:58.703072 9.0 0.48
-2013-03-01 09:11:58.703072 54.1 9.0
-PREHOOK: query: select ts, f, min(f) over (partition by ts order by t rows between unbounded preceding and 1 preceding) from over10k limit 100
+2013-03-01 09:11:58.703072 88.83 0.48
+2013-03-01 09:11:58.703072 45.91 9.0
+PREHOOK: query: select ts, f, min(f) over (partition by ts order by t,f rows between unbounded preceding and 1 preceding) from over10k limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k
#### A masked pattern was here ####
-POSTHOOK: query: select ts, f, min(f) over (partition by ts order by t rows between unbounded preceding and 1 preceding) from over10k limit 100
+POSTHOOK: query: select ts, f, min(f) over (partition by ts order by t,f rows between unbounded preceding and 1 preceding) from over10k limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k
#### A masked pattern was here ####
@@ -1905,8 +1905,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.70307 31.17 14.54
2013-03-01 09:11:58.70307 56.94 14.54
2013-03-01 09:11:58.70307 78.58 14.54
-2013-03-01 09:11:58.70307 38.61 14.54
2013-03-01 09:11:58.70307 14.78 14.54
+2013-03-01 09:11:58.70307 38.61 14.54
2013-03-01 09:11:58.70307 91.36 14.54
2013-03-01 09:11:58.70307 28.69 14.54
2013-03-01 09:11:58.70307 73.52 14.54
@@ -1939,8 +1939,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 80.43 0.83
2013-03-01 09:11:58.703071 54.09 0.83
2013-03-01 09:11:58.703071 42.08 0.83
-2013-03-01 09:11:58.703071 64.55 0.83
2013-03-01 09:11:58.703071 48.89 0.83
+2013-03-01 09:11:58.703071 64.55 0.83
2013-03-01 09:11:58.703071 56.45 0.83
2013-03-01 09:11:58.703071 1.99 0.83
2013-03-01 09:11:58.703071 94.27 0.83
@@ -1961,8 +1961,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 31.84 0.83
2013-03-01 09:11:58.703071 50.28 0.83
2013-03-01 09:11:58.703071 3.73 0.83
-2013-03-01 09:11:58.703071 53.26 0.83
2013-03-01 09:11:58.703071 29.71 0.83
+2013-03-01 09:11:58.703071 53.26 0.83
2013-03-01 09:11:58.703071 8.86 0.83
2013-03-01 09:11:58.703071 21.01 0.83
2013-03-01 09:11:58.703071 84.21 0.83
@@ -1971,11 +1971,11 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 88.93 0.83
2013-03-01 09:11:58.703071 12.83 0.83
2013-03-01 09:11:58.703071 29.07 0.83
-2013-03-01 09:11:58.703071 61.88 0.83
2013-03-01 09:11:58.703071 61.41 0.83
+2013-03-01 09:11:58.703071 61.88 0.83
2013-03-01 09:11:58.703071 46.84 0.83
-2013-03-01 09:11:58.703072 95.01 NULL
-2013-03-01 09:11:58.703072 62.09 95.01
+2013-03-01 09:11:58.703072 62.09 NULL
+2013-03-01 09:11:58.703072 95.01 62.09
2013-03-01 09:11:58.703072 29.01 62.09
2013-03-01 09:11:58.703072 79.46 29.01
2013-03-01 09:11:58.703072 4.48 29.01
@@ -1995,14 +1995,14 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703072 25.91 0.79
2013-03-01 09:11:58.703072 88.08 0.79
2013-03-01 09:11:58.703072 0.48 0.79
-2013-03-01 09:11:58.703072 88.83 0.48
2013-03-01 09:11:58.703072 9.0 0.48
-2013-03-01 09:11:58.703072 54.1 0.48
-PREHOOK: query: select ts, f, min(f) over (partition by ts order by t rows between 1 following and 2 following) from over10k limit 100
+2013-03-01 09:11:58.703072 88.83 0.48
+2013-03-01 09:11:58.703072 45.91 0.48
+PREHOOK: query: select ts, f, min(f) over (partition by ts order by t,f rows between 1 following and 2 following) from over10k limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k
#### A masked pattern was here ####
-POSTHOOK: query: select ts, f, min(f) over (partition by ts order by t rows between 1 following and 2 following) from over10k limit 100
+POSTHOOK: query: select ts, f, min(f) over (partition by ts order by t,f rows between 1 following and 2 following) from over10k limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k
#### A masked pattern was here ####
@@ -2011,10 +2011,10 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.70307 39.48 17.85
2013-03-01 09:11:58.70307 17.85 31.17
2013-03-01 09:11:58.70307 31.17 56.94
-2013-03-01 09:11:58.70307 56.94 38.61
+2013-03-01 09:11:58.70307 56.94 14.78
2013-03-01 09:11:58.70307 78.58 14.78
-2013-03-01 09:11:58.70307 38.61 14.78
-2013-03-01 09:11:58.70307 14.78 28.69
+2013-03-01 09:11:58.70307 14.78 38.61
+2013-03-01 09:11:58.70307 38.61 28.69
2013-03-01 09:11:58.70307 91.36 28.69
2013-03-01 09:11:58.70307 28.69 73.52
2013-03-01 09:11:58.70307 73.52 92.96
@@ -2047,8 +2047,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 80.43 42.08
2013-03-01 09:11:58.703071 54.09 42.08
2013-03-01 09:11:58.703071 42.08 48.89
-2013-03-01 09:11:58.703071 64.55 48.89
-2013-03-01 09:11:58.703071 48.89 1.99
+2013-03-01 09:11:58.703071 48.89 56.45
+2013-03-01 09:11:58.703071 64.55 1.99
2013-03-01 09:11:58.703071 56.45 1.99
2013-03-01 09:11:58.703071 1.99 35.32
2013-03-01 09:11:58.703071 94.27 10.62
@@ -2069,8 +2069,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 31.84 3.73
2013-03-01 09:11:58.703071 50.28 3.73
2013-03-01 09:11:58.703071 3.73 29.71
-2013-03-01 09:11:58.703071 53.26 8.86
2013-03-01 09:11:58.703071 29.71 8.86
+2013-03-01 09:11:58.703071 53.26 8.86
2013-03-01 09:11:58.703071 8.86 21.01
2013-03-01 09:11:58.703071 21.01 19.1
2013-03-01 09:11:58.703071 84.21 19.1
@@ -2079,11 +2079,11 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 88.93 12.83
2013-03-01 09:11:58.703071 12.83 29.07
2013-03-01 09:11:58.703071 29.07 61.41
-2013-03-01 09:11:58.703071 61.88 46.84
2013-03-01 09:11:58.703071 61.41 46.84
+2013-03-01 09:11:58.703071 61.88 46.84
2013-03-01 09:11:58.703071 46.84 NULL
-2013-03-01 09:11:58.703072 95.01 29.01
2013-03-01 09:11:58.703072 62.09 29.01
+2013-03-01 09:11:58.703072 95.01 29.01
2013-03-01 09:11:58.703072 29.01 4.48
2013-03-01 09:11:58.703072 79.46 4.48
2013-03-01 09:11:58.703072 4.48 58.77
@@ -2103,14 +2103,14 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703072 25.91 0.48
2013-03-01 09:11:58.703072 88.08 0.48
2013-03-01 09:11:58.703072 0.48 9.0
-2013-03-01 09:11:58.703072 88.83 9.0
2013-03-01 09:11:58.703072 9.0 45.91
-2013-03-01 09:11:58.703072 54.1 0.36
-PREHOOK: query: select ts, f, min(f) over (partition by ts order by t rows between unbounded preceding and 1 following) from over10k limit 100
+2013-03-01 09:11:58.703072 88.83 45.91
+2013-03-01 09:11:58.703072 45.91 0.36
+PREHOOK: query: select ts, f, min(f) over (partition by ts order by t,f rows between unbounded preceding and 1 following) from over10k limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k
#### A masked pattern was here ####
-POSTHOOK: query: select ts, f, min(f) over (partition by ts order by t rows between unbounded preceding and 1 following) from over10k limit 100
+POSTHOOK: query: select ts, f, min(f) over (partition by ts order by t,f rows between unbounded preceding and 1 following) from over10k limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k
#### A masked pattern was here ####
@@ -2121,8 +2121,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.70307 31.17 14.54
2013-03-01 09:11:58.70307 56.94 14.54
2013-03-01 09:11:58.70307 78.58 14.54
-2013-03-01 09:11:58.70307 38.61 14.54
2013-03-01 09:11:58.70307 14.78 14.54
+2013-03-01 09:11:58.70307 38.61 14.54
2013-03-01 09:11:58.70307 91.36 14.54
2013-03-01 09:11:58.70307 28.69 14.54
2013-03-01 09:11:58.70307 73.52 14.54
@@ -2155,8 +2155,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 80.43 0.83
2013-03-01 09:11:58.703071 54.09 0.83
2013-03-01 09:11:58.703071 42.08 0.83
-2013-03-01 09:11:58.703071 64.55 0.83
2013-03-01 09:11:58.703071 48.89 0.83
+2013-03-01 09:11:58.703071 64.55 0.83
2013-03-01 09:11:58.703071 56.45 0.83
2013-03-01 09:11:58.703071 1.99 0.83
2013-03-01 09:11:58.703071 94.27 0.83
@@ -2177,8 +2177,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 31.84 0.83
2013-03-01 09:11:58.703071 50.28 0.83
2013-03-01 09:11:58.703071 3.73 0.83
-2013-03-01 09:11:58.703071 53.26 0.83
2013-03-01 09:11:58.703071 29.71 0.83
+2013-03-01 09:11:58.703071 53.26 0.83
2013-03-01 09:11:58.703071 8.86 0.83
2013-03-01 09:11:58.703071 21.01 0.83
2013-03-01 09:11:58.703071 84.21 0.83
@@ -2187,11 +2187,11 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 88.93 0.83
2013-03-01 09:11:58.703071 12.83 0.83
2013-03-01 09:11:58.703071 29.07 0.83
-2013-03-01 09:11:58.703071 61.88 0.83
2013-03-01 09:11:58.703071 61.41 0.83
+2013-03-01 09:11:58.703071 61.88 0.83
2013-03-01 09:11:58.703071 46.84 0.83
-2013-03-01 09:11:58.703072 95.01 62.09
-2013-03-01 09:11:58.703072 62.09 29.01
+2013-03-01 09:11:58.703072 62.09 62.09
+2013-03-01 09:11:58.703072 95.01 29.01
2013-03-01 09:11:58.703072 29.01 29.01
2013-03-01 09:11:58.703072 79.46 4.48
2013-03-01 09:11:58.703072 4.48 4.48
@@ -2211,9 +2211,9 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703072 25.91 0.79
2013-03-01 09:11:58.703072 88.08 0.48
2013-03-01 09:11:58.703072 0.48 0.48
-2013-03-01 09:11:58.703072 88.83 0.48
2013-03-01 09:11:58.703072 9.0 0.48
-2013-03-01 09:11:58.703072 54.1 0.48
+2013-03-01 09:11:58.703072 88.83 0.48
+2013-03-01 09:11:58.703072 45.91 0.48
PREHOOK: query: -- first_value
select ts, f, first_value(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100
PREHOOK: type: QUERY
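The underlying issue this commit addresses: with ROWS-based frames, rows that tie on the ORDER BY key (here, equal values of t within a ts partition) can arrive in any order, so a physical frame such as "2 preceding and 1 preceding" may contain different rows from run to run. Adding f as a tie-breaker fixes the row order and therefore the frame contents. A hedged sketch of exercising such a query over JDBC; the HiveServer2 URL is a placeholder, not from the commit:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class WindowDeterminismSketch {
      public static void main(String[] args) throws Exception {
        // Placeholder URL; adjust host/port/database for a real cluster.
        try (Connection conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
             Statement stmt = conn.createStatement();
             // "order by t, f": f breaks ties in t, pinning down the physical
             // row order that the ROWS frame is evaluated against.
             ResultSet rs = stmt.executeQuery(
                 "select ts, f, max(f) over (partition by ts order by t, f "
                     + "rows between 2 preceding and 1 preceding) from over10k limit 100")) {
          while (rs.next()) {
            System.out.println(rs.getString(1) + "\t" + rs.getFloat(2) + "\t" + rs.getFloat(3));
          }
        }
      }
    }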
[03/22] hive git commit: HIVE-11903 : Add lock metrics to HS2 (Yongzhi Chen via Szehon)
Posted by se...@apache.org.
HIVE-11903 : Add lock metrics to HS2 (Yongzhi Chen via Szehon)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/50b6d0c6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/50b6d0c6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/50b6d0c6
Branch: refs/heads/llap
Commit: 50b6d0c6bbf3ba838d3851ccc7cad0575c39732c
Parents: 274847e
Author: Szehon Ho <sz...@cloudera.com>
Authored: Wed Sep 30 12:21:30 2015 -0700
Committer: Szehon Ho <sz...@cloudera.com>
Committed: Wed Sep 30 12:21:30 2015 -0700
----------------------------------------------------------------------
.../common/metrics/common/MetricsConstant.java | 5 ++
.../hadoop/hive/ql/lockmgr/DbLockManager.java | 21 ++++++++
.../zookeeper/ZooKeeperHiveLockManager.java | 41 ++++++++++++++++
.../zookeeper/TestZookeeperLockManager.java | 50 ++++++++++++++++++++
4 files changed, 117 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/50b6d0c6/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java b/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
index 13c3cf9..88a3c29 100644
--- a/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
+++ b/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
@@ -33,4 +33,9 @@ public class MetricsConstant {
public static String JDO_ROLLBACK_TRANSACTIONS = "rollbacked_jdo_transactions";
public static String JDO_COMMIT_TRANSACTIONS = "committed_jdo_transactions";
public static String JDO_OPEN_TRANSACTIONS = "opened_jdo_transactions";
+
+ public static String METASTORE_HIVE_LOCKS = "metastore_hive_locks";
+ public static String ZOOKEEPER_HIVE_SHAREDLOCKS = "zookeeper_hive_sharedlocks";
+ public static String ZOOKEEPER_HIVE_EXCLUSIVELOCKS = "zookeeper_hive_exclusivelocks";
+ public static String ZOOKEEPER_HIVE_SEMISHAREDLOCKS = "zookeeper_hive_semisharedlocks";
}
http://git-wip-us.apache.org/repos/asf/hive/blob/50b6d0c6/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
index 82e227f..bb9da9d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
@@ -20,6 +20,9 @@ package org.apache.hadoop.hive.ql.lockmgr;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.JavaUtils;
+import org.apache.hadoop.hive.common.metrics.common.Metrics;
+import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
+import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.*;
@@ -99,6 +102,16 @@ public class DbLockManager implements HiveLockManager{
throw new LockException(ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg());
}
acquiredLocks.add(hl);
+
+ Metrics metrics = MetricsFactory.getInstance();
+ if (metrics != null) {
+ try {
+ metrics.incrementCounter(MetricsConstant.METASTORE_HIVE_LOCKS);
+ } catch (Exception e) {
+ LOG.warn("Error Reporting hive client metastore lock operation to Metrics system", e);
+ }
+ }
+
return res.getState();
} catch (NoSuchTxnException e) {
LOG.error("Metastore could not find txnid " + lock.getTxnid());
@@ -133,6 +146,14 @@ public class DbLockManager implements HiveLockManager{
LOG.debug("Unlocking " + hiveLock);
client.unlock(lockId);
boolean removed = locks.remove(hiveLock);
+ Metrics metrics = MetricsFactory.getInstance();
+ if (metrics != null) {
+ try {
+ metrics.decrementCounter(MetricsConstant.METASTORE_HIVE_LOCKS);
+ } catch (Exception e) {
+ LOG.warn("Error Reporting hive client metastore unlock operation to Metrics system", e);
+ }
+ }
LOG.debug("Removed a lock " + removed);
} catch (NoSuchLockException e) {
LOG.error("Metastore could find no record of lock " + JavaUtils.lockIdToString(lockId));
http://git-wip-us.apache.org/repos/asf/hive/blob/50b6d0c6/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java
index fb954d8..7c7a8d1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java
@@ -19,8 +19,12 @@
package org.apache.hadoop.hive.ql.lockmgr.zookeeper;
import com.google.common.annotations.VisibleForTesting;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.metrics.common.Metrics;
+import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
+import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.lockmgr.*;
@@ -402,7 +406,25 @@ public class ZooKeeperHiveLockManager implements HiveLockManager {
return null;
}
}
+ Metrics metrics = MetricsFactory.getInstance();
+ if (metrics != null) {
+ try {
+ switch(mode) {
+ case EXCLUSIVE:
+ metrics.incrementCounter(MetricsConstant.ZOOKEEPER_HIVE_EXCLUSIVELOCKS);
+ break;
+ case SEMI_SHARED:
+ metrics.incrementCounter(MetricsConstant.ZOOKEEPER_HIVE_SEMISHAREDLOCKS);
+ break;
+ default:
+ metrics.incrementCounter(MetricsConstant.ZOOKEEPER_HIVE_SHAREDLOCKS);
+ break;
+ }
+ } catch (Exception e) {
+ LOG.warn("Error Reporting hive client zookeeper lock operation to Metrics system", e);
+ }
+ }
return new ZooKeeperHiveLock(res, key, mode);
}
@@ -438,6 +460,7 @@ public class ZooKeeperHiveLockManager implements HiveLockManager {
@VisibleForTesting
static void unlockPrimitive(HiveLock hiveLock, String parent, CuratorFramework curatorFramework) throws LockException {
ZooKeeperHiveLock zLock = (ZooKeeperHiveLock)hiveLock;
+ HiveLockMode lMode = hiveLock.getHiveLockMode();
HiveLockObject obj = zLock.getHiveLockObject();
String name = getLastObjectName(parent, obj);
try {
@@ -448,6 +471,24 @@ public class ZooKeeperHiveLockManager implements HiveLockManager {
if (children == null || children.isEmpty()) {
curatorFramework.delete().forPath(name);
}
+ Metrics metrics = MetricsFactory.getInstance();
+ if (metrics != null) {
+ try {
+ switch(lMode) {
+ case EXCLUSIVE:
+ metrics.decrementCounter(MetricsConstant.ZOOKEEPER_HIVE_EXCLUSIVELOCKS);
+ break;
+ case SEMI_SHARED:
+ metrics.decrementCounter(MetricsConstant.ZOOKEEPER_HIVE_SEMISHAREDLOCKS);
+ break;
+ default:
+ metrics.decrementCounter(MetricsConstant.ZOOKEEPER_HIVE_SHAREDLOCKS);
+ break;
+ }
+ } catch (Exception e) {
+ LOG.warn("Error Reporting hive client zookeeper unlock operation to Metrics system", e);
+ }
+ }
} catch (KeeperException.NoNodeException nne) {
//can happen in retrying deleting the zLock after exceptions like InterruptedException
//or in a race condition where parent has already been deleted by other process when it
http://git-wip-us.apache.org/repos/asf/hive/blob/50b6d0c6/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/zookeeper/TestZookeeperLockManager.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/zookeeper/TestZookeeperLockManager.java b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/zookeeper/TestZookeeperLockManager.java
index 4a1ef2e..7fcaa22 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/zookeeper/TestZookeeperLockManager.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/zookeeper/TestZookeeperLockManager.java
@@ -18,7 +18,14 @@
package org.apache.hadoop.hive.ql.lockmgr.zookeeper;
+import java.io.File;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
+import org.apache.hadoop.hive.common.metrics.metrics2.MetricsReporting;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockManagerCtx;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockMode;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
@@ -33,6 +40,9 @@ import org.junit.Before;
import org.junit.After;
import org.junit.Test;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
public class TestZookeeperLockManager {
private HiveConf conf;
@@ -110,5 +120,45 @@ public class TestZookeeperLockManager {
conf.setVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CLIENT_PORT, "9999");
Assert.assertEquals("node1:5666,node2:9999,node3:9999", ZooKeeperHiveHelper.getQuorumServers(conf));
}
+
+ @Test
+ public void testMetrics() throws Exception{
+ conf.setVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_QUORUM, "localhost");
+ conf.setVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CLIENT_PORT, String.valueOf(server.getPort()));
+ File workDir = new File(System.getProperty("test.tmp.dir"));
+ File jsonReportFile = new File(workDir, "json_reportingzk1");
+ jsonReportFile.delete();
+ conf.setBoolVar(HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED, true);
+ conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
+ conf.setVar(HiveConf.ConfVars.HIVE_METRICS_REPORTER, MetricsReporting.JSON_FILE.name() + "," + MetricsReporting.JMX.name());
+ conf.setVar(HiveConf.ConfVars.HIVE_METRICS_JSON_FILE_LOCATION, jsonReportFile.toString());
+ conf.setVar(HiveConf.ConfVars.HIVE_METRICS_JSON_FILE_INTERVAL, "100ms");
+ MetricsFactory.init(conf);
+
+ HiveLockManagerCtx ctx = new HiveLockManagerCtx(conf);
+ ZooKeeperHiveLockManager zMgr= new ZooKeeperHiveLockManager();
+ zMgr.setContext(ctx);
+ ZooKeeperHiveLock curLock = zMgr.lock(hiveLock, HiveLockMode.SHARED, false);
+ Thread.sleep(2000);
+ byte[] jsonData = Files.readAllBytes(Paths.get(jsonReportFile.getAbsolutePath()));
+ ObjectMapper objectMapper = new ObjectMapper();
+ JsonNode rootNode = objectMapper.readTree(jsonData);
+ JsonNode countersNode = rootNode.path("counters");
+ JsonNode zkLockNode = countersNode.path("zookeeper_hive_sharedlocks");
+ JsonNode zkLockCountNode = zkLockNode.path("count");
+ Assert.assertTrue(zkLockCountNode.asInt() == 1);
+
+ zMgr.unlock(curLock);
+ Thread.sleep(2000);
+ jsonData = Files.readAllBytes(Paths.get(jsonReportFile.getAbsolutePath()));
+ objectMapper = new ObjectMapper();
+ rootNode = objectMapper.readTree(jsonData);
+ countersNode = rootNode.path("counters");
+ zkLockNode = countersNode.path("zookeeper_hive_sharedlocks");
+ zkLockCountNode = zkLockNode.path("count");
+ Assert.assertTrue(zkLockCountNode.asInt() == 0);
+ zMgr.close();
+ }
+
}
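Operationally, the counters this commit adds surface through whatever reporters are enabled (the test above configures both JSON_FILE and JMX). A condensed sketch of polling one of the new counters from the JSON dump file, following the same Jackson traversal as the test; the file path is an assumption standing in for the HIVE_METRICS_JSON_FILE_LOCATION setting:

    import java.nio.file.Files;
    import java.nio.file.Paths;

    import com.fasterxml.jackson.databind.JsonNode;
    import com.fasterxml.jackson.databind.ObjectMapper;

    public class LockMetricsPollSketch {
      public static void main(String[] args) throws Exception {
        // Assumed dump location; must match the JSON file location configured in HiveConf.
        byte[] json = Files.readAllBytes(Paths.get("/tmp/hs2_metrics.json"));
        JsonNode root = new ObjectMapper().readTree(json);
        // Counters are keyed by the new MetricsConstant names.
        int shared = root.path("counters").path("zookeeper_hive_sharedlocks").path("count").asInt();
        int exclusive = root.path("counters").path("zookeeper_hive_exclusivelocks").path("count").asInt();
        System.out.println("shared=" + shared + " exclusive=" + exclusive);
      }
    }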
[19/22] hive git commit: HIVE-11928: ORC footer section can also exceed protobuf message limit (Prasanth Jayachandran reviewed by Sergey Shelukhin and Owen O'Malley)
Posted by se...@apache.org.
HIVE-11928: ORC footer section can also exceed protobuf message limit (Prasanth Jayachandran reviewed by Sergey Shelukhin and Owen O'Malley)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/467a117e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/467a117e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/467a117e
Branch: refs/heads/llap
Commit: 467a117edeb40074957d222386e1800194322a29
Parents: 947871a
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Thu Oct 1 17:04:00 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Thu Oct 1 17:04:00 2015 -0500
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/io/orc/InStream.java | 24 +++++++++++
.../hadoop/hive/ql/io/orc/MetadataReader.java | 2 +-
.../hadoop/hive/ql/io/orc/ReaderImpl.java | 43 ++------------------
3 files changed, 29 insertions(+), 40 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/467a117e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
index 381d97d..6fec8b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
@@ -30,10 +30,12 @@ import org.apache.hadoop.hive.common.DiskRange;
import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.BufferChunk;
import com.google.common.annotations.VisibleForTesting;
+import com.google.protobuf.CodedInputStream;
public abstract class InStream extends InputStream {
private static final Log LOG = LogFactory.getLog(InStream.class);
+ private static final int PROTOBUF_MESSAGE_MAX_LIMIT = 1024 << 20; // 1GB
protected final String name;
protected final long length;
@@ -447,4 +449,26 @@ public abstract class InStream extends InputStream {
return new CompressedStream(name, input, length, codec, bufferSize);
}
}
+
+ /**
+ * Creates coded input stream (used for protobuf message parsing) with higher message size limit.
+ *
+ * @param name the name of the stream
+ * @param input the list of ranges of bytes for the stream; from disk or cache
+ * @param length the length in bytes of the stream
+ * @param codec the compression codec
+ * @param bufferSize the compression buffer size
+ * @return coded input stream
+ * @throws IOException
+ */
+ public static CodedInputStream createCodedInputStream(String name,
+ List<DiskRange> input,
+ long length,
+ CompressionCodec codec,
+ int bufferSize) throws IOException {
+ InStream inStream = create(name, input, length, codec, bufferSize);
+ CodedInputStream codedInputStream = CodedInputStream.newInstance(inStream);
+ codedInputStream.setSizeLimit(PROTOBUF_MESSAGE_MAX_LIMIT);
+ return codedInputStream;
+ }
}
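The key protobuf detail here: a generated parseFrom(InputStream) wraps the stream in a CodedInputStream whose default message size limit is 64MB, so oversized ORC footers fail to parse. Creating the CodedInputStream explicitly lets the caller raise the cap up front, which is what createCodedInputStream above does. A minimal standalone sketch of the same technique, reusing OrcProto.Footer from this commit; the helper name is illustrative:

    import java.io.IOException;
    import java.io.InputStream;

    import org.apache.hadoop.hive.ql.io.orc.OrcProto;

    import com.google.protobuf.CodedInputStream;

    public class LargeFooterParseSketch {
      private static final int MAX_MESSAGE_SIZE = 1024 << 20; // 1GB, matching the limit above

      static OrcProto.Footer parseLargeFooter(InputStream in) throws IOException {
        CodedInputStream cis = CodedInputStream.newInstance(in);
        // Raise protobuf's default 64MB cap before parsing, instead of
        // retrying with a doubled limit after a failure.
        cis.setSizeLimit(MAX_MESSAGE_SIZE);
        return OrcProto.Footer.parseFrom(cis);
      }
    }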
http://git-wip-us.apache.org/repos/asf/hive/blob/467a117e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
index 43d2933..1910214 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
@@ -108,7 +108,7 @@ public class MetadataReader {
// read the footer
ByteBuffer tailBuf = ByteBuffer.allocate(tailLength);
file.readFully(offset, tailBuf.array(), tailBuf.arrayOffset(), tailLength);
- return OrcProto.StripeFooter.parseFrom(InStream.create("footer",
+ return OrcProto.StripeFooter.parseFrom(InStream.createCodedInputStream("footer",
Lists.<DiskRange>newArrayList(new BufferChunk(tailBuf, 0)),
tailLength, codec, bufferSize));
}
http://git-wip-us.apache.org/repos/asf/hive/blob/467a117e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index 36fb858..3bac48a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -19,7 +19,6 @@
package org.apache.hadoop.hive.ql.io.orc;
import java.io.IOException;
-import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
@@ -48,15 +47,12 @@ import org.apache.hadoop.io.Text;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.protobuf.CodedInputStream;
-import com.google.protobuf.InvalidProtocolBufferException;
public class ReaderImpl implements Reader {
private static final Log LOG = LogFactory.getLog(ReaderImpl.class);
private static final int DIRECTORY_SIZE_GUESS = 16 * 1024;
- private static final int DEFAULT_PROTOBUF_MESSAGE_LIMIT = 64 << 20; // 64MB
- private static final int PROTOBUF_MESSAGE_MAX_LIMIT = 1024 << 20; // 1GB
protected final FileSystem fileSystem;
protected final Path path;
@@ -387,47 +383,16 @@ public class ReaderImpl implements Reader {
int footerSize, CompressionCodec codec, int bufferSize) throws IOException {
bb.position(footerAbsPos);
bb.limit(footerAbsPos + footerSize);
- InputStream instream = InStream.create("footer", Lists.<DiskRange>newArrayList(
- new BufferChunk(bb, 0)), footerSize, codec, bufferSize);
- return OrcProto.Footer.parseFrom(instream);
+ return OrcProto.Footer.parseFrom(InStream.createCodedInputStream("footer",
+ Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), footerSize, codec, bufferSize));
}
private static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
int metadataSize, CompressionCodec codec, int bufferSize) throws IOException {
bb.position(metadataAbsPos);
bb.limit(metadataAbsPos + metadataSize);
- InputStream instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
- new BufferChunk(bb, 0)), metadataSize, codec, bufferSize);
- CodedInputStream in = CodedInputStream.newInstance(instream);
- int msgLimit = DEFAULT_PROTOBUF_MESSAGE_LIMIT;
- OrcProto.Metadata meta = null;
- do {
- try {
- in.setSizeLimit(msgLimit);
- meta = OrcProto.Metadata.parseFrom(in);
- } catch (InvalidProtocolBufferException e) {
- if (e.getMessage().contains("Protocol message was too large")) {
- LOG.warn("Metadata section is larger than " + msgLimit + " bytes. Increasing the max" +
- " size of the coded input stream." );
-
- msgLimit = msgLimit << 1;
- if (msgLimit > PROTOBUF_MESSAGE_MAX_LIMIT) {
- LOG.error("Metadata section exceeds max protobuf message size of " +
- PROTOBUF_MESSAGE_MAX_LIMIT + " bytes.");
- throw e;
- }
-
- // we must have failed in the middle of reading instream and instream doesn't support
- // resetting the stream
- instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
- new BufferChunk(bb, 0)), metadataSize, codec, bufferSize);
- in = CodedInputStream.newInstance(instream);
- } else {
- throw e;
- }
- }
- } while (meta == null);
- return meta;
+ return OrcProto.Metadata.parseFrom(InStream.createCodedInputStream("metadata",
+ Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), metadataSize, codec, bufferSize));
}
private static OrcProto.PostScript extractPostScript(ByteBuffer bb, Path path,
[20/22] hive git commit: HIVE-11995 : Remove repetitively setting permissions in insert/load overwrite partition (Chaoyu Tang via Szehon)
Posted by se...@apache.org.
HIVE-11995 : Remove repetitively setting permissions in insert/load overwrite partition (Chaoyu Tang via Szehon)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/50744231
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/50744231
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/50744231
Branch: refs/heads/llap
Commit: 507442319985198466b4f6c2ba18c6b068d8435e
Parents: 467a117
Author: Szehon Ho <sz...@cloudera.com>
Authored: Thu Oct 1 15:29:36 2015 -0700
Committer: Szehon Ho <sz...@cloudera.com>
Committed: Thu Oct 1 15:29:36 2015 -0700
----------------------------------------------------------------------
.../hive/ql/security/FolderPermissionBase.java | 53 +++++++--
.../apache/hadoop/hive/ql/metadata/Hive.java | 108 ++++---------------
2 files changed, 65 insertions(+), 96 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/50744231/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/FolderPermissionBase.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/FolderPermissionBase.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/FolderPermissionBase.java
index d98082f..d7149a7 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/FolderPermissionBase.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/FolderPermissionBase.java
@@ -261,6 +261,7 @@ public abstract class FolderPermissionBase {
//insert overwrite test
setPermission(warehouseDir + "/" + tableName, 1);
+ setPermission(warehouseDir + "/" + tableName + "/part1=1", 1);
ret = driver.run("insert overwrite table " + tableName + " partition(part1='1') select key,value from mysrc where part1='1' and part2='1'");
Assert.assertEquals(0, ret.getResponseCode());
@@ -297,6 +298,9 @@ public abstract class FolderPermissionBase {
//insert overwrite test
setPermission(warehouseDir + "/" + tableName, 1);
+ setPermission(warehouseDir + "/" + tableName + "/part1=1", 1);
+ setPermission(warehouseDir + "/" + tableName + "/part1=1/part2=1", 1);
+
ret = driver.run("insert overwrite table " + tableName + " partition(part1='1', part2='1') select key,value from mysrc where part1='1' and part2='1'");
Assert.assertEquals(0, ret.getResponseCode());
@@ -325,8 +329,9 @@ public abstract class FolderPermissionBase {
verifyDualPartitionTable(warehouseDir + "/" + tableName, 0);
- //Insert overwrite test, with permission set 1.
- setPermission(warehouseDir + "/" + tableName, 1);
+ //Insert overwrite test, with permission set 1. We need reset existing partitions to 1 since the permissions
+ //should be inherited from existing partition
+ setDualPartitionTable(warehouseDir + "/" + tableName, 1);
ret = driver.run("insert overwrite table " + tableName + " partition (part1,part2) select key,value,part1,part2 from mysrc");
Assert.assertEquals(0, ret.getResponseCode());
@@ -348,8 +353,9 @@ public abstract class FolderPermissionBase {
Assert.assertEquals(0,ret.getResponseCode());
verifySinglePartition(tableLoc, 0);
- //Insert overwrite test, with permission set 1.
- setPermission(tableLoc, 1);
+ //Insert overwrite test, with permission set 1. We need reset existing partitions to 1 since the permissions
+ //should be inherited from existing partition
+ setSinglePartition(tableLoc, 1);
ret = driver.run("insert overwrite table " + tableName + " partition (part1) select key,value,part1 from mysrc");
Assert.assertEquals(0,ret.getResponseCode());
verifySinglePartition(tableLoc, 1);
@@ -458,6 +464,9 @@ public abstract class FolderPermissionBase {
//case1B: load data local into overwrite non-partitioned-table
setPermission(warehouseDir + "/" + tableName, 1);
+ for (String child : listStatus(tableLoc)) {
+ setPermission(child, 1);
+ }
ret = driver.run("load data local inpath '" + dataFilePath + "' overwrite into table " + tableName);
Assert.assertEquals(0,ret.getResponseCode());
@@ -485,8 +494,13 @@ public abstract class FolderPermissionBase {
verifyPermission(child);
}
- //case 2B: insert data overwrite into non-partitioned table.
+ //case 2B: insert data overwrite into partitioned table. set testing table/partition folder hierarchy 1
+ //local load overwrite just overwrite the existing partition content but not the permission
setPermission(tableLoc, 1);
+ setPermission(partLoc, 1);
+ for (String child : listStatus(partLoc)) {
+ setPermission(child, 1);
+ }
ret = driver.run("LOAD DATA LOCAL INPATH '" + dataFilePath + "' OVERWRITE INTO TABLE " + tableName + " PARTITION (part1='1',part2='1')");
Assert.assertEquals(0,ret.getResponseCode());
@@ -521,6 +535,10 @@ public abstract class FolderPermissionBase {
//case1B: load data into overwrite non-partitioned-table
setPermission(warehouseDir + "/" + tableName, 1);
+ for (String child : listStatus(tableLoc)) {
+ setPermission(child, 1);
+ }
+
fs.copyFromLocalFile(dataFilePath, new Path(location));
ret = driver.run("load data inpath '" + location + "' overwrite into table " + tableName);
Assert.assertEquals(0,ret.getResponseCode());
@@ -550,8 +568,15 @@ public abstract class FolderPermissionBase {
verifyPermission(child);
}
- //case 2B: insert data overwrite into non-partitioned table.
+ //case 2B: insert data overwrite into partitioned table. set testing table/partition folder hierarchy 1
+ //load overwrite just overwrite the existing partition content but not the permission
setPermission(tableLoc, 1);
+ setPermission(partLoc, 1);
+ Assert.assertTrue(listStatus(partLoc).size() > 0);
+ for (String child : listStatus(partLoc)) {
+ setPermission(child, 1);
+ }
+
fs.copyFromLocalFile(dataFilePath, new Path(location));
ret = driver.run("LOAD DATA INPATH '" + location + "' OVERWRITE INTO TABLE " + tableName + " PARTITION (part1='1',part2='1')");
Assert.assertEquals(0,ret.getResponseCode());
@@ -693,7 +718,12 @@ public abstract class FolderPermissionBase {
assertExistence(partition);
verifyPermission(partition);
}
-
+
+ private void setSinglePartition(String tableLoc, int index) throws Exception {
+ setPermission(tableLoc + "/part1=1", index);
+ setPermission(tableLoc + "/part1=2", index);
+ }
+
private void verifySinglePartition(String tableLoc, int index) throws Exception {
verifyPermission(tableLoc + "/part1=1", index);
verifyPermission(tableLoc + "/part1=2", index);
@@ -709,6 +739,15 @@ public abstract class FolderPermissionBase {
}
}
+ private void setDualPartitionTable(String baseTablePath, int index) throws Exception {
+ setPermission(baseTablePath, index);
+ setPermission(baseTablePath + "/part1=1", index);
+ setPermission(baseTablePath + "/part1=1/part2=1", index);
+
+ setPermission(baseTablePath + "/part1=2", index);
+ setPermission(baseTablePath + "/part1=2/part2=2", index);
+ }
+
private void verifyDualPartitionTable(String baseTablePath, int index) throws Exception {
verifyPermission(baseTablePath, index);
verifyPermission(baseTablePath + "/part1=1", index);
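For readers following the permission-inheritance fix: the updated flow of the dual-partition insert-overwrite test now looks roughly like the sketch below. It reuses names from the hunks above (setDualPartitionTable, verifyDualPartitionTable, the test's driver whose run() returns a CommandProcessorResponse); the surrounding setup in FolderPermissionBase is elided, so treat this as an illustration, not the verbatim test body.

  // Reset the table dir and both partition trees to permission set 1, then
  // verify that insert overwrite leaves the hierarchy at set 1 -- i.e. the
  // rewritten partitions inherit from the existing directories instead of
  // reverting to default permissions.
  setDualPartitionTable(warehouseDir + "/" + tableName, 1);
  CommandProcessorResponse ret = driver.run(
      "insert overwrite table " + tableName
      + " partition (part1,part2) select key,value,part1,part2 from mysrc");
  Assert.assertEquals(0, ret.getResponseCode());
  verifyDualPartitionTable(warehouseDir + "/" + tableName, 1);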
http://git-wip-us.apache.org/repos/asf/hive/blob/50744231/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 10cafb6..8efbb05 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -2932,8 +2932,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
LOG.info("No sources specified to move: " + srcf);
return;
}
- List<List<Path[]>> result = checkPaths(conf, destFs, srcs, srcFs, destf,
- true);
+ List<List<Path[]>> result = checkPaths(conf, destFs, srcs, srcFs, destf, true);
if (oldPath != null) {
try {
@@ -2945,9 +2944,6 @@ private void constructOneLBLocationMap(FileStatus fSta,
if (FileUtils.isSubDir(oldPath, destf, fs2)) {
FileUtils.trashFilesUnderDir(fs2, oldPath, conf);
}
- if (inheritPerms) {
- inheritFromTable(tablePath, destf, conf, destFs);
- }
}
} catch (Exception e) {
//swallow the exception
@@ -2955,58 +2951,24 @@ private void constructOneLBLocationMap(FileStatus fSta,
}
}
- // rename src directory to destf
- if (srcs.length == 1 && srcs[0].isDir()) {
- // rename can fail if the parent doesn't exist
- Path destfp = destf.getParent();
- if (!destFs.exists(destfp)) {
- boolean success = destFs.mkdirs(destfp);
- if (!success) {
- LOG.warn("Error creating directory " + destf.toString());
- }
- if (inheritPerms && success) {
- inheritFromTable(tablePath, destfp, conf, destFs);
- }
- }
-
- // Copy/move each file under the source directory to avoid to delete the destination
- // directory if it is the root of an HDFS encryption zone.
- for (List<Path[]> sdpairs : result) {
- for (Path[] sdpair : sdpairs) {
- Path destParent = sdpair[1].getParent();
- FileSystem destParentFs = destParent.getFileSystem(conf);
- if (!destParentFs.isDirectory(destParent)) {
- boolean success = destFs.mkdirs(destParent);
- if (!success) {
- LOG.warn("Error creating directory " + destParent);
- }
- if (inheritPerms && success) {
- inheritFromTable(tablePath, destParent, conf, destFs);
- }
- }
- if (!moveFile(conf, sdpair[0], sdpair[1], true, isSrcLocal)) {
- throw new IOException("Unable to move file/directory from " + sdpair[0] +
- " to " + sdpair[1]);
- }
- }
- }
- } else { // srcf is a file or pattern containing wildcards
- if (!destFs.exists(destf)) {
- boolean success = destFs.mkdirs(destf);
- if (!success) {
- LOG.warn("Error creating directory " + destf.toString());
- }
- if (inheritPerms && success) {
- inheritFromTable(tablePath, destf, conf, destFs);
- }
- }
- // srcs must be a list of files -- ensured by LoadSemanticAnalyzer
- for (List<Path[]> sdpairs : result) {
- for (Path[] sdpair : sdpairs) {
- if (!moveFile(conf, sdpair[0], sdpair[1], true,
- isSrcLocal)) {
- throw new IOException("Error moving: " + sdpair[0] + " into: " + sdpair[1]);
- }
+ // first call FileUtils.mkdir to make sure that the destf directory exists; if it does not,
+ // it creates destf with inherited permissions
+ boolean destfExist = FileUtils.mkdir(destFs, destf, true, conf);
+ if (!destfExist) {
+ throw new IOException("Directory " + destf.toString()
+ + " does not exist and could not be created.");
+ }
+
+ // Two cases:
+ // 1. srcs has only a src directory; instead of renaming the src directory to destf, we
+ // copy/move each file under the source directory, to avoid deleting the destination
+ // directory if it is the root of an HDFS encryption zone.
+ // 2. srcs is a list of files -- ensured by LoadSemanticAnalyzer.
+ // In both cases, we move the files under destf.
+ for (List<Path[]> sdpairs : result) {
+ for (Path[] sdpair : sdpairs) {
+ if (!moveFile(conf, sdpair[0], sdpair[1], true, isSrcLocal)) {
+ throw new IOException("Error moving: " + sdpair[0] + " into: " + sdpair[1]);
}
}
}
@@ -3015,38 +2977,6 @@ private void constructOneLBLocationMap(FileStatus fSta,
}
}
- /**
- * This method sets all paths from tablePath to destf (including destf) to have same permission as tablePath.
- * @param tablePath path of table
- * @param destf path of table-subdir.
- * @param conf
- * @param fs
- */
- private static void inheritFromTable(Path tablePath, Path destf, HiveConf conf, FileSystem fs) {
- if (!FileUtils.isSubDir(destf, tablePath, fs)) {
- //partition may not be under the parent.
- return;
- }
- HadoopShims shims = ShimLoader.getHadoopShims();
- //Calculate all the paths from the table dir, to destf
- //At end of this loop, currPath is table dir, and pathsToSet contain list of all those paths.
- Path currPath = destf;
- List<Path> pathsToSet = new LinkedList<Path>();
- while (!currPath.equals(tablePath)) {
- pathsToSet.add(currPath);
- currPath = currPath.getParent();
- }
-
- try {
- HadoopShims.HdfsFileStatus fullFileStatus = shims.getFullFileStatus(conf, fs, currPath);
- for (Path pathToSet : pathsToSet) {
- shims.setFullFileStatus(conf, fullFileStatus, fs, pathToSet);
- }
- } catch (Exception e) {
- LOG.warn("Error setting permissions or group of " + destf, e);
- }
- }
-
public static boolean isHadoop1() {
return ShimLoader.getMajorVersion().startsWith("0.20");
}
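Why is it safe to delete inheritFromTable() here? Because FileUtils.mkdir(destFs, destf, true, conf) now handles the inheritance when it creates missing directories, as the new comment above states. A hedged paraphrase of that behavior follows (not the exact hive-common implementation; the inheritPerms config check and group/ACL propagation are omitted, and the class and method names are illustrative only):

  import java.io.IOException;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.fs.permission.FsPermission;

  class PermInheritSketch {
    // Sketch: create dir, then stamp it and any created parents with the
    // permissions of the nearest ancestor that already existed.
    static boolean mkdirInheriting(FileSystem fs, Path dir) throws IOException {
      Path ancestor = dir.getParent();
      while (ancestor != null && !fs.exists(ancestor)) {  // find first existing ancestor
        ancestor = ancestor.getParent();
      }
      if (fs.exists(dir)) {
        return true;                       // nothing to create, nothing to inherit
      }
      FsPermission perm = fs.getFileStatus(ancestor).getPermission();
      if (!fs.mkdirs(dir)) {
        return false;
      }
      for (Path p = dir; p != null && !p.equals(ancestor); p = p.getParent()) {
        fs.setPermission(p, perm);         // propagate the inherited permission down
      }
      return true;
    }
  }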
[18/22] hive git commit: HIVE-11960 : braces in join conditions are
not supported (Sergey Shelukhin, reviewed by Pengcheng Xiong)
Posted by se...@apache.org.
HIVE-11960 : braces in join conditions are not supported (Sergey Shelukhin, reviewed by Pengcheng Xiong)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/947871a3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/947871a3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/947871a3
Branch: refs/heads/llap
Commit: 947871a3b060adbc46cad8d9416117a81f50fd94
Parents: 116c3e3
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Oct 1 14:14:12 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Oct 1 14:14:12 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/parse/FromClauseParser.g | 30 +-
.../apache/hadoop/hive/ql/parse/HiveParser.g | 7 +-
ql/src/test/queries/clientpositive/join_parse.q | 20 +
.../clientnegative/cte_with_in_subquery.q.out | 2 +-
.../results/clientpositive/join_parse.q.out | 516 +++++++++++++++++++
5 files changed, 568 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/947871a3/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
index 038ed99..084c421 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
@@ -94,7 +94,7 @@ joinSource
;
uniqueJoinSource
-@init { gParent.pushMsg("join source", state); }
+@init { gParent.pushMsg("unique join source", state); }
@after { gParent.popMsg(state); }
: KW_PRESERVE? fromSource uniqueJoinExpr
;
@@ -147,6 +147,16 @@ fromSource
@init { gParent.pushMsg("from source", state); }
@after { gParent.popMsg(state); }
:
+ (LPAREN KW_VALUES) => fromSource0
+ | (LPAREN) => LPAREN joinSource RPAREN -> joinSource
+ | fromSource0
+ ;
+
+
+fromSource0
+@init { gParent.pushMsg("from source 0", state); }
+@after { gParent.popMsg(state); }
+ :
((Identifier LPAREN)=> partitionedTableFunction | tableSource | subQuerySource | virtualTableSource) (lateralView^)*
;
@@ -270,11 +280,15 @@ searchCondition
// INSERT INTO <table> (col1,col2,...) VALUES(...),(...),...
// INSERT INTO <table> (col1,col2,...) SELECT * FROM (VALUES(1,2,3),(4,5,6),...) as Foo(a,b,c)
valueRowConstructor
+@init { gParent.pushMsg("value row constructor", state); }
+@after { gParent.popMsg(state); }
:
LPAREN precedenceUnaryPrefixExpression (COMMA precedenceUnaryPrefixExpression)* RPAREN -> ^(TOK_VALUE_ROW precedenceUnaryPrefixExpression+)
;
valuesTableConstructor
+@init { gParent.pushMsg("values table constructor", state); }
+@after { gParent.popMsg(state); }
:
valueRowConstructor (COMMA valueRowConstructor)* -> ^(TOK_VALUES_TABLE valueRowConstructor+)
;
@@ -285,6 +299,8 @@ VALUES(1,2),(3,4) means 2 rows, 2 columns each.
VALUES(1,2,3) means 1 row, 3 columns
*/
valuesClause
+@init { gParent.pushMsg("values clause", state); }
+@after { gParent.popMsg(state); }
:
KW_VALUES valuesTableConstructor -> valuesTableConstructor
;
@@ -294,16 +310,20 @@ This represents a clause like this:
(VALUES(1,2),(2,3)) as VirtTable(col1,col2)
*/
virtualTableSource
- :
- LPAREN valuesClause RPAREN tableNameColList -> ^(TOK_VIRTUAL_TABLE tableNameColList valuesClause)
- ;
+@init { gParent.pushMsg("virtual table source", state); }
+@after { gParent.popMsg(state); }
+ :
+ LPAREN valuesClause RPAREN tableNameColList -> ^(TOK_VIRTUAL_TABLE tableNameColList valuesClause)
+ ;
/*
e.g. as VirtTable(col1,col2)
Note that we only want literals as column names
*/
tableNameColList
+@init { gParent.pushMsg("table name col list", state); }
+@after { gParent.popMsg(state); }
:
KW_AS? identifier LPAREN identifier (COMMA identifier)* RPAREN -> ^(TOK_VIRTUAL_TABREF ^(TOK_TABNAME identifier) ^(TOK_COL_NAME identifier+))
;
-//-----------------------------------------------------------------------------------
\ No newline at end of file
+//-----------------------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/947871a3/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 3df67e9..161e549 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -376,6 +376,8 @@ import java.util.Collection;
import java.util.HashMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
}
@@ -618,19 +620,22 @@ import org.apache.hadoop.hive.conf.HiveConf;
return msg;
}
+ public static final Log LOG = LogFactory.getLog("HiveParser");
public void pushMsg(String msg, RecognizerSharedState state) {
// ANTLR generated code does not wrap the @init code with this backtracking check,
// even if the matching @after has it. If we have parser rules that are doing
// some lookahead with syntactic predicates this can cause the push() and pop() calls
// to become unbalanced, so make sure both push/pop check the backtracking state.
if (state.backtracking == 0) {
+ // LOG.debug("Push " + msg);
msgs.push(msg);
}
}
public void popMsg(RecognizerSharedState state) {
if (state.backtracking == 0) {
- msgs.pop();
+ Object o = msgs.pop();
+ // LOG.debug("Pop " + o);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/947871a3/ql/src/test/queries/clientpositive/join_parse.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/join_parse.q b/ql/src/test/queries/clientpositive/join_parse.q
new file mode 100644
index 0000000..5955efd
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/join_parse.q
@@ -0,0 +1,20 @@
+explain
+select srcpart.key, src1.value from
+((srcpart inner join src on srcpart.key = src.key))
+inner join src src1 on src1.value =srcpart.value;
+
+explain
+select srcpart.key, src1.value from
+(srcpart inner join src on srcpart.key = src.key)
+inner join src src1 on src1.value =srcpart.value;
+
+explain
+select srcpart.key, src1.value from
+((srcpart inner join src on srcpart.key = src.key)
+inner join src src1 on src1.value =srcpart.value);
+
+explain
+select srcpart.key, src1.value from
+((srcpart inner join src on srcpart.key = src.key)
+inner join src src1 on src1.value =srcpart.value)
+inner join src src2 on src2.key = src1.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/947871a3/ql/src/test/results/clientnegative/cte_with_in_subquery.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/cte_with_in_subquery.q.out b/ql/src/test/results/clientnegative/cte_with_in_subquery.q.out
index fa22b48..1122ca5 100644
--- a/ql/src/test/results/clientnegative/cte_with_in_subquery.q.out
+++ b/ql/src/test/results/clientnegative/cte_with_in_subquery.q.out
@@ -1 +1 @@
-FAILED: ParseException line 1:64 Failed to recognize predicate 'select'. Failed rule: 'queryStatementExpression' in subquery source
+FAILED: ParseException line 1:20 cannot recognize input near 'with' 'q1' 'as' in from source
http://git-wip-us.apache.org/repos/asf/hive/blob/947871a3/ql/src/test/results/clientpositive/join_parse.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join_parse.q.out b/ql/src/test/results/clientpositive/join_parse.q.out
new file mode 100644
index 0000000..e1a23a0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/join_parse.q.out
@@ -0,0 +1,516 @@
+PREHOOK: query: explain
+select srcpart.key, src1.value from
+((srcpart inner join src on srcpart.key = src.key))
+inner join src src1 on src1.value =srcpart.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select srcpart.key, src1.value from
+((srcpart inner join src on srcpart.key = src.key))
+inner join src src1 on src1.value =srcpart.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (value is not null and key is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select srcpart.key, src1.value from
+(srcpart inner join src on srcpart.key = src.key)
+inner join src src1 on src1.value =srcpart.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select srcpart.key, src1.value from
+(srcpart inner join src on srcpart.key = src.key)
+inner join src src1 on src1.value =srcpart.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (value is not null and key is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select srcpart.key, src1.value from
+((srcpart inner join src on srcpart.key = src.key)
+inner join src src1 on src1.value =srcpart.value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select srcpart.key, src1.value from
+((srcpart inner join src on srcpart.key = src.key)
+inner join src src1 on src1.value =srcpart.value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (value is not null and key is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select srcpart.key, src1.value from
+((srcpart inner join src on srcpart.key = src.key)
+inner join src src1 on src1.value =srcpart.value)
+inner join src src2 on src2.key = src1.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select srcpart.key, src1.value from
+((srcpart inner join src on srcpart.key = src.key)
+inner join src src1 on src1.value =srcpart.value)
+inner join src src2 on src2.key = src1.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-4
+ Stage-4 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col4
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col4 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
[16/22] hive git commit: HIVE-11970 : COLUMNS_V2 table in metastore
should have a longer name field (Sergey Shelukhin, reviewed by Alan Gates)
Posted by se...@apache.org.
HIVE-11970 : COLUMNS_V2 table in metastore should have a longer name field (Sergey Shelukhin, reviewed by Alan Gates)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a953b436
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a953b436
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a953b436
Branch: refs/heads/llap
Commit: a953b43628bf0dbbb9aadc4ae936e8c964534f3f
Parents: 82bc0e1
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Oct 1 14:09:36 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Oct 1 14:09:36 2015 -0700
----------------------------------------------------------------------
.../upgrade/derby/021-HIVE-11970.derby.sql | 6 +++++
.../upgrade/derby/hive-schema-1.3.0.derby.sql | 12 +++++-----
.../upgrade/derby/hive-schema-2.0.0.derby.sql | 12 +++++-----
.../derby/upgrade-1.2.0-to-1.3.0.derby.sql | 1 +
.../derby/upgrade-1.2.0-to-2.0.0.derby.sql | 3 ++-
.../upgrade/mssql/007-HIVE-11970.mssql.sql | 6 +++++
.../upgrade/mssql/hive-schema-1.3.0.mssql.sql | 12 +++++-----
.../upgrade/mssql/hive-schema-2.0.0.mssql.sql | 12 +++++-----
.../mssql/upgrade-1.2.0-to-1.3.0.mssql.sql | 1 +
.../mssql/upgrade-1.2.0-to-2.0.0.mssql.sql | 7 +++---
.../upgrade/mysql/022-HIVE-11970.mysql.sql | 6 +++++
.../upgrade/mysql/hive-schema-1.3.0.mysql.sql | 12 +++++-----
.../upgrade/mysql/hive-schema-2.0.0.mysql.sql | 12 +++++-----
.../mysql/upgrade-1.2.0-to-1.3.0.mysql.sql | 1 +
.../mysql/upgrade-1.2.0-to-2.0.0.mysql.sql | 2 ++
.../upgrade/oracle/022-HIVE-11970.oracle.sql | 23 ++++++++++++++++++++
.../upgrade/oracle/hive-schema-1.3.0.oracle.sql | 12 +++++-----
.../upgrade/oracle/hive-schema-2.0.0.oracle.sql | 12 +++++-----
.../oracle/upgrade-1.2.0-to-1.3.0.oracle.sql | 2 ++
.../oracle/upgrade-1.2.0-to-2.0.0.oracle.sql | 2 ++
.../postgres/021-HIVE-11970.postgres.sql | 6 +++++
.../postgres/hive-schema-1.3.0.postgres.sql | 12 +++++-----
.../postgres/hive-schema-2.0.0.postgres.sql | 12 +++++-----
.../upgrade-1.2.0-to-1.3.0.postgres.sql | 1 +
.../upgrade-1.2.0-to-2.0.0.postgres.sql | 1 +
25 files changed, 124 insertions(+), 64 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/derby/021-HIVE-11970.derby.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/derby/021-HIVE-11970.derby.sql b/metastore/scripts/upgrade/derby/021-HIVE-11970.derby.sql
new file mode 100644
index 0000000..6a01a53
--- /dev/null
+++ b/metastore/scripts/upgrade/derby/021-HIVE-11970.derby.sql
@@ -0,0 +1,6 @@
+ALTER TABLE "COLUMNS_V2" ALTER "COLUMN_NAME" SET DATA TYPE VARCHAR(1000);
+ALTER TABLE "PART_COL_PRIVS" ALTER "COLUMN_NAME" SET DATA TYPE VARCHAR(1000);
+ALTER TABLE "TBL_COL_PRIVS" ALTER "COLUMN_NAME" SET DATA TYPE VARCHAR(1000);
+ALTER TABLE "SORT_COLS" ALTER "COLUMN_NAME" SET DATA TYPE VARCHAR(1000);
+ALTER TABLE "TAB_COL_STATS" ALTER "COLUMN_NAME" SET DATA TYPE VARCHAR(1000);
+ALTER TABLE "PART_COL_STATS" ALTER "COLUMN_NAME" SET DATA TYPE VARCHAR(1000);
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/derby/hive-schema-1.3.0.derby.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/derby/hive-schema-1.3.0.derby.sql b/metastore/scripts/upgrade/derby/hive-schema-1.3.0.derby.sql
index c50375f..20eb326 100644
--- a/metastore/scripts/upgrade/derby/hive-schema-1.3.0.derby.sql
+++ b/metastore/scripts/upgrade/derby/hive-schema-1.3.0.derby.sql
@@ -22,13 +22,13 @@ CREATE TABLE "APP"."TBL_PRIVS" ("TBL_GRANT_ID" BIGINT NOT NULL, "CREATE_TIME" IN
CREATE TABLE "APP"."DATABASE_PARAMS" ("DB_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(180) NOT NULL, "PARAM_VALUE" VARCHAR(4000));
-CREATE TABLE "APP"."TBL_COL_PRIVS" ("TBL_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(128), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "TBL_COL_PRIV" VARCHAR(128), "TBL_ID" BIGINT);
+CREATE TABLE "APP"."TBL_COL_PRIVS" ("TBL_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(1000), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "TBL_COL_PRIV" VARCHAR(128), "TBL_ID" BIGINT);
CREATE TABLE "APP"."SERDE_PARAMS" ("SERDE_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(256) NOT NULL, "PARAM_VALUE" VARCHAR(4000));
-CREATE TABLE "APP"."COLUMNS_V2" ("CD_ID" BIGINT NOT NULL, "COMMENT" VARCHAR(4000), "COLUMN_NAME" VARCHAR(128) NOT NULL, "TYPE_NAME" VARCHAR(4000), "INTEGER_IDX" INTEGER NOT NULL);
+CREATE TABLE "APP"."COLUMNS_V2" ("CD_ID" BIGINT NOT NULL, "COMMENT" VARCHAR(4000), "COLUMN_NAME" VARCHAR(1000) NOT NULL, "TYPE_NAME" VARCHAR(4000), "INTEGER_IDX" INTEGER NOT NULL);
-CREATE TABLE "APP"."SORT_COLS" ("SD_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(128), "ORDER" INTEGER NOT NULL, "INTEGER_IDX" INTEGER NOT NULL);
+CREATE TABLE "APP"."SORT_COLS" ("SD_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(1000), "ORDER" INTEGER NOT NULL, "INTEGER_IDX" INTEGER NOT NULL);
CREATE TABLE "APP"."CDS" ("CD_ID" BIGINT NOT NULL);
@@ -64,7 +64,7 @@ CREATE TABLE "APP"."TBLS" ("TBL_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT N
CREATE TABLE "APP"."PARTITION_KEYS" ("TBL_ID" BIGINT NOT NULL, "PKEY_COMMENT" VARCHAR(4000), "PKEY_NAME" VARCHAR(128) NOT NULL, "PKEY_TYPE" VARCHAR(767) NOT NULL, "INTEGER_IDX" INTEGER NOT NULL);
-CREATE TABLE "APP"."PART_COL_PRIVS" ("PART_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(128), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PART_ID" BIGINT, "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "PART_COL_PRIV" VARCHAR(128));
+CREATE TABLE "APP"."PART_COL_PRIVS" ("PART_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(1000), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PART_ID" BIGINT, "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "PART_COL_PRIV" VARCHAR(128));
CREATE TABLE "APP"."SDS" ("SD_ID" BIGINT NOT NULL, "INPUT_FORMAT" VARCHAR(4000), "IS_COMPRESSED" CHAR(1) NOT NULL, "LOCATION" VARCHAR(4000), "NUM_BUCKETS" INTEGER NOT NULL, "OUTPUT_FORMAT" VARCHAR(4000), "SERDE_ID" BIGINT, "CD_ID" BIGINT, "IS_STOREDASSUBDIRECTORIES" CHAR(1) NOT NULL);
@@ -94,9 +94,9 @@ CREATE TABLE "APP"."MASTER_KEYS" ("KEY_ID" INTEGER NOT NULL generated always as
CREATE TABLE "APP"."DELEGATION_TOKENS" ( "TOKEN_IDENT" VARCHAR(767) NOT NULL, "TOKEN" VARCHAR(767));
-CREATE TABLE "APP"."TAB_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(128) NOT NULL, "COLUMN_NAME" VARCHAR(128) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "TBL_ID" BIGINT NOT NULL);
+CREATE TABLE "APP"."TAB_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(128) NOT NULL, "COLUMN_NAME" VARCHAR(1000) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "TBL_ID" BIGINT NOT NULL);
-CREATE TABLE "APP"."PART_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(128) NOT NULL, "PARTITION_NAME" VARCHAR(767) NOT NULL, "COLUMN_NAME" VARCHAR(128) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "PART_ID" BIGINT NOT NULL);
+CREATE TABLE "APP"."PART_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(128) NOT NULL, "PARTITION_NAME" VARCHAR(767) NOT NULL, "COLUMN_NAME" VARCHAR(1000) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "PART_ID" BIGINT NOT NULL);
CREATE TABLE "APP"."VERSION" ("VER_ID" BIGINT NOT NULL, "SCHEMA_VERSION" VARCHAR(127) NOT NULL, "VERSION_COMMENT" VARCHAR(255));
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/derby/hive-schema-2.0.0.derby.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/derby/hive-schema-2.0.0.derby.sql b/metastore/scripts/upgrade/derby/hive-schema-2.0.0.derby.sql
index 1cc0a24..abc6bf4 100644
--- a/metastore/scripts/upgrade/derby/hive-schema-2.0.0.derby.sql
+++ b/metastore/scripts/upgrade/derby/hive-schema-2.0.0.derby.sql
@@ -22,13 +22,13 @@ CREATE TABLE "APP"."TBL_PRIVS" ("TBL_GRANT_ID" BIGINT NOT NULL, "CREATE_TIME" IN
CREATE TABLE "APP"."DATABASE_PARAMS" ("DB_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(180) NOT NULL, "PARAM_VALUE" VARCHAR(4000));
-CREATE TABLE "APP"."TBL_COL_PRIVS" ("TBL_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(128), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "TBL_COL_PRIV" VARCHAR(128), "TBL_ID" BIGINT);
+CREATE TABLE "APP"."TBL_COL_PRIVS" ("TBL_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(1000), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "TBL_COL_PRIV" VARCHAR(128), "TBL_ID" BIGINT);
CREATE TABLE "APP"."SERDE_PARAMS" ("SERDE_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(256) NOT NULL, "PARAM_VALUE" VARCHAR(4000));
-CREATE TABLE "APP"."COLUMNS_V2" ("CD_ID" BIGINT NOT NULL, "COMMENT" VARCHAR(4000), "COLUMN_NAME" VARCHAR(128) NOT NULL, "TYPE_NAME" VARCHAR(4000), "INTEGER_IDX" INTEGER NOT NULL);
+CREATE TABLE "APP"."COLUMNS_V2" ("CD_ID" BIGINT NOT NULL, "COMMENT" VARCHAR(4000), "COLUMN_NAME" VARCHAR(1000) NOT NULL, "TYPE_NAME" VARCHAR(4000), "INTEGER_IDX" INTEGER NOT NULL);
-CREATE TABLE "APP"."SORT_COLS" ("SD_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(128), "ORDER" INTEGER NOT NULL, "INTEGER_IDX" INTEGER NOT NULL);
+CREATE TABLE "APP"."SORT_COLS" ("SD_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(1000), "ORDER" INTEGER NOT NULL, "INTEGER_IDX" INTEGER NOT NULL);
CREATE TABLE "APP"."CDS" ("CD_ID" BIGINT NOT NULL);
@@ -64,7 +64,7 @@ CREATE TABLE "APP"."TBLS" ("TBL_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT N
CREATE TABLE "APP"."PARTITION_KEYS" ("TBL_ID" BIGINT NOT NULL, "PKEY_COMMENT" VARCHAR(4000), "PKEY_NAME" VARCHAR(128) NOT NULL, "PKEY_TYPE" VARCHAR(767) NOT NULL, "INTEGER_IDX" INTEGER NOT NULL);
-CREATE TABLE "APP"."PART_COL_PRIVS" ("PART_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(128), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PART_ID" BIGINT, "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "PART_COL_PRIV" VARCHAR(128));
+CREATE TABLE "APP"."PART_COL_PRIVS" ("PART_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(1000), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PART_ID" BIGINT, "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "PART_COL_PRIV" VARCHAR(128));
CREATE TABLE "APP"."SDS" ("SD_ID" BIGINT NOT NULL, "INPUT_FORMAT" VARCHAR(4000), "IS_COMPRESSED" CHAR(1) NOT NULL, "LOCATION" VARCHAR(4000), "NUM_BUCKETS" INTEGER NOT NULL, "OUTPUT_FORMAT" VARCHAR(4000), "SERDE_ID" BIGINT, "CD_ID" BIGINT, "IS_STOREDASSUBDIRECTORIES" CHAR(1) NOT NULL);
@@ -94,9 +94,9 @@ CREATE TABLE "APP"."MASTER_KEYS" ("KEY_ID" INTEGER NOT NULL generated always as
CREATE TABLE "APP"."DELEGATION_TOKENS" ( "TOKEN_IDENT" VARCHAR(767) NOT NULL, "TOKEN" VARCHAR(767));
-CREATE TABLE "APP"."TAB_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(128) NOT NULL, "COLUMN_NAME" VARCHAR(128) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "TBL_ID" BIGINT NOT NULL);
+CREATE TABLE "APP"."TAB_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(128) NOT NULL, "COLUMN_NAME" VARCHAR(1000) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "TBL_ID" BIGINT NOT NULL);
-CREATE TABLE "APP"."PART_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(128) NOT NULL, "PARTITION_NAME" VARCHAR(767) NOT NULL, "COLUMN_NAME" VARCHAR(128) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "PART_ID" BIGINT NOT NULL);
+CREATE TABLE "APP"."PART_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(128) NOT NULL, "PARTITION_NAME" VARCHAR(767) NOT NULL, "COLUMN_NAME" VARCHAR(1000) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "PART_ID" BIGINT NOT NULL);
CREATE TABLE "APP"."VERSION" ("VER_ID" BIGINT NOT NULL, "SCHEMA_VERSION" VARCHAR(127) NOT NULL, "VERSION_COMMENT" VARCHAR(255));
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-1.3.0.derby.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-1.3.0.derby.sql b/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-1.3.0.derby.sql
index dee744b..6359de4 100644
--- a/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-1.3.0.derby.sql
+++ b/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-1.3.0.derby.sql
@@ -1,3 +1,4 @@
-- Upgrade MetaStore schema from 1.2.0 to 1.3.0
+RUN '021-HIVE-11970.derby.sql';
UPDATE "APP".VERSION SET SCHEMA_VERSION='1.3.0', VERSION_COMMENT='Hive release version 1.3.0' where VER_ID=1;
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-2.0.0.derby.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-2.0.0.derby.sql b/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-2.0.0.derby.sql
index 976ebd1..adf3cb0 100644
--- a/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-2.0.0.derby.sql
+++ b/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-2.0.0.derby.sql
@@ -1,3 +1,4 @@
--- Upgrade MetaStore schema from 1.3.0 to 2.0.0
+-- Upgrade MetaStore schema from 1.2.0 to 2.0.0
+RUN '021-HIVE-11970.derby.sql';
UPDATE "APP".VERSION SET SCHEMA_VERSION='2.0.0', VERSION_COMMENT='Hive release version 2.0.0' where VER_ID=1;
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mssql/007-HIVE-11970.mssql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mssql/007-HIVE-11970.mssql.sql b/metastore/scripts/upgrade/mssql/007-HIVE-11970.mssql.sql
new file mode 100644
index 0000000..69b11a2
--- /dev/null
+++ b/metastore/scripts/upgrade/mssql/007-HIVE-11970.mssql.sql
@@ -0,0 +1,6 @@
+ALTER TABLE "COLUMNS_V2" ALTER COLUMN "COLUMN_NAME" VARCHAR(1000) NOT NULL;
+ALTER TABLE "PART_COL_PRIVS" ALTER COLUMN "COLUMN_NAME" VARCHAR(1000) NULL;
+ALTER TABLE "TBL_COL_PRIVS" ALTER COLUMN "COLUMN_NAME" VARCHAR(1000) NULL;
+ALTER TABLE "SORT_COLS" ALTER COLUMN "COLUMN_NAME" VARCHAR(1000) NULL;
+ALTER TABLE "TAB_COL_STATS" ALTER COLUMN "COLUMN_NAME" VARCHAR(1000) NOT NULL;
+ALTER TABLE "PART_COL_STATS" ALTER COLUMN "COLUMN_NAME" VARCHAR(1000) NOT NULL;
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mssql/hive-schema-1.3.0.mssql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mssql/hive-schema-1.3.0.mssql.sql b/metastore/scripts/upgrade/mssql/hive-schema-1.3.0.mssql.sql
index 7165edd..01c1376 100644
--- a/metastore/scripts/upgrade/mssql/hive-schema-1.3.0.mssql.sql
+++ b/metastore/scripts/upgrade/mssql/hive-schema-1.3.0.mssql.sql
@@ -75,7 +75,7 @@ CREATE TABLE PART_COL_STATS
(
CS_ID bigint NOT NULL,
AVG_COL_LEN float NULL,
- "COLUMN_NAME" nvarchar(128) NOT NULL,
+ "COLUMN_NAME" nvarchar(1000) NOT NULL,
COLUMN_TYPE nvarchar(128) NOT NULL,
DB_NAME nvarchar(128) NOT NULL,
BIG_DECIMAL_HIGH_VALUE nvarchar(255) NULL,
@@ -184,7 +184,7 @@ ALTER TABLE GLOBAL_PRIVS ADD CONSTRAINT GLOBAL_PRIVS_PK PRIMARY KEY (USER_GRANT_
CREATE TABLE PART_COL_PRIVS
(
PART_COLUMN_GRANT_ID bigint NOT NULL,
- "COLUMN_NAME" nvarchar(128) NULL,
+ "COLUMN_NAME" nvarchar(1000) NULL,
CREATE_TIME int NOT NULL,
GRANT_OPTION smallint NOT NULL CHECK (GRANT_OPTION IN (0,1)),
GRANTOR nvarchar(128) NULL,
@@ -218,7 +218,7 @@ CREATE TABLE TAB_COL_STATS
(
CS_ID bigint NOT NULL,
AVG_COL_LEN float NULL,
- "COLUMN_NAME" nvarchar(128) NOT NULL,
+ "COLUMN_NAME" nvarchar(1000) NOT NULL,
COLUMN_TYPE nvarchar(128) NOT NULL,
DB_NAME nvarchar(128) NOT NULL,
BIG_DECIMAL_HIGH_VALUE nvarchar(255) NULL,
@@ -283,7 +283,7 @@ ALTER TABLE DBS ADD CONSTRAINT DBS_PK PRIMARY KEY (DB_ID);
CREATE TABLE TBL_COL_PRIVS
(
TBL_COLUMN_GRANT_ID bigint NOT NULL,
- "COLUMN_NAME" nvarchar(128) NULL,
+ "COLUMN_NAME" nvarchar(1000) NULL,
CREATE_TIME int NOT NULL,
GRANT_OPTION smallint NOT NULL CHECK (GRANT_OPTION IN (0,1)),
GRANTOR nvarchar(128) NULL,
@@ -396,7 +396,7 @@ ALTER TABLE PARTITION_EVENTS ADD CONSTRAINT PARTITION_EVENTS_PK PRIMARY KEY (PAR
CREATE TABLE SORT_COLS
(
SD_ID bigint NOT NULL,
- "COLUMN_NAME" nvarchar(128) NULL,
+ "COLUMN_NAME" nvarchar(1000) NULL,
"ORDER" int NOT NULL,
INTEGER_IDX int NOT NULL
);
@@ -533,7 +533,7 @@ CREATE TABLE COLUMNS_V2
(
CD_ID bigint NOT NULL,
COMMENT nvarchar(256) NULL,
- "COLUMN_NAME" nvarchar(128) NOT NULL,
+ "COLUMN_NAME" nvarchar(1000) NOT NULL,
TYPE_NAME nvarchar(4000) NOT NULL,
INTEGER_IDX int NOT NULL
);
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mssql/hive-schema-2.0.0.mssql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mssql/hive-schema-2.0.0.mssql.sql b/metastore/scripts/upgrade/mssql/hive-schema-2.0.0.mssql.sql
index 36e7c1f..1ec8632 100644
--- a/metastore/scripts/upgrade/mssql/hive-schema-2.0.0.mssql.sql
+++ b/metastore/scripts/upgrade/mssql/hive-schema-2.0.0.mssql.sql
@@ -75,7 +75,7 @@ CREATE TABLE PART_COL_STATS
(
CS_ID bigint NOT NULL,
AVG_COL_LEN float NULL,
- "COLUMN_NAME" nvarchar(128) NOT NULL,
+ "COLUMN_NAME" nvarchar(1000) NOT NULL,
COLUMN_TYPE nvarchar(128) NOT NULL,
DB_NAME nvarchar(128) NOT NULL,
BIG_DECIMAL_HIGH_VALUE nvarchar(255) NULL,
@@ -184,7 +184,7 @@ ALTER TABLE GLOBAL_PRIVS ADD CONSTRAINT GLOBAL_PRIVS_PK PRIMARY KEY (USER_GRANT_
CREATE TABLE PART_COL_PRIVS
(
PART_COLUMN_GRANT_ID bigint NOT NULL,
- "COLUMN_NAME" nvarchar(128) NULL,
+ "COLUMN_NAME" nvarchar(1000) NULL,
CREATE_TIME int NOT NULL,
GRANT_OPTION smallint NOT NULL CHECK (GRANT_OPTION IN (0,1)),
GRANTOR nvarchar(128) NULL,
@@ -218,7 +218,7 @@ CREATE TABLE TAB_COL_STATS
(
CS_ID bigint NOT NULL,
AVG_COL_LEN float NULL,
- "COLUMN_NAME" nvarchar(128) NOT NULL,
+ "COLUMN_NAME" nvarchar(1000) NOT NULL,
COLUMN_TYPE nvarchar(128) NOT NULL,
DB_NAME nvarchar(128) NOT NULL,
BIG_DECIMAL_HIGH_VALUE nvarchar(255) NULL,
@@ -283,7 +283,7 @@ ALTER TABLE DBS ADD CONSTRAINT DBS_PK PRIMARY KEY (DB_ID);
CREATE TABLE TBL_COL_PRIVS
(
TBL_COLUMN_GRANT_ID bigint NOT NULL,
- "COLUMN_NAME" nvarchar(128) NULL,
+ "COLUMN_NAME" nvarchar(1000) NULL,
CREATE_TIME int NOT NULL,
GRANT_OPTION smallint NOT NULL CHECK (GRANT_OPTION IN (0,1)),
GRANTOR nvarchar(128) NULL,
@@ -396,7 +396,7 @@ ALTER TABLE PARTITION_EVENTS ADD CONSTRAINT PARTITION_EVENTS_PK PRIMARY KEY (PAR
CREATE TABLE SORT_COLS
(
SD_ID bigint NOT NULL,
- "COLUMN_NAME" nvarchar(128) NULL,
+ "COLUMN_NAME" nvarchar(1000) NULL,
"ORDER" int NOT NULL,
INTEGER_IDX int NOT NULL
);
@@ -533,7 +533,7 @@ CREATE TABLE COLUMNS_V2
(
CD_ID bigint NOT NULL,
COMMENT nvarchar(256) NULL,
- "COLUMN_NAME" nvarchar(128) NOT NULL,
+ "COLUMN_NAME" nvarchar(1000) NOT NULL,
TYPE_NAME nvarchar(4000) NOT NULL,
INTEGER_IDX int NOT NULL
);
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-1.3.0.mssql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-1.3.0.mssql.sql b/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-1.3.0.mssql.sql
index fd12a92..9cf9d25 100644
--- a/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-1.3.0.mssql.sql
+++ b/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-1.3.0.mssql.sql
@@ -1,5 +1,6 @@
SELECT 'Upgrading MetaStore schema from 1.2.0 to 1.3.0' AS MESSAGE;
+:r 007-HIVE-11970.mssql.sql;
UPDATE VERSION SET SCHEMA_VERSION='1.3.0', VERSION_COMMENT='Hive release version 1.3.0' where VER_ID=1;
SELECT 'Finished upgrading MetaStore schema from 1.2.0 to 1.3.0' AS MESSAGE;
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-2.0.0.mssql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-2.0.0.mssql.sql b/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-2.0.0.mssql.sql
index fd12a92..de3d29d 100644
--- a/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-2.0.0.mssql.sql
+++ b/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-2.0.0.mssql.sql
@@ -1,5 +1,6 @@
-SELECT 'Upgrading MetaStore schema from 1.2.0 to 1.3.0' AS MESSAGE;
+SELECT 'Upgrading MetaStore schema from 1.2.0 to 2.0.0' AS MESSAGE;
+:r 007-HIVE-11970.mssql.sql;
-UPDATE VERSION SET SCHEMA_VERSION='1.3.0', VERSION_COMMENT='Hive release version 1.3.0' where VER_ID=1;
-SELECT 'Finished upgrading MetaStore schema from 1.2.0 to 1.3.0' AS MESSAGE;
+UPDATE VERSION SET SCHEMA_VERSION='2.0.0', VERSION_COMMENT='Hive release version 2.0.0' where VER_ID=1;
+SELECT 'Finished upgrading MetaStore schema from 1.2.0 to 2.0.0' AS MESSAGE;
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mysql/022-HIVE-11970.mysql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mysql/022-HIVE-11970.mysql.sql b/metastore/scripts/upgrade/mysql/022-HIVE-11970.mysql.sql
new file mode 100644
index 0000000..4517e00
--- /dev/null
+++ b/metastore/scripts/upgrade/mysql/022-HIVE-11970.mysql.sql
@@ -0,0 +1,6 @@
+ALTER TABLE `COLUMNS_V2` MODIFY `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL;
+ALTER TABLE `PART_COL_PRIVS` MODIFY `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL;
+ALTER TABLE `TBL_COL_PRIVS` MODIFY `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL;
+ALTER TABLE `SORT_COLS` MODIFY `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL;
+ALTER TABLE `TAB_COL_STATS` MODIFY `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL;
+ALTER TABLE `PART_COL_STATS` MODIFY `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL;
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mysql/hive-schema-1.3.0.mysql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mysql/hive-schema-1.3.0.mysql.sql b/metastore/scripts/upgrade/mysql/hive-schema-1.3.0.mysql.sql
index 71de138..ce0ac54 100644
--- a/metastore/scripts/upgrade/mysql/hive-schema-1.3.0.mysql.sql
+++ b/metastore/scripts/upgrade/mysql/hive-schema-1.3.0.mysql.sql
@@ -52,7 +52,7 @@ CREATE TABLE IF NOT EXISTS `CDS` (
CREATE TABLE IF NOT EXISTS `COLUMNS_V2` (
`CD_ID` bigint(20) NOT NULL,
`COMMENT` varchar(256) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`TYPE_NAME` varchar(4000) DEFAULT NULL,
`INTEGER_IDX` int(11) NOT NULL,
PRIMARY KEY (`CD_ID`,`COLUMN_NAME`),
@@ -296,7 +296,7 @@ CREATE TABLE IF NOT EXISTS `PARTITION_PARAMS` (
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE IF NOT EXISTS `PART_COL_PRIVS` (
`PART_COLUMN_GRANT_ID` bigint(20) NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
`CREATE_TIME` int(11) NOT NULL,
`GRANT_OPTION` smallint(6) NOT NULL,
`GRANTOR` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
@@ -544,7 +544,7 @@ CREATE TABLE IF NOT EXISTS `SKEWED_VALUES` (
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE IF NOT EXISTS `SORT_COLS` (
`SD_ID` bigint(20) NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
`ORDER` int(11) NOT NULL,
`INTEGER_IDX` int(11) NOT NULL,
PRIMARY KEY (`SD_ID`,`INTEGER_IDX`),
@@ -604,7 +604,7 @@ CREATE TABLE IF NOT EXISTS `TBLS` (
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE IF NOT EXISTS `TBL_COL_PRIVS` (
`TBL_COLUMN_GRANT_ID` bigint(20) NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
`CREATE_TIME` int(11) NOT NULL,
`GRANT_OPTION` smallint(6) NOT NULL,
`GRANTOR` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
@@ -650,7 +650,7 @@ CREATE TABLE IF NOT EXISTS `TAB_COL_STATS` (
`CS_ID` bigint(20) NOT NULL,
`DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`COLUMN_TYPE` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`TBL_ID` bigint(20) NOT NULL,
`LONG_LOW_VALUE` bigint(20),
@@ -678,7 +678,7 @@ CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
`DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`COLUMN_TYPE` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`PART_ID` bigint(20) NOT NULL,
`LONG_LOW_VALUE` bigint(20),
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mysql/hive-schema-2.0.0.mysql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mysql/hive-schema-2.0.0.mysql.sql b/metastore/scripts/upgrade/mysql/hive-schema-2.0.0.mysql.sql
index 6547cf1..3a2c0e2 100644
--- a/metastore/scripts/upgrade/mysql/hive-schema-2.0.0.mysql.sql
+++ b/metastore/scripts/upgrade/mysql/hive-schema-2.0.0.mysql.sql
@@ -52,7 +52,7 @@ CREATE TABLE IF NOT EXISTS `CDS` (
CREATE TABLE IF NOT EXISTS `COLUMNS_V2` (
`CD_ID` bigint(20) NOT NULL,
`COMMENT` varchar(256) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`TYPE_NAME` varchar(4000) DEFAULT NULL,
`INTEGER_IDX` int(11) NOT NULL,
PRIMARY KEY (`CD_ID`,`COLUMN_NAME`),
@@ -296,7 +296,7 @@ CREATE TABLE IF NOT EXISTS `PARTITION_PARAMS` (
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE IF NOT EXISTS `PART_COL_PRIVS` (
`PART_COLUMN_GRANT_ID` bigint(20) NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
`CREATE_TIME` int(11) NOT NULL,
`GRANT_OPTION` smallint(6) NOT NULL,
`GRANTOR` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
@@ -544,7 +544,7 @@ CREATE TABLE IF NOT EXISTS `SKEWED_VALUES` (
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE IF NOT EXISTS `SORT_COLS` (
`SD_ID` bigint(20) NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
`ORDER` int(11) NOT NULL,
`INTEGER_IDX` int(11) NOT NULL,
PRIMARY KEY (`SD_ID`,`INTEGER_IDX`),
@@ -604,7 +604,7 @@ CREATE TABLE IF NOT EXISTS `TBLS` (
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE IF NOT EXISTS `TBL_COL_PRIVS` (
`TBL_COLUMN_GRANT_ID` bigint(20) NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
`CREATE_TIME` int(11) NOT NULL,
`GRANT_OPTION` smallint(6) NOT NULL,
`GRANTOR` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
@@ -650,7 +650,7 @@ CREATE TABLE IF NOT EXISTS `TAB_COL_STATS` (
`CS_ID` bigint(20) NOT NULL,
`DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`COLUMN_TYPE` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`TBL_ID` bigint(20) NOT NULL,
`LONG_LOW_VALUE` bigint(20),
@@ -678,7 +678,7 @@ CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
`DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`COLUMN_TYPE` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`PART_ID` bigint(20) NOT NULL,
`LONG_LOW_VALUE` bigint(20),
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-1.3.0.mysql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-1.3.0.mysql.sql b/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-1.3.0.mysql.sql
index 44a9946..1b32d93 100644
--- a/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-1.3.0.mysql.sql
+++ b/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-1.3.0.mysql.sql
@@ -1,4 +1,5 @@
SELECT 'Upgrading MetaStore schema from 1.2.0 to 1.3.0' AS ' ';
SOURCE 021-HIVE-7018.mysql.sql;
+SOURCE 022-HIVE-11970.mysql.sql;
UPDATE VERSION SET SCHEMA_VERSION='1.3.0', VERSION_COMMENT='Hive release version 1.3.0' where VER_ID=1;
SELECT 'Finished upgrading MetaStore schema from 1.2.0 to 1.3.0' AS ' ';
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-2.0.0.mysql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-2.0.0.mysql.sql b/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-2.0.0.mysql.sql
index 22d5242..1340f27 100644
--- a/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-2.0.0.mysql.sql
+++ b/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-2.0.0.mysql.sql
@@ -1,4 +1,6 @@
SELECT 'Upgrading MetaStore schema from 1.2.0 to 2.0.0' AS ' ';
+SOURCE 021-HIVE-7018.mysql.sql;
+SOURCE 022-HIVE-11970.mysql.sql;
UPDATE VERSION SET SCHEMA_VERSION='2.0.0', VERSION_COMMENT='Hive release version 2.0.0' where VER_ID=1;
SELECT 'Finished upgrading MetaStore schema from 1.2.0 to 2.0.0' AS ' ';
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/oracle/022-HIVE-11970.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/022-HIVE-11970.oracle.sql b/metastore/scripts/upgrade/oracle/022-HIVE-11970.oracle.sql
new file mode 100644
index 0000000..2f11157
--- /dev/null
+++ b/metastore/scripts/upgrade/oracle/022-HIVE-11970.oracle.sql
@@ -0,0 +1,23 @@
+ALTER TABLE COLUMNS_V2 MODIFY (
+ "COLUMN_NAME" VARCHAR2(1000)
+);
+
+ALTER TABLE PART_COL_PRIVS MODIFY (
+ "COLUMN_NAME" VARCHAR2(1000)
+);
+
+ALTER TABLE TBL_COL_PRIVS MODIFY (
+ "COLUMN_NAME" VARCHAR2(1000)
+);
+
+ALTER TABLE SORT_COLS MODIFY (
+ "COLUMN_NAME" VARCHAR2(1000)
+);
+
+ALTER TABLE TAB_COL_STATS MODIFY (
+ "COLUMN_NAME" VARCHAR2(1000)
+);
+
+ALTER TABLE PART_COL_STATS MODIFY (
+ "COLUMN_NAME" VARCHAR2(1000)
+);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/oracle/hive-schema-1.3.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/hive-schema-1.3.0.oracle.sql b/metastore/scripts/upgrade/oracle/hive-schema-1.3.0.oracle.sql
index 7605bc7..d2e7945 100644
--- a/metastore/scripts/upgrade/oracle/hive-schema-1.3.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/hive-schema-1.3.0.oracle.sql
@@ -29,7 +29,7 @@ ALTER TABLE NUCLEUS_TABLES ADD CONSTRAINT NUCLEUS_TABLES_PK PRIMARY KEY (CLASS_N
CREATE TABLE PART_COL_PRIVS
(
PART_COLUMN_GRANT_ID NUMBER NOT NULL,
- "COLUMN_NAME" VARCHAR2(128) NULL,
+ "COLUMN_NAME" VARCHAR2(1000) NULL,
CREATE_TIME NUMBER (10) NOT NULL,
GRANT_OPTION NUMBER (5) NOT NULL,
GRANTOR VARCHAR2(128) NULL,
@@ -55,7 +55,7 @@ CREATE TABLE COLUMNS_V2
(
CD_ID NUMBER NOT NULL,
"COMMENT" VARCHAR2(256) NULL,
- "COLUMN_NAME" VARCHAR2(128) NOT NULL,
+ "COLUMN_NAME" VARCHAR2(1000) NOT NULL,
TYPE_NAME VARCHAR2(4000) NOT NULL,
INTEGER_IDX NUMBER(10) NOT NULL
);
@@ -166,7 +166,7 @@ ALTER TABLE INDEX_PARAMS ADD CONSTRAINT INDEX_PARAMS_PK PRIMARY KEY (INDEX_ID,PA
CREATE TABLE TBL_COL_PRIVS
(
TBL_COLUMN_GRANT_ID NUMBER NOT NULL,
- "COLUMN_NAME" VARCHAR2(128) NULL,
+ "COLUMN_NAME" VARCHAR2(1000) NULL,
CREATE_TIME NUMBER (10) NOT NULL,
GRANT_OPTION NUMBER (5) NOT NULL,
GRANTOR VARCHAR2(128) NULL,
@@ -272,7 +272,7 @@ ALTER TABLE TABLE_PARAMS ADD CONSTRAINT TABLE_PARAMS_PK PRIMARY KEY (TBL_ID,PARA
CREATE TABLE SORT_COLS
(
SD_ID NUMBER NOT NULL,
- "COLUMN_NAME" VARCHAR2(128) NULL,
+ "COLUMN_NAME" VARCHAR2(1000) NULL,
"ORDER" NUMBER (10) NOT NULL,
INTEGER_IDX NUMBER(10) NOT NULL
);
@@ -467,7 +467,7 @@ CREATE TABLE TAB_COL_STATS (
CS_ID NUMBER NOT NULL,
DB_NAME VARCHAR2(128) NOT NULL,
TABLE_NAME VARCHAR2(128) NOT NULL,
- COLUMN_NAME VARCHAR2(128) NOT NULL,
+ COLUMN_NAME VARCHAR2(1000) NOT NULL,
COLUMN_TYPE VARCHAR2(128) NOT NULL,
TBL_ID NUMBER NOT NULL,
LONG_LOW_VALUE NUMBER,
@@ -503,7 +503,7 @@ CREATE TABLE PART_COL_STATS (
DB_NAME VARCHAR2(128) NOT NULL,
TABLE_NAME VARCHAR2(128) NOT NULL,
PARTITION_NAME VARCHAR2(767) NOT NULL,
- COLUMN_NAME VARCHAR2(128) NOT NULL,
+ COLUMN_NAME VARCHAR2(1000) NOT NULL,
COLUMN_TYPE VARCHAR2(128) NOT NULL,
PART_ID NUMBER NOT NULL,
LONG_LOW_VALUE NUMBER,
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/oracle/hive-schema-2.0.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/hive-schema-2.0.0.oracle.sql b/metastore/scripts/upgrade/oracle/hive-schema-2.0.0.oracle.sql
index 8d963ce..2dcdd77 100644
--- a/metastore/scripts/upgrade/oracle/hive-schema-2.0.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/hive-schema-2.0.0.oracle.sql
@@ -29,7 +29,7 @@ ALTER TABLE NUCLEUS_TABLES ADD CONSTRAINT NUCLEUS_TABLES_PK PRIMARY KEY (CLASS_N
CREATE TABLE PART_COL_PRIVS
(
PART_COLUMN_GRANT_ID NUMBER NOT NULL,
- "COLUMN_NAME" VARCHAR2(128) NULL,
+ "COLUMN_NAME" VARCHAR2(1000) NULL,
CREATE_TIME NUMBER (10) NOT NULL,
GRANT_OPTION NUMBER (5) NOT NULL,
GRANTOR VARCHAR2(128) NULL,
@@ -55,7 +55,7 @@ CREATE TABLE COLUMNS_V2
(
CD_ID NUMBER NOT NULL,
"COMMENT" VARCHAR2(256) NULL,
- "COLUMN_NAME" VARCHAR2(128) NOT NULL,
+ "COLUMN_NAME" VARCHAR2(1000) NOT NULL,
TYPE_NAME VARCHAR2(4000) NOT NULL,
INTEGER_IDX NUMBER(10) NOT NULL
);
@@ -166,7 +166,7 @@ ALTER TABLE INDEX_PARAMS ADD CONSTRAINT INDEX_PARAMS_PK PRIMARY KEY (INDEX_ID,PA
CREATE TABLE TBL_COL_PRIVS
(
TBL_COLUMN_GRANT_ID NUMBER NOT NULL,
- "COLUMN_NAME" VARCHAR2(128) NULL,
+ "COLUMN_NAME" VARCHAR2(1000) NULL,
CREATE_TIME NUMBER (10) NOT NULL,
GRANT_OPTION NUMBER (5) NOT NULL,
GRANTOR VARCHAR2(128) NULL,
@@ -272,7 +272,7 @@ ALTER TABLE TABLE_PARAMS ADD CONSTRAINT TABLE_PARAMS_PK PRIMARY KEY (TBL_ID,PARA
CREATE TABLE SORT_COLS
(
SD_ID NUMBER NOT NULL,
- "COLUMN_NAME" VARCHAR2(128) NULL,
+ "COLUMN_NAME" VARCHAR2(1000) NULL,
"ORDER" NUMBER (10) NOT NULL,
INTEGER_IDX NUMBER(10) NOT NULL
);
@@ -467,7 +467,7 @@ CREATE TABLE TAB_COL_STATS (
CS_ID NUMBER NOT NULL,
DB_NAME VARCHAR2(128) NOT NULL,
TABLE_NAME VARCHAR2(128) NOT NULL,
- COLUMN_NAME VARCHAR2(128) NOT NULL,
+ COLUMN_NAME VARCHAR2(1000) NOT NULL,
COLUMN_TYPE VARCHAR2(128) NOT NULL,
TBL_ID NUMBER NOT NULL,
LONG_LOW_VALUE NUMBER,
@@ -503,7 +503,7 @@ CREATE TABLE PART_COL_STATS (
DB_NAME VARCHAR2(128) NOT NULL,
TABLE_NAME VARCHAR2(128) NOT NULL,
PARTITION_NAME VARCHAR2(767) NOT NULL,
- COLUMN_NAME VARCHAR2(128) NOT NULL,
+ COLUMN_NAME VARCHAR2(1000) NOT NULL,
COLUMN_TYPE VARCHAR2(128) NOT NULL,
PART_ID NUMBER NOT NULL,
LONG_LOW_VALUE NUMBER,
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-1.3.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-1.3.0.oracle.sql b/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-1.3.0.oracle.sql
index f072a1c..bd283d4 100644
--- a/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-1.3.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-1.3.0.oracle.sql
@@ -1,4 +1,6 @@
SELECT 'Upgrading MetaStore schema from 1.2.0 to 1.3.0' AS Status from dual;
+@022-HIVE-11970.oracle.sql;
+
UPDATE VERSION SET SCHEMA_VERSION='1.3.0', VERSION_COMMENT='Hive release version 1.3.0' where VER_ID=1;
SELECT 'Finished upgrading MetaStore schema from 1.2.0 to 1.3.0' AS Status from dual;
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-2.0.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-2.0.0.oracle.sql b/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-2.0.0.oracle.sql
index efff2c9..ceb09be 100644
--- a/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-2.0.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-2.0.0.oracle.sql
@@ -1,4 +1,6 @@
SELECT 'Upgrading MetaStore schema from 1.2.0 to 2.0.0' AS Status from dual;
+@022-HIVE-11970.oracle.sql;
+
UPDATE VERSION SET SCHEMA_VERSION='2.0.0', VERSION_COMMENT='Hive release version 2.0.0' where VER_ID=1;
SELECT 'Finished upgrading MetaStore schema from 1.2.0 to 2.0.0' AS Status from dual;
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/postgres/021-HIVE-11970.postgres.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/postgres/021-HIVE-11970.postgres.sql b/metastore/scripts/upgrade/postgres/021-HIVE-11970.postgres.sql
new file mode 100644
index 0000000..08cb4a5
--- /dev/null
+++ b/metastore/scripts/upgrade/postgres/021-HIVE-11970.postgres.sql
@@ -0,0 +1,6 @@
+ALTER TABLE "COLUMNS_V2" ALTER "COLUMN_NAME" TYPE character varying(1000);
+ALTER TABLE "PART_COL_PRIVS" ALTER "COLUMN_NAME" TYPE character varying(1000);
+ALTER TABLE "TBL_COL_PRIVS" ALTER "COLUMN_NAME" TYPE character varying(1000);
+ALTER TABLE "SORT_COLS" ALTER "COLUMN_NAME" TYPE character varying(1000);
+ALTER TABLE "TAB_COL_STATS" ALTER "COLUMN_NAME" TYPE character varying(1000);
+ALTER TABLE "PART_COL_STATS" ALTER "COLUMN_NAME" TYPE character varying(1000);
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/postgres/hive-schema-1.3.0.postgres.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/postgres/hive-schema-1.3.0.postgres.sql b/metastore/scripts/upgrade/postgres/hive-schema-1.3.0.postgres.sql
index 3ab5b3e..9bb5765 100644
--- a/metastore/scripts/upgrade/postgres/hive-schema-1.3.0.postgres.sql
+++ b/metastore/scripts/upgrade/postgres/hive-schema-1.3.0.postgres.sql
@@ -42,7 +42,7 @@ CREATE TABLE "CDS" (
CREATE TABLE "COLUMNS_V2" (
"CD_ID" bigint NOT NULL,
"COMMENT" character varying(4000),
- "COLUMN_NAME" character varying(128) NOT NULL,
+ "COLUMN_NAME" character varying(1000) NOT NULL,
"TYPE_NAME" character varying(4000),
"INTEGER_IDX" integer NOT NULL
);
@@ -217,7 +217,7 @@ CREATE TABLE "PARTITION_PARAMS" (
CREATE TABLE "PART_COL_PRIVS" (
"PART_COLUMN_GRANT_ID" bigint NOT NULL,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"CREATE_TIME" bigint NOT NULL,
"GRANT_OPTION" smallint NOT NULL,
"GRANTOR" character varying(128) DEFAULT NULL::character varying,
@@ -340,7 +340,7 @@ CREATE TABLE "SERDE_PARAMS" (
CREATE TABLE "SORT_COLS" (
"SD_ID" bigint NOT NULL,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"ORDER" bigint NOT NULL,
"INTEGER_IDX" bigint NOT NULL
);
@@ -382,7 +382,7 @@ CREATE TABLE "TBLS" (
CREATE TABLE "TBL_COL_PRIVS" (
"TBL_COLUMN_GRANT_ID" bigint NOT NULL,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"CREATE_TIME" bigint NOT NULL,
"GRANT_OPTION" smallint NOT NULL,
"GRANTOR" character varying(128) DEFAULT NULL::character varying,
@@ -486,7 +486,7 @@ CREATE TABLE "TAB_COL_STATS" (
"CS_ID" bigint NOT NULL,
"DB_NAME" character varying(128) DEFAULT NULL::character varying,
"TABLE_NAME" character varying(128) DEFAULT NULL::character varying,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"COLUMN_TYPE" character varying(128) DEFAULT NULL::character varying,
"TBL_ID" bigint NOT NULL,
"LONG_LOW_VALUE" bigint,
@@ -522,7 +522,7 @@ CREATE TABLE "PART_COL_STATS" (
"DB_NAME" character varying(128) DEFAULT NULL::character varying,
"TABLE_NAME" character varying(128) DEFAULT NULL::character varying,
"PARTITION_NAME" character varying(767) DEFAULT NULL::character varying,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"COLUMN_TYPE" character varying(128) DEFAULT NULL::character varying,
"PART_ID" bigint NOT NULL,
"LONG_LOW_VALUE" bigint,
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/postgres/hive-schema-2.0.0.postgres.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/postgres/hive-schema-2.0.0.postgres.sql b/metastore/scripts/upgrade/postgres/hive-schema-2.0.0.postgres.sql
index 6442eb1..c749a29 100644
--- a/metastore/scripts/upgrade/postgres/hive-schema-2.0.0.postgres.sql
+++ b/metastore/scripts/upgrade/postgres/hive-schema-2.0.0.postgres.sql
@@ -42,7 +42,7 @@ CREATE TABLE "CDS" (
CREATE TABLE "COLUMNS_V2" (
"CD_ID" bigint NOT NULL,
"COMMENT" character varying(4000),
- "COLUMN_NAME" character varying(128) NOT NULL,
+ "COLUMN_NAME" character varying(1000) NOT NULL,
"TYPE_NAME" character varying(4000),
"INTEGER_IDX" integer NOT NULL
);
@@ -217,7 +217,7 @@ CREATE TABLE "PARTITION_PARAMS" (
CREATE TABLE "PART_COL_PRIVS" (
"PART_COLUMN_GRANT_ID" bigint NOT NULL,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"CREATE_TIME" bigint NOT NULL,
"GRANT_OPTION" smallint NOT NULL,
"GRANTOR" character varying(128) DEFAULT NULL::character varying,
@@ -340,7 +340,7 @@ CREATE TABLE "SERDE_PARAMS" (
CREATE TABLE "SORT_COLS" (
"SD_ID" bigint NOT NULL,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"ORDER" bigint NOT NULL,
"INTEGER_IDX" bigint NOT NULL
);
@@ -382,7 +382,7 @@ CREATE TABLE "TBLS" (
CREATE TABLE "TBL_COL_PRIVS" (
"TBL_COLUMN_GRANT_ID" bigint NOT NULL,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"CREATE_TIME" bigint NOT NULL,
"GRANT_OPTION" smallint NOT NULL,
"GRANTOR" character varying(128) DEFAULT NULL::character varying,
@@ -486,7 +486,7 @@ CREATE TABLE "TAB_COL_STATS" (
"CS_ID" bigint NOT NULL,
"DB_NAME" character varying(128) DEFAULT NULL::character varying,
"TABLE_NAME" character varying(128) DEFAULT NULL::character varying,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"COLUMN_TYPE" character varying(128) DEFAULT NULL::character varying,
"TBL_ID" bigint NOT NULL,
"LONG_LOW_VALUE" bigint,
@@ -522,7 +522,7 @@ CREATE TABLE "PART_COL_STATS" (
"DB_NAME" character varying(128) DEFAULT NULL::character varying,
"TABLE_NAME" character varying(128) DEFAULT NULL::character varying,
"PARTITION_NAME" character varying(767) DEFAULT NULL::character varying,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"COLUMN_TYPE" character varying(128) DEFAULT NULL::character varying,
"PART_ID" bigint NOT NULL,
"LONG_LOW_VALUE" bigint,
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-1.3.0.postgres.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-1.3.0.postgres.sql b/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-1.3.0.postgres.sql
index cdd3792..b1ec241 100644
--- a/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-1.3.0.postgres.sql
+++ b/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-1.3.0.postgres.sql
@@ -1,5 +1,6 @@
SELECT 'Upgrading MetaStore schema from 1.2.0 to 1.3.0';
+\i 021-HIVE-11970.postgres.sql;
UPDATE "VERSION" SET "SCHEMA_VERSION"='1.3.0', "VERSION_COMMENT"='Hive release version 1.3.0' where "VER_ID"=1;
SELECT 'Finished upgrading MetaStore schema from 1.2.0 to 1.3.0';
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-2.0.0.postgres.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-2.0.0.postgres.sql b/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-2.0.0.postgres.sql
index b6b0c35..628444c 100644
--- a/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-2.0.0.postgres.sql
+++ b/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-2.0.0.postgres.sql
@@ -1,5 +1,6 @@
SELECT 'Upgrading MetaStore schema from 1.2.0 to 2.0.0';
+\i 021-HIVE-11970.postgres.sql;
UPDATE "VERSION" SET "SCHEMA_VERSION"='2.0.0', "VERSION_COMMENT"='Hive release version 2.0.0' where "VER_ID"=1;
SELECT 'Finished upgrading MetaStore schema from 1.2.0 to 2.0.0';
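After running the matching upgrade script for a given dialect, the widened column can be checked from JDBC driver metadata. This is a minimal verification sketch, not part of the patch; the JDBC URL, user, and password are placeholders for the actual metastore backing database, and COLUMN_SIZE should report 1000 for COLUMN_NAME once HIVE-11970 has been applied.

import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.ResultSet;

public class CheckColumnWidth {
  public static void main(String[] args) throws Exception {
    // Placeholder connection details -- point these at the metastore backing database.
    try (Connection conn = DriverManager.getConnection(
        "jdbc:mysql://localhost:3306/metastore", "hiveuser", "hivepass")) {
      DatabaseMetaData md = conn.getMetaData();
      // Read the declared width of COLUMNS_V2.COLUMN_NAME from driver metadata.
      try (ResultSet rs = md.getColumns(null, null, "COLUMNS_V2", "COLUMN_NAME")) {
        while (rs.next()) {
          System.out.println(rs.getString("TABLE_NAME") + "." + rs.getString("COLUMN_NAME")
              + " size=" + rs.getInt("COLUMN_SIZE"));
        }
      }
    }
  }
}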
[13/22] hive git commit: HIVE-11982 : Some test cases for union all
fail with recent changes (Yongzhi Chen via Szehon)
Posted by se...@apache.org.
HIVE-11982 : Some test cases for union all fail with recent changes (Yongzhi Chen via Szehon)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/522bb600
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/522bb600
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/522bb600
Branch: refs/heads/llap
Commit: 522bb600b54cf7667de7bfa75cb286e680018842
Parents: 7b1ed3d
Author: Szehon Ho <sz...@cloudera.com>
Authored: Thu Oct 1 11:33:39 2015 -0700
Committer: Szehon Ho <sz...@cloudera.com>
Committed: Thu Oct 1 11:33:39 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/522bb600/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
index 8bcb464..2207cfb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
@@ -270,7 +270,7 @@ public class ColumnPrunerProcCtx implements NodeProcessorCtx {
for (Operator<? extends OperatorDesc> child : curOp.getChildOperators()) {
if (child instanceof UnionOperator) {
- prunList = prunedColLists.get(child);
+ prunList = genColLists(curOp, child);
if (prunList == null || prunList.size() == 0 || parentPrunList.size() == prunList.size()) {
continue;
}
[05/22] hive git commit: HIVE-11934 Transaction lock retry logic
results in infinite loop (Eugene Koifman, reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
HIVE-11934 Transaction lock retry logic results in infinite loop (Eugene Koifman, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0d43e876
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0d43e876
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0d43e876
Branch: refs/heads/llap
Commit: 0d43e876be9c36156a28bd2c2b9493f986841dd7
Parents: edd6300
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Wed Sep 30 16:05:34 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Wed Sep 30 16:05:34 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/metastore/txn/TxnHandler.java | 117 +++++++++----------
1 file changed, 57 insertions(+), 60 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/0d43e876/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index 0b19368..cc7e2c6 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -91,8 +91,8 @@ public class TxnHandler {
/**
* Number of consecutive deadlocks we have seen
*/
- protected int deadlockCnt;
- private long deadlockRetryInterval;
+ private int deadlockCnt;
+ private final long deadlockRetryInterval;
protected HiveConf conf;
protected DatabaseProduct dbProduct;
@@ -115,10 +115,8 @@ public class TxnHandler {
//
// All public methods that write to the database have to check for deadlocks when a SQLException
// comes back and handle it if they see one. This has to be done with the connection pooling
- // in mind. To do this they should call detectDeadlock AFTER rolling back the db transaction,
- // and then in an outer loop they should catch DeadlockException. In the catch for this they
- // should increment the deadlock counter and recall themselves. See commitTxn for an example.
- // the connection has been closed and returned to the pool.
+ // in mind. To do this they should call checkRetryable() AFTER rolling back the db transaction,
+ // and then they should catch RetryException and call themselves recursively. See commitTxn for an example.
public TxnHandler(HiveConf conf) {
this.conf = conf;
@@ -135,7 +133,6 @@ public class TxnHandler {
}
timeout = HiveConf.getTimeVar(conf, HiveConf.ConfVars.HIVE_TXN_TIMEOUT, TimeUnit.MILLISECONDS);
- deadlockCnt = 0;
buildJumpTable();
retryInterval = HiveConf.getTimeVar(conf, HiveConf.ConfVars.HMSHANDLERINTERVAL,
TimeUnit.MILLISECONDS);
@@ -280,7 +277,6 @@ public class TxnHandler {
}
public OpenTxnsResponse openTxns(OpenTxnRequest rqst) throws MetaException {
- deadlockCnt = 0; // Reset deadlock count since this is a new transaction
int numTxns = rqst.getNum_txns();
try {
Connection dbConn = null;
@@ -420,7 +416,6 @@ public class TxnHandler {
public LockResponse lock(LockRequest rqst)
throws NoSuchTxnException, TxnAbortedException, MetaException {
- deadlockCnt = 0;
try {
Connection dbConn = null;
try {
@@ -636,8 +631,6 @@ public class TxnHandler {
}
} catch (RetryException e) {
heartbeat(ids);
- } finally {
- deadlockCnt = 0;
}
}
@@ -903,14 +896,14 @@ public class TxnHandler {
void rollbackDBConn(Connection dbConn) {
try {
- if (dbConn != null) dbConn.rollback();
+ if (dbConn != null && !dbConn.isClosed()) dbConn.rollback();
} catch (SQLException e) {
LOG.warn("Failed to rollback db connection " + getMessage(e));
}
}
protected void closeDbConn(Connection dbConn) {
try {
- if (dbConn != null) dbConn.close();
+ if (dbConn != null && !dbConn.isClosed()) dbConn.close();
} catch (SQLException e) {
LOG.warn("Failed to close db connection " + getMessage(e));
}
@@ -922,7 +915,7 @@ public class TxnHandler {
*/
protected void closeStmt(Statement stmt) {
try {
- if (stmt != null) stmt.close();
+ if (stmt != null && !stmt.isClosed()) stmt.close();
} catch (SQLException e) {
LOG.warn("Failed to close statement " + getMessage(e));
}
@@ -952,15 +945,14 @@ public class TxnHandler {
closeDbConn(dbConn);
}
/**
- * Determine if an exception was such that it makse sense to retry. Unfortunately there is no standard way to do
+ * Determine if an exception was such that it makes sense to retry. Unfortunately there is no standard way to do
* this, so we have to inspect the error messages and catch the telltale signs for each
- * different database.
+ * different database. This method will throw {@code RetryException}
+ * if the error is retry-able.
* @param conn database connection
* @param e exception that was thrown.
- * @param caller name of the method calling this
- * @throws org.apache.hadoop.hive.metastore.txn.TxnHandler.RetryException when deadlock
- * detected and retry count has not been exceeded.
- * TODO: make "caller" more elaborate like include lockId for example
+ * @param caller name of the method calling this (and other info useful to log)
+ * @throws org.apache.hadoop.hive.metastore.txn.TxnHandler.RetryException when the operation should be retried
*/
protected void checkRetryable(Connection conn,
SQLException e,
@@ -973,53 +965,57 @@ public class TxnHandler {
// so I've tried to capture the different error messages (there appear to be fewer different
// error messages than SQL states).
// Derby and newer MySQL driver use the new SQLTransactionRollbackException
- if (dbProduct == null && conn != null) {
- determineDatabaseProduct(conn);
- }
- if (e instanceof SQLTransactionRollbackException ||
- ((dbProduct == DatabaseProduct.MYSQL || dbProduct == DatabaseProduct.POSTGRES ||
- dbProduct == DatabaseProduct.SQLSERVER) && e.getSQLState().equals("40001")) ||
- (dbProduct == DatabaseProduct.POSTGRES && e.getSQLState().equals("40P01")) ||
- (dbProduct == DatabaseProduct.ORACLE && (e.getMessage().contains("deadlock detected")
- || e.getMessage().contains("can't serialize access for this transaction")))) {
- if (deadlockCnt++ < ALLOWED_REPEATED_DEADLOCKS) {
- long waitInterval = deadlockRetryInterval * deadlockCnt;
- LOG.warn("Deadlock detected in " + caller + ". Will wait " + waitInterval +
- "ms try again up to " + (ALLOWED_REPEATED_DEADLOCKS - deadlockCnt + 1) + " times.");
- // Pause for a just a bit for retrying to avoid immediately jumping back into the deadlock.
- try {
- Thread.sleep(waitInterval);
- } catch (InterruptedException ie) {
- // NOP
- }
- throw new RetryException();
- } else {
- LOG.error("Too many repeated deadlocks in " + caller + ", giving up.");
- deadlockCnt = 0;
+ boolean sendRetrySignal = false;
+ try {
+ if (dbProduct == null && conn != null) {
+ determineDatabaseProduct(conn);
}
- }
- else if(isRetryable(e)) {
- //in MSSQL this means Communication Link Failure
- if(retryNum++ < retryLimit) {
- LOG.warn("Retryable error detected in " + caller + ". Will wait " + retryInterval +
- "ms and retry up to " + (retryLimit - retryNum + 1) + " times. Error: " + getMessage(e));
- try {
- Thread.sleep(retryInterval);
+ if (e instanceof SQLTransactionRollbackException ||
+ ((dbProduct == DatabaseProduct.MYSQL || dbProduct == DatabaseProduct.POSTGRES ||
+ dbProduct == DatabaseProduct.SQLSERVER) && e.getSQLState().equals("40001")) ||
+ (dbProduct == DatabaseProduct.POSTGRES && e.getSQLState().equals("40P01")) ||
+ (dbProduct == DatabaseProduct.ORACLE && (e.getMessage().contains("deadlock detected")
+ || e.getMessage().contains("can't serialize access for this transaction")))) {
+ if (deadlockCnt++ < ALLOWED_REPEATED_DEADLOCKS) {
+ long waitInterval = deadlockRetryInterval * deadlockCnt;
+ LOG.warn("Deadlock detected in " + caller + ". Will wait " + waitInterval +
+ "ms try again up to " + (ALLOWED_REPEATED_DEADLOCKS - deadlockCnt + 1) + " times.");
+ // Pause for a just a bit for retrying to avoid immediately jumping back into the deadlock.
+ try {
+ Thread.sleep(waitInterval);
+ } catch (InterruptedException ie) {
+ // NOP
+ }
+ sendRetrySignal = true;
+ } else {
+ LOG.error("Too many repeated deadlocks in " + caller + ", giving up.");
}
- catch(InterruptedException ex) {
- //
+ } else if (isRetryable(e)) {
+ //in MSSQL this means Communication Link Failure
+ if (retryNum++ < retryLimit) {
+ LOG.warn("Retryable error detected in " + caller + ". Will wait " + retryInterval +
+ "ms and retry up to " + (retryLimit - retryNum + 1) + " times. Error: " + getMessage(e));
+ try {
+ Thread.sleep(retryInterval);
+ } catch (InterruptedException ex) {
+ //
+ }
+ sendRetrySignal = true;
+ } else {
+ LOG.error("Fatal error. Retry limit (" + retryLimit + ") reached. Last error: " + getMessage(e));
}
- throw new RetryException();
}
- else {
- LOG.error("Fatal error. Retry limit (" + retryLimit + ") reached. Last error: " + getMessage(e));
+ }
+ finally {
+ /*if this method ends with anything except a retry signal, the caller should fail the operation
+ and propagate the error up to the its caller (Metastore client); thus must reset retry counters*/
+ if(!sendRetrySignal) {
+ deadlockCnt = 0;
retryNum = 0;
}
}
- else {
- //if here, we got something that will propagate the error (rather than retry), so reset counters
- deadlockCnt = 0;
- retryNum = 0;
+ if(sendRetrySignal) {
+ throw new RetryException();
}
}
@@ -2100,6 +2096,7 @@ public class TxnHandler {
//in MSSQL this means Communication Link Failure
return true;
}
+ //see https://issues.apache.org/jira/browse/HIVE-9938
}
return false;
}
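The class comment revised above describes the retry protocol every writing method is expected to follow: roll back the connection, call checkRetryable() to decide whether a retry is warranted, and catch RetryException in an outer try to re-invoke the method. A minimal sketch of that shape follows, assuming it lives inside TxnHandler with the usual java.sql imports; the method name and its body are hypothetical, while getDbConn, rollbackDBConn, checkRetryable, closeDbConn, getMessage, and RetryException are the members shown in the diff.

// Sketch only -- the work between getDbConn() and commit() stands in for the real SQL.
public void exampleWrite(String caller) throws MetaException {
  try {
    Connection dbConn = null;
    try {
      dbConn = getDbConn(Connection.TRANSACTION_SERIALIZABLE);
      // ... issue the SQL updates for this operation ...
      dbConn.commit();
    } catch (SQLException e) {
      // Roll back FIRST, then let checkRetryable() decide whether to signal a retry.
      rollbackDBConn(dbConn);
      checkRetryable(dbConn, e, "exampleWrite");
      throw new MetaException("Unable to update transaction database: " + getMessage(e));
    } finally {
      closeDbConn(dbConn);
    }
  } catch (RetryException e) {
    // checkRetryable() threw: the retry counters allow another attempt, so recurse.
    exampleWrite(caller);
  }
}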
[04/22] hive git commit: HIVE-11915 : BoneCP returns closed
connections from the pool (Sergey Shelukhin, reviewed by Thejas M Nair)
Posted by se...@apache.org.
HIVE-11915 : BoneCP returns closed connections from the pool (Sergey Shelukhin, reviewed by Thejas M Nair)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/edd63004
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/edd63004
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/edd63004
Branch: refs/heads/llap
Commit: edd63004375602bf7550513380ec25cf34ca5cf5
Parents: 50b6d0c
Author: Sergey Shelukhin <se...@apache.org>
Authored: Wed Sep 30 15:23:25 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Wed Sep 30 15:24:48 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/metastore/txn/TxnHandler.java | 25 +++++++++++---------
1 file changed, 14 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/edd63004/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index 8597d9f..0b19368 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -84,6 +84,7 @@ public class TxnHandler {
static final private Log LOG = LogFactory.getLog(TxnHandler.class.getName());
static private DataSource connPool;
+ static private boolean doRetryOnConnPool = false;
private final static Object lockLock = new Object(); // Random object to lock on for the lock
// method
@@ -885,18 +886,19 @@ public class TxnHandler {
}
- /**
- * Get a connection to the database
- * @param isolationLevel desired isolation level. If you are doing _any_ data modifications
- * you should request serializable, else read committed should be fine.
- * @return db connection
- * @throws MetaException if the connection cannot be obtained
- */
protected Connection getDbConn(int isolationLevel) throws SQLException {
- Connection dbConn = connPool.getConnection();
- dbConn.setAutoCommit(false);
- dbConn.setTransactionIsolation(isolationLevel);
- return dbConn;
+ int rc = doRetryOnConnPool ? 10 : 1;
+ while (true) {
+ try {
+ Connection dbConn = connPool.getConnection();
+ dbConn.setAutoCommit(false);
+ dbConn.setTransactionIsolation(isolationLevel);
+ return dbConn;
+ } catch (SQLException e){
+ if ((--rc) <= 0) throw e;
+ LOG.error("There is a problem with a connection from the pool, retrying", e);
+ }
+ }
}
void rollbackDBConn(Connection dbConn) {
@@ -1964,6 +1966,7 @@ public class TxnHandler {
config.setUser(user);
config.setPassword(passwd);
connPool = new BoneCPDataSource(config);
+ doRetryOnConnPool = true; // Enable retries to work around BONECP bug.
} else if ("dbcp".equals(connectionPooler)) {
ObjectPool objectPool = new GenericObjectPool();
ConnectionFactory connFactory = new DriverManagerConnectionFactory(driverUrl, user, passwd);
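The fix amounts to a bounded retry around the pool's getConnection(), enabled only for BoneCP. Stripped of the TxnHandler specifics, the pattern looks like the following sketch; the class and helper names and the attempt count are illustrative, and any javax.sql.DataSource works.

import java.sql.Connection;
import java.sql.SQLException;
import javax.sql.DataSource;

public final class PoolRetry {
  // Ask the pool up to 'attempts' times; a stale or already-closed pooled
  // connection surfaces as SQLException, and the next attempt fetches a fresh one.
  static Connection getConnWithRetry(DataSource pool, int attempts) throws SQLException {
    while (true) {
      try {
        return pool.getConnection();
      } catch (SQLException e) {
        if (--attempts <= 0) {
          throw e; // attempts exhausted: propagate the last failure
        }
      }
    }
  }
}

In the patch itself the attempt count is 10 when doRetryOnConnPool is set (BoneCP) and 1 otherwise, so DBCP keeps its original no-retry behavior.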
[09/22] hive git commit: HIVE-4243. Fix column names in ORC metadata.
Posted by se...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
index 2dad1e7..f44c80e 100644
--- a/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
@@ -268,7 +268,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -288,7 +288,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -603,7 +603,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -623,7 +623,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -670,7 +670,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -690,7 +690,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -947,7 +947,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -967,7 +967,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -1219,7 +1219,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1239,7 +1239,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -1552,7 +1552,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1572,7 +1572,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -1895,7 +1895,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1915,7 +1915,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2193,7 +2193,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2213,7 +2213,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2260,7 +2260,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2280,7 +2280,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2517,7 +2517,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2537,7 +2537,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2581,7 +2581,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2601,7 +2601,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2863,7 +2863,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2883,7 +2883,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -3185,7 +3185,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -3205,7 +3205,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -3509,7 +3509,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -3529,7 +3529,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -3843,7 +3843,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -3863,7 +3863,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -4257,7 +4257,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -4277,7 +4277,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -4680,7 +4680,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -4700,7 +4700,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -4747,7 +4747,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -4767,7 +4767,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5065,7 +5065,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -5085,7 +5085,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5357,7 +5357,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -5377,7 +5377,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5836,7 +5836,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -5856,7 +5856,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -6454,7 +6454,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -6474,7 +6474,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -6906,7 +6906,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -6926,7 +6926,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -7339,7 +7339,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -7359,7 +7359,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -7762,7 +7762,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -7782,7 +7782,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -8255,7 +8255,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -8275,7 +8275,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -8692,7 +8692,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -8712,7 +8712,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/union_fast_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_fast_stats.q.out b/ql/src/test/results/clientpositive/union_fast_stats.q.out
index 71a0486..1affbe1 100644
--- a/ql/src/test/results/clientpositive/union_fast_stats.q.out
+++ b/ql/src/test/results/clientpositive/union_fast_stats.q.out
@@ -120,7 +120,7 @@ Table Parameters:
numFiles 4
numRows 15
rawDataSize 3483
- totalSize 3915
+ totalSize 4211
#### A masked pattern was here ####
# Storage Information
@@ -172,8 +172,8 @@ Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 4
numRows 15
- rawDataSize 3483
- totalSize 3915
+ rawDataSize 3651
+ totalSize 4211
#### A masked pattern was here ####
# Storage Information
@@ -237,8 +237,8 @@ Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 5
numRows 20
- rawDataSize 4552
- totalSize 5225
+ rawDataSize 4720
+ totalSize 5568
#### A masked pattern was here ####
# Storage Information
@@ -393,7 +393,7 @@ Table Parameters:
numFiles 1
numRows 15
rawDataSize 3483
- totalSize 3176
+ totalSize 3223
#### A masked pattern was here ####
# Storage Information
@@ -446,7 +446,7 @@ Table Parameters:
numFiles 1
numRows 15
rawDataSize 3320
- totalSize 3176
+ totalSize 3223
#### A masked pattern was here ####
# Storage Information
@@ -511,7 +511,7 @@ Table Parameters:
numFiles 2
numRows 20
rawDataSize 4389
- totalSize 4486
+ totalSize 4580
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/vectorized_ptf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/vectorized_ptf.q.out
index e65a880..5e6a72e 100644
--- a/ql/src/test/results/clientpositive/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_ptf.q.out
@@ -263,7 +263,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -283,7 +283,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -654,7 +654,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -674,7 +674,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -1017,7 +1017,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1037,7 +1037,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -1283,7 +1283,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1303,7 +1303,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -1657,7 +1657,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1677,7 +1677,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2041,7 +2041,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2061,7 +2061,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2380,7 +2380,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2400,7 +2400,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2520,7 +2520,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2540,7 +2540,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2734,7 +2734,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2754,7 +2754,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2874,7 +2874,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2894,7 +2894,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -3116,7 +3116,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -3136,7 +3136,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -3479,7 +3479,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -3499,7 +3499,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -3844,7 +3844,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -3864,7 +3864,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -4219,7 +4219,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -4239,7 +4239,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -4721,7 +4721,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -4741,7 +4741,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5185,7 +5185,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -5205,7 +5205,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5325,7 +5325,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -5345,7 +5345,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5650,7 +5650,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -5670,7 +5670,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5982,7 +5982,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -6002,7 +6002,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -6501,7 +6501,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -6521,7 +6521,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -7251,7 +7251,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -7271,7 +7271,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -7791,7 +7791,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -7811,7 +7811,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -8359,7 +8359,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -8379,7 +8379,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -8870,7 +8870,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -8890,7 +8890,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -9498,7 +9498,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -9518,7 +9518,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -10023,7 +10023,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -10043,7 +10043,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
[07/22] hive git commit: HIVE-11883 'transactional' table property
for ACID should be case insensitive (Eugene Koifman,
reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
HIVE-11883 'transactional' table property for ACID should be case insensitive (Eugene Koifman, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2c445cc8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2c445cc8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2c445cc8
Branch: refs/heads/llap
Commit: 2c445cc8dc0bedf2297725ab2404c9d866b5906e
Parents: a6ab68e
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Wed Sep 30 16:11:27 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Wed Sep 30 16:11:27 2015 -0700
----------------------------------------------------------------------
.../java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java | 3 +++
ql/src/test/queries/clientpositive/update_all_types.q | 2 +-
ql/src/test/results/clientpositive/tez/update_all_types.q.out | 4 ++--
ql/src/test/results/clientpositive/update_all_types.q.out | 4 ++--
4 files changed, 8 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2c445cc8/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index dda28b0..dbc6d8f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -12183,6 +12183,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
if (!SessionState.get().getTxnMgr().supportsAcid()) return false;
String tableIsTransactional =
tab.getProperty(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL);
+ if(tableIsTransactional == null) {
+ tableIsTransactional = tab.getProperty(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL.toUpperCase());
+ }
return tableIsTransactional != null && tableIsTransactional.equalsIgnoreCase("true");
}
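The patch above only retries the lookup with the upper-cased constant, which covers 'transactional' vs 'TRANSACTIONAL'. For comparison, a fully case-insensitive lookup would have to scan the parameter map; a minimal sketch (hypothetical helper, not part of the commit):

    import java.util.Map;

    // Hypothetical helper: find a table property regardless of the key's case.
    // Unlike the two-probe approach in the patch, this also matches mixed-case
    // keys such as 'Transactional'.
    static String getPropertyIgnoreCase(Map<String, String> params, String key) {
      for (Map.Entry<String, String> e : params.entrySet()) {
        if (e.getKey() != null && e.getKey().equalsIgnoreCase(key)) {
          return e.getValue();
        }
      }
      return null;
    }

The two-probe approach in the patch is cheaper per lookup, at the cost of not handling mixed-case spellings of the key.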
http://git-wip-us.apache.org/repos/asf/hive/blob/2c445cc8/ql/src/test/queries/clientpositive/update_all_types.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/update_all_types.q b/ql/src/test/queries/clientpositive/update_all_types.q
index 262a304..0229845 100644
--- a/ql/src/test/queries/clientpositive/update_all_types.q
+++ b/ql/src/test/queries/clientpositive/update_all_types.q
@@ -17,7 +17,7 @@ create table acid_uat(ti tinyint,
s string,
vc varchar(128),
ch char(36),
- b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+ b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE');
insert into table acid_uat
select ctinyint,
http://git-wip-us.apache.org/repos/asf/hive/blob/2c445cc8/ql/src/test/results/clientpositive/tez/update_all_types.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/update_all_types.q.out b/ql/src/test/results/clientpositive/tez/update_all_types.q.out
index ca098fb..1cfa088 100644
--- a/ql/src/test/results/clientpositive/tez/update_all_types.q.out
+++ b/ql/src/test/results/clientpositive/tez/update_all_types.q.out
@@ -13,7 +13,7 @@ create table acid_uat(ti tinyint,
s string,
vc varchar(128),
ch char(36),
- b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+ b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE')
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@acid_uat
@@ -32,7 +32,7 @@ create table acid_uat(ti tinyint,
s string,
vc varchar(128),
ch char(36),
- b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+ b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE')
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@acid_uat
http://git-wip-us.apache.org/repos/asf/hive/blob/2c445cc8/ql/src/test/results/clientpositive/update_all_types.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/update_all_types.q.out b/ql/src/test/results/clientpositive/update_all_types.q.out
index ca098fb..1cfa088 100644
--- a/ql/src/test/results/clientpositive/update_all_types.q.out
+++ b/ql/src/test/results/clientpositive/update_all_types.q.out
@@ -13,7 +13,7 @@ create table acid_uat(ti tinyint,
s string,
vc varchar(128),
ch char(36),
- b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+ b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE')
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@acid_uat
@@ -32,7 +32,7 @@ create table acid_uat(ti tinyint,
s string,
vc varchar(128),
ch char(36),
- b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+ b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE')
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@acid_uat
[17/22] hive git commit: HIVE-11898 : support default partition in
metastoredirectsql (Sergey Shelukhin, reviewed by Sushanth Sowmyan)
Posted by se...@apache.org.
HIVE-11898 : support default partition in metastoredirectsql (Sergey Shelukhin, reviewed by Sushanth Sowmyan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/116c3e3b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/116c3e3b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/116c3e3b
Branch: refs/heads/llap
Commit: 116c3e3b2d2b2b22fef9abed8f092b04155784cd
Parents: a953b43
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Oct 1 14:11:58 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Oct 1 14:11:58 2015 -0700
----------------------------------------------------------------------
.../hive/metastore/MetaStoreDirectSql.java | 34 +++++++++++++-------
1 file changed, 22 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/116c3e3b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index 1f89b7c..95b1ccc 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -109,6 +109,7 @@ class MetaStoreDirectSql {
private final DB dbType;
private final int batchSize;
private final boolean convertMapNullsToEmptyStrings;
+ private final String defaultPartName;
/**
* Whether direct SQL can be used with the current datastore backing {@link #pm}.
@@ -116,6 +117,7 @@ class MetaStoreDirectSql {
private final boolean isCompatibleDatastore;
private final boolean isAggregateStatsCacheEnabled;
private AggregateStatsCache aggrStatsCache;
+
public MetaStoreDirectSql(PersistenceManager pm, Configuration conf) {
this.pm = pm;
this.dbType = determineDbType();
@@ -127,6 +129,7 @@ class MetaStoreDirectSql {
convertMapNullsToEmptyStrings =
HiveConf.getBoolVar(conf, ConfVars.METASTORE_ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS);
+ defaultPartName = HiveConf.getVar(conf, ConfVars.DEFAULTPARTITIONNAME);
String jdoIdFactory = HiveConf.getVar(conf, ConfVars.METASTORE_IDENTIFIER_FACTORY);
if (! ("datanucleus1".equalsIgnoreCase(jdoIdFactory))){
@@ -390,7 +393,7 @@ class MetaStoreDirectSql {
// Derby and Oracle do not interpret filters ANSI-properly in some cases and need a workaround.
boolean dbHasJoinCastBug = (dbType == DB.DERBY || dbType == DB.ORACLE);
String sqlFilter = PartitionFilterGenerator.generateSqlFilter(
- table, tree, params, joins, dbHasJoinCastBug);
+ table, tree, params, joins, dbHasJoinCastBug, defaultPartName);
if (sqlFilter == null) {
return null; // Cannot make SQL filter to push down.
}
@@ -490,8 +493,8 @@ class MetaStoreDirectSql {
}
List<Object> sqlResult = executeWithArray(query, params, queryText);
long queryTime = doTrace ? System.nanoTime() : 0;
+ timingTrace(doTrace, queryText, start, queryTime);
if (sqlResult.isEmpty()) {
- timingTrace(doTrace, queryText, start, queryTime);
return new ArrayList<Partition>(); // no partitions, bail early.
}
@@ -508,7 +511,6 @@ class MetaStoreDirectSql {
result = getPartitionsFromPartitionIds(dbName, tblName, isView, sqlResult);
}
- timingTrace(doTrace, queryText, start, queryTime);
query.closeAll();
return result;
}
@@ -921,14 +923,16 @@ class MetaStoreDirectSql {
private final List<Object> params;
private final List<String> joins;
private final boolean dbHasJoinCastBug;
+ private final String defaultPartName;
- private PartitionFilterGenerator(
- Table table, List<Object> params, List<String> joins, boolean dbHasJoinCastBug) {
+ private PartitionFilterGenerator(Table table, List<Object> params, List<String> joins,
+ boolean dbHasJoinCastBug, String defaultPartName) {
this.table = table;
this.params = params;
this.joins = joins;
this.dbHasJoinCastBug = dbHasJoinCastBug;
this.filterBuffer = new FilterBuilder(false);
+ this.defaultPartName = defaultPartName;
}
/**
@@ -939,13 +943,14 @@ class MetaStoreDirectSql {
* @return the string representation of the expression tree
*/
private static String generateSqlFilter(Table table, ExpressionTree tree,
- List<Object> params, List<String> joins, boolean dbHasJoinCastBug) throws MetaException {
+ List<Object> params, List<String> joins, boolean dbHasJoinCastBug, String defaultPartName)
+ throws MetaException {
assert table != null;
if (tree.getRoot() == null) {
return "";
}
PartitionFilterGenerator visitor = new PartitionFilterGenerator(
- table, params, joins, dbHasJoinCastBug);
+ table, params, joins, dbHasJoinCastBug, defaultPartName);
tree.accept(visitor);
if (visitor.filterBuffer.hasError()) {
LOG.info("Unable to push down SQL filter: " + visitor.filterBuffer.getErrorMessage());
@@ -1071,28 +1076,33 @@ class MetaStoreDirectSql {
// Build the filter and add parameters linearly; we are traversing leaf nodes LTR.
String tableValue = "\"FILTER" + partColIndex + "\".\"PART_KEY_VAL\"";
+
if (node.isReverseOrder) {
params.add(nodeValue);
}
+ String tableColumn = tableValue;
if (colType != FilterType.String) {
// The underlying database field is varchar, we need to compare numbers.
- // Note that this won't work with __HIVE_DEFAULT_PARTITION__. It will fail and fall
- // back to JDO. That is by design; we could add an ugly workaround here but didn't.
if (colType == FilterType.Integral) {
tableValue = "cast(" + tableValue + " as decimal(21,0))";
} else if (colType == FilterType.Date) {
tableValue = "cast(" + tableValue + " as date)";
}
+ // Workaround for HIVE_DEFAULT_PARTITION - ignore it like JDO does, for now.
+ String tableValue0 = tableValue;
+ tableValue = "(case when " + tableColumn + " <> ?";
+ params.add(defaultPartName);
+
if (dbHasJoinCastBug) {
// This is a workaround for DERBY-6358 and Oracle bug; it is pretty horrible.
- tableValue = "(case when \"TBLS\".\"TBL_NAME\" = ? and \"DBS\".\"NAME\" = ? and "
+ tableValue += (" and \"TBLS\".\"TBL_NAME\" = ? and \"DBS\".\"NAME\" = ? and "
+ "\"FILTER" + partColIndex + "\".\"PART_ID\" = \"PARTITIONS\".\"PART_ID\" and "
- + "\"FILTER" + partColIndex + "\".\"INTEGER_IDX\" = " + partColIndex + " then "
- + tableValue + " else null end)";
+ + "\"FILTER" + partColIndex + "\".\"INTEGER_IDX\" = " + partColIndex);
params.add(table.getTableName().toLowerCase());
params.add(table.getDbName().toLowerCase());
}
+ tableValue += " then " + tableValue0 + " else null end)";
}
if (!node.isReverseOrder) {
params.add(nodeValue);
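The CASE-WHEN guard above keeps the numeric cast from ever seeing the default-partition sentinel (the value of hive.exec.default.partition.name, typically __HIVE_DEFAULT_PARTITION__), mapping it to NULL the way JDO filtering does; the removed comment documented that such values previously failed the cast and fell back to JDO. Roughly, the predicate built for an integral partition column has this shape (a sketch of the generated string with the Derby/Oracle join-cast workaround omitted, not the exact output):

    // Illustrative only: shape of the pushed-down filter for an integral column.
    // The first '?' binds the default partition name, the second the literal
    // from the filter expression.
    String col = "\"FILTER0\".\"PART_KEY_VAL\"";
    String guarded = "(case when " + col + " <> ?"
        + " then cast(" + col + " as decimal(21,0))"
        + " else null end)";
    String predicate = guarded + " = ?";   // e.g. for a filter like part_col = 20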
[11/22] hive git commit: HIVE-4243. Fix column names in ORC metadata.
Posted by se...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index f451fce..69cb6ff 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -1106,6 +1106,8 @@ public class TestInputOutputFormat {
@SuppressWarnings("unchecked,deprecation")
public void testInOutFormat() throws Exception {
Properties properties = new Properties();
+ properties.setProperty("columns", "x,y");
+ properties.setProperty("columns.types", "int:int");
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
@@ -1122,8 +1124,6 @@ public class TestInputOutputFormat {
writer.write(serde.serialize(new MyRow(3,2), inspector));
writer.close(true);
serde = new OrcSerde();
- properties.setProperty("columns", "x,y");
- properties.setProperty("columns.types", "int:int");
SerDeUtils.initializeSerDe(serde, conf, properties, null);
assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
inspector = (StructObjectInspector) serde.getObjectInspector();
@@ -1295,13 +1295,13 @@ public class TestInputOutputFormat {
@SuppressWarnings("deprecation")
public void testEmptyFile() throws Exception {
Properties properties = new Properties();
+ properties.setProperty("columns", "x,y");
+ properties.setProperty("columns.types", "int:int");
HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer =
outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
properties, Reporter.NULL);
writer.close(true);
- properties.setProperty("columns", "x,y");
- properties.setProperty("columns.types", "int:int");
SerDe serde = new OrcSerde();
SerDeUtils.initializeSerDe(serde, conf, properties, null);
InputFormat<?,?> in = new OrcInputFormat();
@@ -1352,6 +1352,8 @@ public class TestInputOutputFormat {
@SuppressWarnings("unchecked,deprecation")
public void testDefaultTypes() throws Exception {
Properties properties = new Properties();
+ properties.setProperty("columns", "str,str2");
+ properties.setProperty("columns.types", "string:string");
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
@@ -1371,7 +1373,6 @@ public class TestInputOutputFormat {
writer.write(serde.serialize(new StringRow("miles"), inspector));
writer.close(true);
serde = new OrcSerde();
- properties.setProperty("columns", "str,str2");
SerDeUtils.initializeSerDe(serde, conf, properties, null);
inspector = (StructObjectInspector) serde.getObjectInspector();
assertEquals("struct<str:string,str2:string>", inspector.getTypeName());
@@ -1892,6 +1893,8 @@ public class TestInputOutputFormat {
@SuppressWarnings("unchecked,deprecation")
public void testSplitElimination() throws Exception {
Properties properties = new Properties();
+ properties.setProperty("columns", "z,r");
+ properties.setProperty("columns.types", "int:struct<x:int,y:int>");
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
@@ -1920,8 +1923,6 @@ public class TestInputOutputFormat {
.build();
conf.set("sarg.pushdown", toKryo(sarg));
conf.set("hive.io.file.readcolumn.names", "z,r");
- properties.setProperty("columns", "z,r");
- properties.setProperty("columns.types", "int:struct<x:int,y:int>");
SerDeUtils.initializeSerDe(serde, conf, properties, null);
inspector = (StructObjectInspector) serde.getObjectInspector();
InputFormat<?,?> in = new OrcInputFormat();
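Every hunk in this file makes the same mechanical move: the 'columns' and 'columns.types' properties are set before the ORC writer is created instead of just before the SerDe is initialized for reading. With this change the writer records the real column names in the file metadata, so they have to be available at write time. A condensed sketch of the required ordering, reusing the names from testEmptyFile above (not a complete test):

    // Column metadata must be in place before getHiveRecordWriter, since the
    // writer now puts the actual names into the ORC footer.
    Properties properties = new Properties();
    properties.setProperty("columns", "x,y");
    properties.setProperty("columns.types", "int:int");
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer =
        outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
            properties, Reporter.NULL);
    // ... write rows and close, then initialize the SerDe for reading ...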
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
index 0bb8401..06e3362 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
@@ -519,9 +519,9 @@ public class TestOrcFile {
Object row = rows.next(null);
assertEquals(tslist.get(idx++).getNanos(), ((TimestampWritable) row).getNanos());
}
- assertEquals(1, OrcUtils.getFlattenedColumnsCount(inspector));
+ assertEquals(0, writer.getSchema().getMaximumId());
boolean[] expected = new boolean[] {false};
- boolean[] included = OrcUtils.includeColumns("", "ts", inspector);
+ boolean[] included = OrcUtils.includeColumns("", writer.getSchema());
assertEquals(true, Arrays.equals(expected, included));
}
@@ -546,17 +546,18 @@ public class TestOrcFile {
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
- assertEquals(3, OrcUtils.getFlattenedColumnsCount(inspector));
+ TypeDescription schema = writer.getSchema();
+ assertEquals(2, schema.getMaximumId());
boolean[] expected = new boolean[] {false, false, true};
- boolean[] included = OrcUtils.includeColumns("string1", "bytes1,string1", inspector);
+ boolean[] included = OrcUtils.includeColumns("string1", schema);
assertEquals(true, Arrays.equals(expected, included));
expected = new boolean[] {false, false, false};
- included = OrcUtils.includeColumns("", "bytes1,string1", inspector);
+ included = OrcUtils.includeColumns("", schema);
assertEquals(true, Arrays.equals(expected, included));
expected = new boolean[] {false, false, false};
- included = OrcUtils.includeColumns(null, "bytes1,string1", inspector);
+ included = OrcUtils.includeColumns(null, schema);
assertEquals(true, Arrays.equals(expected, included));
// check the stats
@@ -656,9 +657,10 @@ public class TestOrcFile {
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
- assertEquals(3, OrcUtils.getFlattenedColumnsCount(inspector));
+ TypeDescription schema = writer.getSchema();
+ assertEquals(2, schema.getMaximumId());
boolean[] expected = new boolean[] {false, true, false};
- boolean[] included = OrcUtils.includeColumns("int1", "int1,string1", inspector);
+ boolean[] included = OrcUtils.includeColumns("int1", schema);
assertEquals(true, Arrays.equals(expected, included));
Metadata metadata = reader.getMetadata();
@@ -742,14 +744,14 @@ public class TestOrcFile {
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
- assertEquals(24, OrcUtils.getFlattenedColumnsCount(inspector));
+ TypeDescription schema = writer.getSchema();
+ assertEquals(23, schema.getMaximumId());
boolean[] expected = new boolean[] {false, false, false, false, false,
false, false, false, false, false,
false, false, false, false, false,
false, false, false, false, false,
false, false, false, false};
- boolean[] included = OrcUtils.includeColumns("",
- "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector);
+ boolean[] included = OrcUtils.includeColumns("", schema);
assertEquals(true, Arrays.equals(expected, included));
expected = new boolean[] {false, true, false, false, false,
@@ -757,8 +759,7 @@ public class TestOrcFile {
true, true, true, true, true,
false, false, false, false, true,
true, true, true, true};
- included = OrcUtils.includeColumns("boolean1,string1,middle,map",
- "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector);
+ included = OrcUtils.includeColumns("boolean1,string1,middle,map", schema);
assertEquals(true, Arrays.equals(expected, included));
expected = new boolean[] {false, true, false, false, false,
@@ -766,8 +767,7 @@ public class TestOrcFile {
true, true, true, true, true,
false, false, false, false, true,
true, true, true, true};
- included = OrcUtils.includeColumns("boolean1,string1,middle,map",
- "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector);
+ included = OrcUtils.includeColumns("boolean1,string1,middle,map", schema);
assertEquals(true, Arrays.equals(expected, included));
expected = new boolean[] {false, true, true, true, true,
@@ -777,7 +777,7 @@ public class TestOrcFile {
true, true, true, true};
included = OrcUtils.includeColumns(
"boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map",
- "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector);
+ schema);
assertEquals(true, Arrays.equals(expected, included));
Metadata metadata = reader.getMetadata();
@@ -1312,17 +1312,18 @@ public class TestOrcFile {
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
- assertEquals(6, OrcUtils.getFlattenedColumnsCount(inspector));
+ TypeDescription schema = writer.getSchema();
+ assertEquals(5, schema.getMaximumId());
boolean[] expected = new boolean[] {false, false, false, false, false, false};
- boolean[] included = OrcUtils.includeColumns("", "time,union,decimal", inspector);
+ boolean[] included = OrcUtils.includeColumns("", schema);
assertEquals(true, Arrays.equals(expected, included));
expected = new boolean[] {false, true, false, false, false, true};
- included = OrcUtils.includeColumns("time,decimal", "time,union,decimal", inspector);
+ included = OrcUtils.includeColumns("time,decimal", schema);
assertEquals(true, Arrays.equals(expected, included));
expected = new boolean[] {false, false, true, true, true, false};
- included = OrcUtils.includeColumns("union", "time,union,decimal", inspector);
+ included = OrcUtils.includeColumns("union", schema);
assertEquals(true, Arrays.equals(expected, included));
assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
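OrcUtils.includeColumns now derives everything from the file's TypeDescription rather than a caller-supplied list of all column names: the result has one entry per column id, where id 0 is the root struct (always false) and ids 1 through getMaximumId() are the flattened columns. A condensed usage sketch based on the assertions above:

    // Condensed from the assertions above: the writer's schema drives pruning.
    TypeDescription schema = writer.getSchema();
    boolean[] included = OrcUtils.includeColumns("int1", schema);
    // included.length == schema.getMaximumId() + 1, and included[0] is false
    // because id 0 is the enclosing root struct.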
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
index 39f71f1..60af40a 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
@@ -896,7 +896,7 @@ public class TestOrcRawRecordMerger {
*/
@Test
public void testRecordReaderNewBaseAndDelta() throws Exception {
- final int BUCKET = 10;
+ final int BUCKET = 11;
Configuration conf = new Configuration();
OrcOutputFormat of = new OrcOutputFormat();
FileSystem fs = FileSystem.getLocal(conf);
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java
index a3d3ec5..f838cbc 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java
@@ -38,241 +38,45 @@ import org.junit.rules.TestName;
public class TestOrcWideTable {
- private static final int MEMORY_FOR_ORC = 512 * 1024 * 1024;
- Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + File.separator + "test"
- + File.separator + "tmp"));
-
- Configuration conf;
- FileSystem fs;
- Path testFilePath;
- float memoryPercent;
-
- @Rule
- public TestName testCaseName = new TestName();
-
- @Before
- public void openFileSystem() throws Exception {
- conf = new Configuration();
- fs = FileSystem.getLocal(conf);
- testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc");
- fs.delete(testFilePath, false);
- // make sure constant memory is available for ORC always
- memoryPercent = (float) MEMORY_FOR_ORC / (float) ManagementFactory.getMemoryMXBean().
- getHeapMemoryUsage().getMax();
- conf.setFloat(HiveConf.ConfVars.HIVE_ORC_FILE_MEMORY_POOL.varname, memoryPercent);
- }
-
@Test
public void testBufferSizeFor1Col() throws IOException {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- int bufferSize = 128 * 1024;
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
- .compress(CompressionKind.NONE).bufferSize(bufferSize));
- final int newBufferSize;
- if (writer instanceof WriterImpl) {
- WriterImpl orcWriter = (WriterImpl) writer;
- newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
- assertEquals(bufferSize, newBufferSize);
- }
+ assertEquals(128 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+ false, 1, 128*1024));
}
@Test
public void testBufferSizeFor1000Col() throws IOException {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- int bufferSize = 128 * 1024;
- String columns = getRandomColumnNames(1000);
- // just for testing. manually write the column names
- conf.set(IOConstants.COLUMNS, columns);
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
- .compress(CompressionKind.NONE).bufferSize(bufferSize));
- final int newBufferSize;
- if (writer instanceof WriterImpl) {
- WriterImpl orcWriter = (WriterImpl) writer;
- newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
- assertEquals(bufferSize, newBufferSize);
- }
+ assertEquals(128 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+ false, 1000, 128*1024));
}
@Test
public void testBufferSizeFor2000Col() throws IOException {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- int bufferSize = 256 * 1024;
- String columns = getRandomColumnNames(2000);
- // just for testing. manually write the column names
- conf.set(IOConstants.COLUMNS, columns);
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
- .compress(CompressionKind.ZLIB).bufferSize(bufferSize));
- final int newBufferSize;
- if (writer instanceof WriterImpl) {
- WriterImpl orcWriter = (WriterImpl) writer;
- newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
- assertEquals(32 * 1024, newBufferSize);
- }
+ assertEquals(32 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+ true, 2000, 256*1024));
}
@Test
public void testBufferSizeFor2000ColNoCompression() throws IOException {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- int bufferSize = 256 * 1024;
- String columns = getRandomColumnNames(2000);
- // just for testing. manually write the column names
- conf.set(IOConstants.COLUMNS, columns);
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
- .compress(CompressionKind.NONE).bufferSize(bufferSize));
- final int newBufferSize;
- if (writer instanceof WriterImpl) {
- WriterImpl orcWriter = (WriterImpl) writer;
- newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
- assertEquals(64 * 1024, newBufferSize);
- }
+ assertEquals(64 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+ false, 2000, 256*1024));
}
@Test
public void testBufferSizeFor4000Col() throws IOException {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- int bufferSize = 256 * 1024;
- String columns = getRandomColumnNames(4000);
- // just for testing. manually write the column names
- conf.set(IOConstants.COLUMNS, columns);
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
- .compress(CompressionKind.ZLIB).bufferSize(bufferSize));
- final int newBufferSize;
- if (writer instanceof WriterImpl) {
- WriterImpl orcWriter = (WriterImpl) writer;
- newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
- assertEquals(16 * 1024, newBufferSize);
- }
+ assertEquals(16 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+ true, 4000, 256*1024));
}
@Test
public void testBufferSizeFor4000ColNoCompression() throws IOException {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- int bufferSize = 256 * 1024;
- String columns = getRandomColumnNames(4000);
- // just for testing. manually write the column names
- conf.set(IOConstants.COLUMNS, columns);
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
- .compress(CompressionKind.NONE).bufferSize(bufferSize));
- final int newBufferSize;
- if (writer instanceof WriterImpl) {
- WriterImpl orcWriter = (WriterImpl) writer;
- newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
- assertEquals(32 * 1024, newBufferSize);
- }
+ assertEquals(32 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+ false, 4000, 256*1024));
}
@Test
public void testBufferSizeFor25000Col() throws IOException {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- int bufferSize = 256 * 1024;
- String columns = getRandomColumnNames(25000);
- // just for testing. manually write the column names
- conf.set(IOConstants.COLUMNS, columns);
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
- .compress(CompressionKind.NONE).bufferSize(bufferSize));
- final int newBufferSize;
- if (writer instanceof WriterImpl) {
- WriterImpl orcWriter = (WriterImpl) writer;
- newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
- // 4K is the minimum buffer size
- assertEquals(4 * 1024, newBufferSize);
- }
- }
-
- @Test
- public void testBufferSizeManualOverride1() throws IOException {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- int bufferSize = 1024;
- String columns = getRandomColumnNames(2000);
- // just for testing. manually write the column names
- conf.set(IOConstants.COLUMNS, columns);
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
- .compress(CompressionKind.NONE).bufferSize(bufferSize));
- final int newBufferSize;
- if (writer instanceof WriterImpl) {
- WriterImpl orcWriter = (WriterImpl) writer;
- newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
- assertEquals(bufferSize, newBufferSize);
- }
- }
-
- @Test
- public void testBufferSizeManualOverride2() throws IOException {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- int bufferSize = 2 * 1024;
- String columns = getRandomColumnNames(4000);
- // just for testing. manually write the column names
- conf.set(IOConstants.COLUMNS, columns);
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
- .compress(CompressionKind.NONE).bufferSize(bufferSize));
- final int newBufferSize;
- if (writer instanceof WriterImpl) {
- WriterImpl orcWriter = (WriterImpl) writer;
- newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
- assertEquals(bufferSize, newBufferSize);
- }
- }
-
- private String getRandomColumnNames(int n) {
- StringBuilder sb = new StringBuilder();
- for (int i = 0; i < n - 1; i++) {
- sb.append("col").append(i).append(",");
- }
- sb.append("col").append(n - 1);
- return sb.toString();
+ assertEquals(4 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+ false, 25000, 256*1024));
}
}
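The rewrite above drops all the writer and ObjectInspector plumbing and tests the now-static WriterImpl.getEstimatedBufferSize(availableMemory, isCompressed, columnCount, requestedBufferSize) directly. A hedged re-derivation of the heuristic that reproduces every expected value in these tests; the per-column stream counts (4 uncompressed, 8 compressed), the power-of-two rounding, and the 4 KB floor are assumptions inferred from the assertions, not the committed implementation:

    public class BufferSizeSketch {
      // assumed floor; the 25000-column case bottoms out at 4 KB
      private static final int MIN_BUFFER = 4 * 1024;

      static int estimateBufferSize(long availableMemory, boolean isCompressed,
                                    int columnCount, int requestedSize) {
        // assumption: 4 buffered streams per column, doubled when
        // compression buffers are needed as well
        int streamsPerColumn = isCompressed ? 8 : 4;
        long perStream = availableMemory / ((long) columnCount * streamsPerColumn);
        // round down to a power of two so buffer sizes stay aligned
        int size = Integer.highestOneBit((int) Math.min(perStream, Integer.MAX_VALUE));
        size = Math.max(size, MIN_BUFFER);
        // never exceed what the caller asked for
        return Math.min(size, requestedSize);
      }

      public static void main(String[] args) {
        long mem = 512L * 1024 * 1024;
        System.out.println(estimateBufferSize(mem, false, 1, 128 * 1024));     // 131072
        System.out.println(estimateBufferSize(mem, false, 2000, 256 * 1024));  // 65536
        System.out.println(estimateBufferSize(mem, true, 4000, 256 * 1024));   // 16384
        System.out.println(estimateBufferSize(mem, false, 25000, 256 * 1024)); // 4096
      }
    }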
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java
new file mode 100644
index 0000000..c3095f7
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+public class TestTypeDescription {
+
+ @Test
+ public void testJson() {
+ TypeDescription bin = TypeDescription.createBinary();
+ assertEquals("{\"category\": \"binary\", \"id\": 0, \"max\": 0}",
+ bin.toJson());
+ assertEquals("binary", bin.toString());
+ TypeDescription struct = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt())
+ .addField("f2", TypeDescription.createString())
+ .addField("f3", TypeDescription.createDecimal());
+ assertEquals("struct<f1:int,f2:string,f3:decimal(38,10)>",
+ struct.toString());
+ assertEquals("{\"category\": \"struct\", \"id\": 0, \"max\": 3, \"fields\": [\n"
+ + " \"f1\": {\"category\": \"int\", \"id\": 1, \"max\": 1},\n"
+ + " \"f2\": {\"category\": \"string\", \"id\": 2, \"max\": 2},\n"
+ + " \"f3\": {\"category\": \"decimal\", \"id\": 3, \"max\": 3, \"precision\": 38, \"scale\": 10}]}",
+ struct.toJson());
+ struct = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createUnion()
+ .addUnionChild(TypeDescription.createByte())
+ .addUnionChild(TypeDescription.createDecimal()
+ .withPrecision(20).withScale(10)))
+ .addField("f2", TypeDescription.createStruct()
+ .addField("f3", TypeDescription.createDate())
+ .addField("f4", TypeDescription.createDouble())
+ .addField("f5", TypeDescription.createBoolean()))
+ .addField("f6", TypeDescription.createChar().withMaxLength(100));
+ assertEquals("struct<f1:union<tinyint,decimal(20,10)>,f2:struct<f3:date,f4:double,f5:boolean>,f6:char(100)>",
+ struct.toString());
+ assertEquals(
+ "{\"category\": \"struct\", \"id\": 0, \"max\": 8, \"fields\": [\n" +
+ " \"f1\": {\"category\": \"union\", \"id\": 1, \"max\": 3, \"children\": [\n" +
+ " {\"category\": \"tinyint\", \"id\": 2, \"max\": 2},\n" +
+ " {\"category\": \"decimal\", \"id\": 3, \"max\": 3, \"precision\": 20, \"scale\": 10}]},\n" +
+ " \"f2\": {\"category\": \"struct\", \"id\": 4, \"max\": 7, \"fields\": [\n" +
+ " \"f3\": {\"category\": \"date\", \"id\": 5, \"max\": 5},\n" +
+ " \"f4\": {\"category\": \"double\", \"id\": 6, \"max\": 6},\n" +
+ " \"f5\": {\"category\": \"boolean\", \"id\": 7, \"max\": 7}]},\n" +
+ " \"f6\": {\"category\": \"char\", \"id\": 8, \"max\": 8, \"length\": 100}]}",
+ struct.toJson());
+ }
+}
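The expected JSON in this new test makes the numbering scheme visible: every node gets an "id" assigned in pre-order and "max" is the largest id in its subtree, which is why the include arrays in TestOrcFile are sized getMaximumId() + 1. A small sketch confirming the pre-order ids for the nested schema asserted above:

    import org.apache.hadoop.hive.ql.io.orc.TypeDescription;

    public class PreOrderIdSketch {
      public static void main(String[] args) {
        TypeDescription schema = TypeDescription.createStruct()
            .addField("f1", TypeDescription.createUnion()
                .addUnionChild(TypeDescription.createByte())
                .addUnionChild(TypeDescription.createDecimal()
                    .withPrecision(20).withScale(10)))
            .addField("f2", TypeDescription.createStruct()
                .addField("f3", TypeDescription.createDate())
                .addField("f4", TypeDescription.createDouble())
                .addField("f5", TypeDescription.createBoolean()))
            .addField("f6", TypeDescription.createChar().withMaxLength(100));
        // pre-order walk: root 0, union 1, tinyint 2, decimal 3,
        // inner struct 4, date 5, double 6, boolean 7, char 8
        System.out.println(schema.getMaximumId()); // 8
      }
    }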
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/resources/orc-file-dump-bloomfilter.out
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump-bloomfilter.out b/ql/src/test/resources/orc-file-dump-bloomfilter.out
index add163c..19a2f65 100644
--- a/ql/src/test/resources/orc-file-dump-bloomfilter.out
+++ b/ql/src/test/resources/orc-file-dump-bloomfilter.out
@@ -1,5 +1,5 @@
Structure for TestFileDump.testDump.orc
-File Version: 0.12 with HIVE_8732
+File Version: 0.12 with HIVE_4243
Rows: 21000
Compression: ZLIB
Compression size: 10000
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/resources/orc-file-dump-bloomfilter2.out
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump-bloomfilter2.out b/ql/src/test/resources/orc-file-dump-bloomfilter2.out
index 06b65ce..a37408c 100644
--- a/ql/src/test/resources/orc-file-dump-bloomfilter2.out
+++ b/ql/src/test/resources/orc-file-dump-bloomfilter2.out
@@ -1,5 +1,5 @@
Structure for TestFileDump.testDump.orc
-File Version: 0.12 with HIVE_8732
+File Version: 0.12 with HIVE_4243
Rows: 21000
Compression: ZLIB
Compression size: 10000
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump-dictionary-threshold.out b/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
index 13e316e..73f9f05 100644
--- a/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
+++ b/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
@@ -1,5 +1,5 @@
Structure for TestFileDump.testDump.orc
-File Version: 0.12 with HIVE_8732
+File Version: 0.12 with HIVE_4243
Rows: 21000
Compression: ZLIB
Compression size: 10000
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/resources/orc-file-dump.json
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump.json b/ql/src/test/resources/orc-file-dump.json
index fe31d5e..14cf962 100644
--- a/ql/src/test/resources/orc-file-dump.json
+++ b/ql/src/test/resources/orc-file-dump.json
@@ -1,7 +1,7 @@
{
"fileName": "TestFileDump.testDump.orc",
"fileVersion": "0.12",
- "writerVersion": "HIVE_8732",
+ "writerVersion": "HIVE_4243",
"numberOfRows": 21000,
"compression": "ZLIB",
"compressionBufferSize": 10000,
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/resources/orc-file-dump.out
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump.out b/ql/src/test/resources/orc-file-dump.out
index 2f5962b..28935ba 100644
--- a/ql/src/test/resources/orc-file-dump.out
+++ b/ql/src/test/resources/orc-file-dump.out
@@ -1,5 +1,5 @@
Structure for TestFileDump.testDump.orc
-File Version: 0.12 with HIVE_8732
+File Version: 0.12 with HIVE_4243
Rows: 21000
Compression: ZLIB
Compression size: 10000
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/resources/orc-file-has-null.out
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-has-null.out b/ql/src/test/resources/orc-file-has-null.out
index fdc3862..9c4e83c 100644
--- a/ql/src/test/resources/orc-file-has-null.out
+++ b/ql/src/test/resources/orc-file-has-null.out
@@ -1,5 +1,5 @@
Structure for TestOrcFile.testHasNull.orc
-File Version: 0.12 with HIVE_8732
+File Version: 0.12 with HIVE_4243
Rows: 20000
Compression: ZLIB
Compression size: 10000
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/annotate_stats_part.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_part.q.out b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
index 2bec917..cf523cb 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_part.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
@@ -98,7 +98,7 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 6 Data size: 767 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 6 Data size: 780 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
@@ -133,11 +133,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 3 Data size: 342 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 3 Data size: 342 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: NONE
ListSink
PREHOOK: query: -- basicStatState: PARTIAL colStatState: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/annotate_stats_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_table.q.out b/ql/src/test/results/clientpositive/annotate_stats_table.q.out
index 87e2fa6..ebc6c5b 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_table.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_table.q.out
@@ -89,11 +89,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: emp_orc
- Statistics: Num rows: 3 Data size: 384 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 394 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: lastname (type: string), deptid (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 384 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 394 Basic stats: COMPLETE Column stats: NONE
ListSink
PREHOOK: query: -- table level analyze statistics
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
index 6f79d69..2f12b8d 100644
--- a/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
@@ -894,7 +894,7 @@ Partition Parameters:
numFiles 2
numRows 32
rawDataSize 640
- totalSize 1400
+ totalSize 1392
#### A masked pattern was here ####
# Storage Information
@@ -936,7 +936,7 @@ Partition Parameters:
numFiles 2
numRows 6
rawDataSize 120
- totalSize 1102
+ totalSize 1096
#### A masked pattern was here ####
# Storage Information
@@ -978,7 +978,7 @@ Partition Parameters:
numFiles 2
numRows 14
rawDataSize 280
- totalSize 1216
+ totalSize 1210
#### A masked pattern was here ####
# Storage Information
@@ -1020,7 +1020,7 @@ Partition Parameters:
numFiles 2
numRows 6
rawDataSize 120
- totalSize 1102
+ totalSize 1096
#### A masked pattern was here ####
# Storage Information
@@ -1061,7 +1061,7 @@ Partition Parameters:
numFiles 8
numRows 32
rawDataSize 640
- totalSize 4548
+ totalSize 4524
#### A masked pattern was here ####
# Storage Information
@@ -1102,7 +1102,7 @@ Partition Parameters:
numFiles 8
numRows 6
rawDataSize 120
- totalSize 2212
+ totalSize 2400
#### A masked pattern was here ####
# Storage Information
@@ -1143,7 +1143,7 @@ Partition Parameters:
numFiles 8
numRows 32
rawDataSize 640
- totalSize 4534
+ totalSize 4510
#### A masked pattern was here ####
# Storage Information
@@ -1184,7 +1184,7 @@ Partition Parameters:
numFiles 8
numRows 6
rawDataSize 120
- totalSize 2212
+ totalSize 2400
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out
index cb0eb58..24ac550 100644
--- a/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out
+++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out
@@ -1157,7 +1157,7 @@ Partition Parameters:
numFiles 1
numRows 11
rawDataSize 88
- totalSize 433
+ totalSize 454
#### A masked pattern was here ####
# Storage Information
@@ -1217,7 +1217,7 @@ Partition Parameters:
numFiles 1
numRows 13
rawDataSize 104
- totalSize 456
+ totalSize 477
#### A masked pattern was here ####
# Storage Information
@@ -1374,7 +1374,7 @@ Partition Parameters:
numFiles 1
numRows 11
rawDataSize 88
- totalSize 433
+ totalSize 454
#### A masked pattern was here ####
# Storage Information
@@ -1434,7 +1434,7 @@ Partition Parameters:
numFiles 1
numRows 13
rawDataSize 104
- totalSize 456
+ totalSize 477
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out
index 0f6b15d..f87a539 100644
--- a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out
+++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out
@@ -136,7 +136,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 362
+ totalSize 369
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -179,7 +179,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 382
+ totalSize 389
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -269,7 +269,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 362
+ totalSize 369
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -312,7 +312,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 382
+ totalSize 389
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -469,7 +469,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 277
+ totalSize 281
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -513,7 +513,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 279
+ totalSize 283
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -557,7 +557,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 261
+ totalSize 265
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -601,7 +601,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 261
+ totalSize 265
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -688,7 +688,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 277
+ totalSize 281
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -732,7 +732,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 279
+ totalSize 283
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -776,7 +776,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 261
+ totalSize 265
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -820,7 +820,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 261
+ totalSize 265
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
index 1fdeb90..5903cd1 100644
--- a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
+++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
@@ -153,7 +153,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 362
+ totalSize 369
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -196,7 +196,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 383
+ totalSize 390
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -239,7 +239,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 404
+ totalSize 410
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -282,7 +282,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 412
+ totalSize 419
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -372,7 +372,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 362
+ totalSize 369
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -415,7 +415,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 383
+ totalSize 390
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -458,7 +458,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 404
+ totalSize 410
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -501,7 +501,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 412
+ totalSize 419
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -604,7 +604,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 362
+ totalSize 369
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -647,7 +647,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 383
+ totalSize 390
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -690,7 +690,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 404
+ totalSize 410
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -733,7 +733,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 412
+ totalSize 419
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -819,7 +819,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 362
+ totalSize 369
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -862,7 +862,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 383
+ totalSize 390
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -905,7 +905,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 404
+ totalSize 410
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -948,7 +948,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 412
+ totalSize 419
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1106,7 +1106,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 281
+ totalSize 286
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1150,7 +1150,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 291
+ totalSize 295
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1194,7 +1194,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 298
+ totalSize 302
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1238,7 +1238,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 277
+ totalSize 281
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1282,7 +1282,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 272
+ totalSize 276
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1326,7 +1326,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 283
+ totalSize 288
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1370,7 +1370,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 292
+ totalSize 297
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1414,7 +1414,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 261
+ totalSize 265
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1458,7 +1458,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 261
+ totalSize 265
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1502,7 +1502,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 294
+ totalSize 298
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1546,7 +1546,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 291
+ totalSize 295
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1633,7 +1633,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 281
+ totalSize 286
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1677,7 +1677,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 291
+ totalSize 295
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1721,7 +1721,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 298
+ totalSize 302
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1765,7 +1765,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 277
+ totalSize 281
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1809,7 +1809,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 272
+ totalSize 276
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1853,7 +1853,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 283
+ totalSize 288
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1897,7 +1897,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 292
+ totalSize 297
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1941,7 +1941,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 261
+ totalSize 265
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1985,7 +1985,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 261
+ totalSize 265
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2029,7 +2029,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 294
+ totalSize 298
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2073,7 +2073,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 291
+ totalSize 295
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/extrapolate_part_stats_partial_ndv.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial_ndv.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial_ndv.q.out
index 81ac963..2ea1e6e 100644
--- a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial_ndv.q.out
+++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial_ndv.q.out
@@ -230,7 +230,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 521
+ totalSize 531
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -273,7 +273,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 553
+ totalSize 562
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -316,7 +316,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 571
+ totalSize 580
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -359,7 +359,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 593
+ totalSize 602
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -543,7 +543,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 521
+ totalSize 531
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -586,7 +586,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 553
+ totalSize 562
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -629,7 +629,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 571
+ totalSize 580
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -672,7 +672,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 593
+ totalSize 602
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -910,7 +910,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 379
+ totalSize 386
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -954,7 +954,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 402
+ totalSize 409
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -998,7 +998,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 417
+ totalSize 423
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1042,7 +1042,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 376
+ totalSize 383
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1086,7 +1086,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 386
+ totalSize 394
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1130,7 +1130,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 380
+ totalSize 387
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1174,7 +1174,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 403
+ totalSize 409
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1218,7 +1218,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 359
+ totalSize 366
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1262,7 +1262,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 354
+ totalSize 361
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1306,7 +1306,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 405
+ totalSize 412
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1350,7 +1350,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 405
+ totalSize 412
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/orc_analyze.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_analyze.q.out b/ql/src/test/results/clientpositive/orc_analyze.q.out
index 6eb9a93..bc46852 100644
--- a/ql/src/test/results/clientpositive/orc_analyze.q.out
+++ b/ql/src/test/results/clientpositive/orc_analyze.q.out
@@ -106,7 +106,7 @@ Table Parameters:
numFiles 1
numRows 100
rawDataSize 52600
- totalSize 3174
+ totalSize 3202
#### A masked pattern was here ####
# Storage Information
@@ -154,7 +154,7 @@ Table Parameters:
numFiles 1
numRows 100
rawDataSize 52600
- totalSize 3174
+ totalSize 3202
#### A masked pattern was here ####
# Storage Information
@@ -202,7 +202,7 @@ Table Parameters:
numFiles 1
numRows 100
rawDataSize 52600
- totalSize 3174
+ totalSize 3202
#### A masked pattern was here ####
# Storage Information
@@ -291,7 +291,7 @@ Table Parameters:
numFiles 1
numRows 100
rawDataSize 52600
- totalSize 3174
+ totalSize 3202
#### A masked pattern was here ####
# Storage Information
@@ -405,7 +405,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -448,7 +448,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -503,7 +503,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -546,7 +546,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -601,7 +601,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -644,7 +644,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -744,7 +744,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -787,7 +787,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -907,7 +907,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -950,7 +950,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -1005,7 +1005,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1048,7 +1048,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -1103,7 +1103,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1146,7 +1146,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -1252,7 +1252,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1295,7 +1295,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -1460,7 +1460,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1560,7 +1560,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1660,7 +1660,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/orc_file_dump.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_file_dump.q.out b/ql/src/test/results/clientpositive/orc_file_dump.q.out
index 67aa189..c494d47 100644
--- a/ql/src/test/results/clientpositive/orc_file_dump.q.out
+++ b/ql/src/test/results/clientpositive/orc_file_dump.q.out
@@ -93,11 +93,11 @@ PREHOOK: Input: default@orc_ppd
#### A masked pattern was here ####
-- BEGIN ORC FILE DUMP --
#### A masked pattern was here ####
-File Version: 0.12 with HIVE_8732
+File Version: 0.12 with HIVE_4243
Rows: 1049
Compression: ZLIB
Compression size: 262144
-Type: struct<_col0:tinyint,_col1:smallint,_col2:int,_col3:bigint,_col4:float,_col5:double,_col6:boolean,_col7:string,_col8:timestamp,_col9:decimal(4,2),_col10:binary>
+Type: struct<t:tinyint,si:smallint,i:int,b:bigint,f:float,d:double,bo:boolean,s:string,ts:timestamp,dec:decimal(4,2),bin:binary>
Stripe Statistics:
Stripe 1:
@@ -192,7 +192,7 @@ Stripes:
Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 0.0268 expectedFpp: 5.147697E-7
Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5
-File length: 33456 bytes
+File length: 33458 bytes
Padding length: 0 bytes
Padding ratio: 0%
-- END ORC FILE DUMP --
@@ -211,11 +211,11 @@ PREHOOK: Input: default@orc_ppd
#### A masked pattern was here ####
-- BEGIN ORC FILE DUMP --
#### A masked pattern was here ####
-File Version: 0.12 with HIVE_8732
+File Version: 0.12 with HIVE_4243
Rows: 1049
Compression: ZLIB
Compression size: 262144
-Type: struct<_col0:tinyint,_col1:smallint,_col2:int,_col3:bigint,_col4:float,_col5:double,_col6:boolean,_col7:string,_col8:timestamp,_col9:decimal(4,2),_col10:binary>
+Type: struct<t:tinyint,si:smallint,i:int,b:bigint,f:float,d:double,bo:boolean,s:string,ts:timestamp,dec:decimal(4,2),bin:binary>
Stripe Statistics:
Stripe 1:
@@ -310,7 +310,7 @@ Stripes:
Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 285 loadFactor: 0.0297 expectedFpp: 2.0324289E-11
Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 849 loadFactor: 0.0884 expectedFpp: 4.231118E-8
-File length: 38610 bytes
+File length: 38613 bytes
Padding length: 0 bytes
Padding ratio: 0%
-- END ORC FILE DUMP --
@@ -341,11 +341,11 @@ PREHOOK: Input: default@orc_ppd_part@ds=2015/hr=10
#### A masked pattern was here ####
-- BEGIN ORC FILE DUMP --
#### A masked pattern was here ####
-File Version: 0.12 with HIVE_8732
+File Version: 0.12 with HIVE_4243
Rows: 1049
Compression: ZLIB
Compression size: 262144
-Type: struct<_col0:tinyint,_col1:smallint,_col2:int,_col3:bigint,_col4:float,_col5:double,_col6:boolean,_col7:string,_col8:timestamp,_col9:decimal(4,2),_col10:binary>
+Type: struct<t:tinyint,si:smallint,i:int,b:bigint,f:float,d:double,bo:boolean,s:string,ts:timestamp,dec:decimal(4,2),bin:binary>
Stripe Statistics:
Stripe 1:
@@ -440,7 +440,7 @@ Stripes:
Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 0.0268 expectedFpp: 5.147697E-7
Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5
-File length: 33456 bytes
+File length: 33458 bytes
Padding length: 0 bytes
Padding ratio: 0%
-- END ORC FILE DUMP --
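The Type line is the user-visible payoff of HIVE-4243: the dump now reports the table's real column names (t, si, i, b, ...) where older files recorded the positional _col0.._col10 placeholders. The names appear to come straight from the schema handed to the writer; a quick sketch of the same rendering for a subset of the dump's columns, built only with constructors exercised in TestTypeDescription (the schema below is a hypothetical example, not from the patch):

    import org.apache.hadoop.hive.ql.io.orc.TypeDescription;

    public class DumpTypeSketch {
      public static void main(String[] args) {
        TypeDescription schema = TypeDescription.createStruct()
            .addField("t", TypeDescription.createByte())
            .addField("i", TypeDescription.createInt())
            .addField("s", TypeDescription.createString())
            .addField("dec", TypeDescription.createDecimal()
                .withPrecision(4).withScale(2))
            .addField("bin", TypeDescription.createBinary());
        // toString produces the same struct<name:type,...> form the
        // dump's "Type:" line shows for these columns
        System.out.println(schema); // struct<t:tinyint,i:int,s:string,dec:decimal(4,2),bin:binary>
      }
    }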
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/orc_int_type_promotion.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_int_type_promotion.q.out b/ql/src/test/results/clientpositive/orc_int_type_promotion.q.out
index d26dff2..03e2f7f 100644
--- a/ql/src/test/results/clientpositive/orc_int_type_promotion.q.out
+++ b/ql/src/test/results/clientpositive/orc_int_type_promotion.q.out
@@ -220,14 +220,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypes_orc
- Statistics: Num rows: 88 Data size: 1772 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 88 Data size: 1766 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 88 Data size: 1772 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 88 Data size: 1766 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 88 Data size: 1772 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 88 Data size: 1766 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
[10/22] hive git commit: HIVE-4243. Fix column names in ORC metadata.
Posted by se...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
index 32514ca..100a3d9 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
@@ -268,7 +268,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -288,7 +288,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -603,7 +603,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -623,7 +623,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -670,7 +670,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -690,7 +690,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -946,7 +946,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -966,7 +966,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -1218,7 +1218,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1238,7 +1238,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -1551,7 +1551,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1571,7 +1571,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -1894,7 +1894,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1914,7 +1914,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2192,7 +2192,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2212,7 +2212,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2259,7 +2259,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2279,7 +2279,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2515,7 +2515,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2535,7 +2535,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2579,7 +2579,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2599,7 +2599,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2860,7 +2860,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2880,7 +2880,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -3182,7 +3182,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -3202,7 +3202,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -3506,7 +3506,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -3526,7 +3526,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -3840,7 +3840,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -3860,7 +3860,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -4254,7 +4254,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -4274,7 +4274,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -4677,7 +4677,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -4697,7 +4697,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -4744,7 +4744,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -4764,7 +4764,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5061,7 +5061,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -5081,7 +5081,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5353,7 +5353,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -5373,7 +5373,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5832,7 +5832,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -5852,7 +5852,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -6464,7 +6464,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -6484,7 +6484,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -6916,7 +6916,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -6936,7 +6936,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -7349,7 +7349,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -7369,7 +7369,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -7772,7 +7772,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -7792,7 +7792,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -8265,7 +8265,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -8285,7 +8285,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -8702,7 +8702,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -8722,7 +8722,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
index 6e33a86..984ef69 100644
--- a/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
@@ -946,7 +946,7 @@ Partition Parameters:
numFiles 2
numRows 32
rawDataSize 640
- totalSize 1400
+ totalSize 1392
#### A masked pattern was here ####
# Storage Information
@@ -988,7 +988,7 @@ Partition Parameters:
numFiles 2
numRows 6
rawDataSize 120
- totalSize 1102
+ totalSize 1096
#### A masked pattern was here ####
# Storage Information
@@ -1030,7 +1030,7 @@ Partition Parameters:
numFiles 2
numRows 14
rawDataSize 280
- totalSize 1216
+ totalSize 1210
#### A masked pattern was here ####
# Storage Information
@@ -1072,7 +1072,7 @@ Partition Parameters:
numFiles 2
numRows 6
rawDataSize 120
- totalSize 1102
+ totalSize 1096
#### A masked pattern was here ####
# Storage Information
@@ -1113,7 +1113,7 @@ Partition Parameters:
numFiles 8
numRows 32
rawDataSize 640
- totalSize 4548
+ totalSize 4524
#### A masked pattern was here ####
# Storage Information
@@ -1154,7 +1154,7 @@ Partition Parameters:
numFiles 8
numRows 6
rawDataSize 120
- totalSize 2212
+ totalSize 2400
#### A masked pattern was here ####
# Storage Information
@@ -1195,7 +1195,7 @@ Partition Parameters:
numFiles 8
numRows 32
rawDataSize 640
- totalSize 4534
+ totalSize 4510
#### A masked pattern was here ####
# Storage Information
@@ -1236,7 +1236,7 @@ Partition Parameters:
numFiles 8
numRows 6
rawDataSize 120
- totalSize 2212
+ totalSize 2400
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out b/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out
index 5bba0cb..ef09bea 100644
--- a/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out
+++ b/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out
@@ -1208,7 +1208,7 @@ Partition Parameters:
numFiles 1
numRows 11
rawDataSize 88
- totalSize 433
+ totalSize 454
#### A masked pattern was here ####
# Storage Information
@@ -1268,7 +1268,7 @@ Partition Parameters:
numFiles 1
numRows 13
rawDataSize 104
- totalSize 456
+ totalSize 477
#### A masked pattern was here ####
# Storage Information
@@ -1436,7 +1436,7 @@ Partition Parameters:
numFiles 1
numRows 11
rawDataSize 88
- totalSize 433
+ totalSize 454
#### A masked pattern was here ####
# Storage Information
@@ -1496,7 +1496,7 @@ Partition Parameters:
numFiles 1
numRows 13
rawDataSize 104
- totalSize 456
+ totalSize 477
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/tez/orc_analyze.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/orc_analyze.q.out b/ql/src/test/results/clientpositive/tez/orc_analyze.q.out
index 6eb9a93..bc46852 100644
--- a/ql/src/test/results/clientpositive/tez/orc_analyze.q.out
+++ b/ql/src/test/results/clientpositive/tez/orc_analyze.q.out
@@ -106,7 +106,7 @@ Table Parameters:
numFiles 1
numRows 100
rawDataSize 52600
- totalSize 3174
+ totalSize 3202
#### A masked pattern was here ####
# Storage Information
@@ -154,7 +154,7 @@ Table Parameters:
numFiles 1
numRows 100
rawDataSize 52600
- totalSize 3174
+ totalSize 3202
#### A masked pattern was here ####
# Storage Information
@@ -202,7 +202,7 @@ Table Parameters:
numFiles 1
numRows 100
rawDataSize 52600
- totalSize 3174
+ totalSize 3202
#### A masked pattern was here ####
# Storage Information
@@ -291,7 +291,7 @@ Table Parameters:
numFiles 1
numRows 100
rawDataSize 52600
- totalSize 3174
+ totalSize 3202
#### A masked pattern was here ####
# Storage Information
@@ -405,7 +405,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -448,7 +448,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -503,7 +503,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -546,7 +546,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -601,7 +601,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -644,7 +644,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -744,7 +744,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -787,7 +787,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -907,7 +907,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -950,7 +950,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -1005,7 +1005,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1048,7 +1048,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -1103,7 +1103,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1146,7 +1146,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -1252,7 +1252,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1295,7 +1295,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -1460,7 +1460,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1560,7 +1560,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1660,7 +1660,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out b/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out
index d0d82a0..9cb7a84 100644
--- a/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out
+++ b/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out
@@ -120,7 +120,7 @@ Table Parameters:
numFiles 4
numRows 0
rawDataSize 0
- totalSize 3915
+ totalSize 4211
#### A masked pattern was here ####
# Storage Information
@@ -172,8 +172,8 @@ Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 4
numRows 15
- rawDataSize 3483
- totalSize 3915
+ rawDataSize 3651
+ totalSize 4211
#### A masked pattern was here ####
# Storage Information
@@ -237,8 +237,8 @@ Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 5
numRows 20
- rawDataSize 4552
- totalSize 5225
+ rawDataSize 4720
+ totalSize 5568
#### A masked pattern was here ####
# Storage Information
@@ -393,7 +393,7 @@ Table Parameters:
numFiles 1
numRows 5
rawDataSize 1069
- totalSize 3177
+ totalSize 3224
#### A masked pattern was here ####
# Storage Information
@@ -446,7 +446,7 @@ Table Parameters:
numFiles 1
numRows 15
rawDataSize 3320
- totalSize 3177
+ totalSize 3224
#### A masked pattern was here ####
# Storage Information
@@ -511,7 +511,7 @@ Table Parameters:
numFiles 2
numRows 20
rawDataSize 4389
- totalSize 4487
+ totalSize 4581
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/tez/vector_outer_join1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_outer_join1.q.out b/ql/src/test/results/clientpositive/tez/vector_outer_join1.q.out
index 664f74a..92e7163 100644
--- a/ql/src/test/results/clientpositive/tez/vector_outer_join1.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_outer_join1.q.out
@@ -183,11 +183,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -197,11 +197,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
input vertices:
1 Map 2
- Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 4016 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
File Output Operator
compressed: false
- Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 4016 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -211,16 +211,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: int)
sort order: +
Map-reduce partition columns: _col2 (type: int)
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
Execution mode: vectorized
@@ -294,11 +294,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint)
outputColumnNames: _col0
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -308,11 +308,11 @@ STAGE PLANS:
outputColumnNames: _col0
input vertices:
1 Map 2
- Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 4016 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
File Output Operator
compressed: false
- Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 4016 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -322,16 +322,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint)
outputColumnNames: _col0
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Stage: Stage-0
@@ -497,11 +497,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cint (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -511,7 +511,7 @@ STAGE PLANS:
outputColumnNames: _col0
input vertices:
1 Map 3
- Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 4016 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
Map Join Operator
condition map:
@@ -522,7 +522,7 @@ STAGE PLANS:
outputColumnNames: _col0
input vertices:
1 Map 4
- Statistics: Num rows: 17 Data size: 4214 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 17 Data size: 4417 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
Group By Operator
aggregations: count(), sum(_col0)
@@ -538,31 +538,31 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cint (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 4
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint)
outputColumnNames: _col0
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Reducer 2
Reduce Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out b/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out
index 7568abf..aa201ad 100644
--- a/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out
@@ -213,11 +213,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -227,11 +227,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
input vertices:
1 Map 2
- Statistics: Num rows: 33 Data size: 7521 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 33 Data size: 7706 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
File Output Operator
compressed: false
- Statistics: Num rows: 33 Data size: 7521 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 33 Data size: 7706 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -241,16 +241,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: int)
sort order: +
Map-reduce partition columns: _col2 (type: int)
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
Execution mode: vectorized
@@ -359,11 +359,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint)
outputColumnNames: _col0
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -373,11 +373,11 @@ STAGE PLANS:
outputColumnNames: _col0
input vertices:
1 Map 2
- Statistics: Num rows: 33 Data size: 7521 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 33 Data size: 7706 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
File Output Operator
compressed: false
- Statistics: Num rows: 33 Data size: 7521 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 33 Data size: 7706 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -387,16 +387,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint)
outputColumnNames: _col0
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Stage: Stage-0
@@ -867,11 +867,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cint (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -881,7 +881,7 @@ STAGE PLANS:
outputColumnNames: _col0
input vertices:
1 Map 3
- Statistics: Num rows: 33 Data size: 7521 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 33 Data size: 7706 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
Map Join Operator
condition map:
@@ -891,7 +891,7 @@ STAGE PLANS:
1 _col0 (type: tinyint)
input vertices:
1 Map 4
- Statistics: Num rows: 36 Data size: 8273 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36 Data size: 8476 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
Group By Operator
aggregations: count()
@@ -907,31 +907,31 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cint (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 4
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint)
outputColumnNames: _col0
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Reducer 2
Reduce Operator Tree:
[15/22] hive git commit: HIVE-11925 : Hive file format checking
breaks load from named pipes (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
HIVE-11925 : Hive file format checking breaks load from named pipes (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
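The patch below detects FIFOs before the format checkers try to open them, since reading a named pipe during validation would consume its contents or hang the load. A self-contained sketch of the same idea, assuming a Unix filesystem that exposes the "unix:mode" attribute; the path is hypothetical:

    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;

    public class PipeProbe {
      // FIFO bit of st_mode; see include/uapi/linux/stat.h.
      private static final int S_IFIFO = 0010000;

      static boolean isPipe(Path p) {
        try {
          int mode = (Integer) Files.getAttribute(p, "unix:mode");
          return (mode & S_IFIFO) != 0;
        } catch (Exception e) {
          // Non-POSIX filesystem or unsupported attribute view:
          // treat the path as a regular file.
          return false;
        }
      }

      public static void main(String[] args) {
        // e.g. run `mkfifo /tmp/p` first; this then prints true.
        System.out.println(isPipe(Paths.get("/tmp/p")));
      }
    }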
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/82bc0e1c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/82bc0e1c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/82bc0e1c
Branch: refs/heads/llap
Commit: 82bc0e1c79ca656ec34a43efe4a8807f0f655e30
Parents: 24988f7
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Oct 1 12:42:28 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Oct 1 12:42:28 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/io/HiveFileFormatUtils.java | 95 ++++++++++++++------
.../hadoop/hive/ql/io/InputFormatChecker.java | 5 +-
.../hadoop/hive/ql/io/RCFileInputFormat.java | 3 +-
.../ql/io/SequenceFileInputFormatChecker.java | 3 +-
.../hive/ql/io/VectorizedRCFileInputFormat.java | 3 +-
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 4 +-
.../ql/io/orc/VectorizedOrcInputFormat.java | 2 +-
.../hive/ql/exec/TestFileSinkOperator.java | 2 +-
.../hive/ql/txn/compactor/CompactorTest.java | 2 +-
9 files changed, 80 insertions(+), 39 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/82bc0e1c/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
index 50ba740..06d3df7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
@@ -19,8 +19,13 @@
package org.apache.hadoop.hive.ql.io;
import java.io.IOException;
+import java.nio.file.FileSystemNotFoundException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -28,10 +33,13 @@ import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
@@ -69,6 +77,7 @@ import org.apache.hive.common.util.ReflectionUtil;
*
*/
public final class HiveFileFormatUtils {
+ private static final Log LOG = LogFactory.getLog(HiveFileFormatUtils.class);
static {
outputFormatSubstituteMap =
@@ -177,44 +186,51 @@ public final class HiveFileFormatUtils {
*/
@SuppressWarnings("unchecked")
public static boolean checkInputFormat(FileSystem fs, HiveConf conf,
- Class<? extends InputFormat> inputFormatCls, ArrayList<FileStatus> files)
+ Class<? extends InputFormat> inputFormatCls, List<FileStatus> files)
throws HiveException {
- if (files.size() > 0) {
- Class<? extends InputFormatChecker> checkerCls = getInputFormatChecker(inputFormatCls);
- if (checkerCls == null
- && inputFormatCls.isAssignableFrom(TextInputFormat.class)) {
- // we get a text input format here, we can not determine a file is text
- // according to its content, so we can do is to test if other file
- // format can accept it. If one other file format can accept this file,
- // we treat this file as text file, although it maybe not.
- return checkTextInputFormat(fs, conf, files);
- }
+ if (files.isEmpty()) return false;
+ Class<? extends InputFormatChecker> checkerCls = getInputFormatChecker(inputFormatCls);
+ if (checkerCls == null
+ && inputFormatCls.isAssignableFrom(TextInputFormat.class)) {
+ // We have a text input format here, and we cannot tell from content
+ // alone whether a file really is text. All we can do is test whether
+ // any other registered file format accepts it; if none does, we treat
+ // the file as text, although it may not be.
+ return checkTextInputFormat(fs, conf, files);
+ }
- if (checkerCls != null) {
- InputFormatChecker checkerInstance = inputFormatCheckerInstanceCache
- .get(checkerCls);
- try {
- if (checkerInstance == null) {
- checkerInstance = checkerCls.newInstance();
- inputFormatCheckerInstanceCache.put(checkerCls, checkerInstance);
- }
- return checkerInstance.validateInput(fs, conf, files);
- } catch (Exception e) {
- throw new HiveException(e);
+ if (checkerCls != null) {
+ InputFormatChecker checkerInstance = inputFormatCheckerInstanceCache.get(checkerCls);
+ try {
+ if (checkerInstance == null) {
+ checkerInstance = checkerCls.newInstance();
+ inputFormatCheckerInstanceCache.put(checkerCls, checkerInstance);
}
+ return checkerInstance.validateInput(fs, conf, files);
+ } catch (Exception e) {
+ throw new HiveException(e);
}
- return true;
}
- return false;
+ return true;
}
@SuppressWarnings("unchecked")
private static boolean checkTextInputFormat(FileSystem fs, HiveConf conf,
- ArrayList<FileStatus> files) throws HiveException {
- Set<Class<? extends InputFormat>> inputFormatter = inputFormatCheckerMap
- .keySet();
+ List<FileStatus> files) throws HiveException {
+ List<FileStatus> files2 = new LinkedList<>(files);
+ Iterator<FileStatus> iter = files2.iterator();
+ while (iter.hasNext()) {
+ FileStatus file = iter.next();
+ if (file == null) continue;
+ if (isPipe(fs, file)) {
+ LOG.info("Skipping format check for " + file.getPath() + " as it is a pipe");
+ iter.remove();
+ }
+ }
+ if (files2.isEmpty()) return true;
+ Set<Class<? extends InputFormat>> inputFormatter = inputFormatCheckerMap.keySet();
for (Class<? extends InputFormat> reg : inputFormatter) {
- boolean result = checkInputFormat(fs, conf, reg, files);
+ boolean result = checkInputFormat(fs, conf, reg, files2);
if (result) {
return false;
}
@@ -222,6 +238,29 @@ public final class HiveFileFormatUtils {
return true;
}
+ // See include/uapi/linux/stat.h
+ private static final int S_IFIFO = 0010000;
+ private static boolean isPipe(FileSystem fs, FileStatus file) {
+ if (fs instanceof DistributedFileSystem) {
+ return false; // Shortcut for HDFS.
+ }
+ int mode = 0;
+ Object pathToLog = file.getPath();
+ try {
+ java.nio.file.Path realPath = Paths.get(file.getPath().toUri());
+ pathToLog = realPath;
+ mode = (Integer)Files.getAttribute(realPath, "unix:mode");
+ } catch (FileSystemNotFoundException t) {
+ return false; // Probably not a local filesystem; no need to check.
+ } catch (UnsupportedOperationException | IOException
+ | SecurityException | IllegalArgumentException t) {
+ LOG.info("Failed to check mode for " + pathToLog + ": "
+ + t.getMessage() + " (" + t.getClass() + ")");
+ return false;
+ }
+ return (mode & S_IFIFO) != 0;
+ }
+
public static RecordWriter getHiveRecordWriter(JobConf jc,
TableDesc tableInfo, Class<? extends Writable> outputClass,
FileSinkDesc conf, Path outPath, Reporter reporter) throws HiveException {
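Restating the new checkTextInputFormat control flow as a hedged, self-contained sketch (toy types stand in for FileStatus and the registered checker map; not a drop-in replacement for the patch above):

    import java.util.Iterator;
    import java.util.LinkedList;
    import java.util.List;
    import java.util.function.Predicate;

    public class TextCheckSketch {
      interface Checker { boolean accepts(List<String> files); }

      // Drop pipes first, accept if nothing is left, otherwise let every
      // registered structured-format checker try to claim the remaining files.
      static boolean looksLikeText(List<String> files, List<Checker> checkers,
                                   Predicate<String> isPipe) {
        List<String> remaining = new LinkedList<>(files);
        for (Iterator<String> it = remaining.iterator(); it.hasNext(); ) {
          if (isPipe.test(it.next())) {
            it.remove(); // a pipe cannot be re-read, so skip its format check
          }
        }
        if (remaining.isEmpty()) {
          return true; // only pipes: nothing disproves "text", accept the load
        }
        for (Checker c : checkers) {
          if (c.accepts(remaining)) {
            return false; // a structured format claims these files: not text
          }
        }
        return true;
      }
    }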
http://git-wip-us.apache.org/repos/asf/hive/blob/82bc0e1c/ql/src/java/org/apache/hadoop/hive/ql/io/InputFormatChecker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/InputFormatChecker.java b/ql/src/java/org/apache/hadoop/hive/ql/io/InputFormatChecker.java
index 3945411..129b834 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/InputFormatChecker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/InputFormatChecker.java
@@ -19,7 +19,7 @@
package org.apache.hadoop.hive.ql.io;
import java.io.IOException;
-import java.util.ArrayList;
+import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -34,7 +34,6 @@ public interface InputFormatChecker {
* This method is used to validate the input files.
*
*/
- boolean validateInput(FileSystem fs, HiveConf conf,
- ArrayList<FileStatus> files) throws IOException;
+ boolean validateInput(FileSystem fs, HiveConf conf, List<FileStatus> files) throws IOException;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/82bc0e1c/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileInputFormat.java
index 88198ed..6004db8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileInputFormat.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.io;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -59,7 +60,7 @@ public class RCFileInputFormat<K extends LongWritable, V extends BytesRefArrayWr
@Override
public boolean validateInput(FileSystem fs, HiveConf conf,
- ArrayList<FileStatus> files) throws IOException {
+ List<FileStatus> files) throws IOException {
if (files.size() <= 0) {
return false;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/82bc0e1c/ql/src/java/org/apache/hadoop/hive/ql/io/SequenceFileInputFormatChecker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/SequenceFileInputFormatChecker.java b/ql/src/java/org/apache/hadoop/hive/ql/io/SequenceFileInputFormatChecker.java
index e2666d7..6cb46c9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/SequenceFileInputFormatChecker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/SequenceFileInputFormatChecker.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.io;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -35,7 +36,7 @@ public class SequenceFileInputFormatChecker implements InputFormatChecker {
@Override
public boolean validateInput(FileSystem fs, HiveConf conf,
- ArrayList<FileStatus> files) throws IOException {
+ List<FileStatus> files) throws IOException {
if (files.size() <= 0) {
return false;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/82bc0e1c/ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileInputFormat.java
index faad5f2..e9e1d5a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileInputFormat.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.io;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -56,7 +57,7 @@ public class VectorizedRCFileInputFormat extends FileInputFormat<NullWritable, V
@Override
public boolean validateInput(FileSystem fs, HiveConf conf,
- ArrayList<FileStatus> files) throws IOException {
+ List<FileStatus> files) throws IOException {
if (files.size() <= 0) {
return false;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/82bc0e1c/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index c45b6e6..57bde3e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -106,7 +106,7 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder;
* that added this event. Insert and update events include the entire row, while
* delete events have null for row.
*/
-public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
+public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
InputFormatChecker, VectorizedInputFormatInterface,
AcidInputFormat<NullWritable, OrcStruct>, CombineHiveInputFormat.AvoidSplitCombination {
@@ -395,7 +395,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
}
@Override
public boolean validateInput(FileSystem fs, HiveConf conf,
- ArrayList<FileStatus> files
+ List<FileStatus> files
) throws IOException {
if (Utilities.isVectorMode(conf)) {
http://git-wip-us.apache.org/repos/asf/hive/blob/82bc0e1c/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
index 3992d8c..bf09001 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
@@ -158,7 +158,7 @@ public class VectorizedOrcInputFormat extends FileInputFormat<NullWritable, Vect
@Override
public boolean validateInput(FileSystem fs, HiveConf conf,
- ArrayList<FileStatus> files
+ List<FileStatus> files
) throws IOException {
if (files.size() <= 0) {
return false;
http://git-wip-us.apache.org/repos/asf/hive/blob/82bc0e1c/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
index 9e89376..4594836 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
@@ -704,7 +704,7 @@ public class TestFileSinkOperator {
}
@Override
- public boolean validateInput(FileSystem fs, HiveConf conf, ArrayList<FileStatus> files) throws
+ public boolean validateInput(FileSystem fs, HiveConf conf, List<FileStatus> files) throws
IOException {
return false;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/82bc0e1c/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
index 21adc9d..5a8c932 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
@@ -361,7 +361,7 @@ public abstract class CompactorTest {
}
@Override
- public boolean validateInput(FileSystem fs, HiveConf conf, ArrayList<FileStatus> files) throws
+ public boolean validateInput(FileSystem fs, HiveConf conf, List<FileStatus> files) throws
IOException {
return false;
}
[06/22] hive git commit: HIVE-11916 TxnHandler.getOpenTxnsInfo() and
getOpenTxns() may produce inconsistent result (Eugene Koifman,
reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
HIVE-11916 TxnHandler.getOpenTxnsInfo() and getOpenTxns() may produce inconsistent result (Eugene Koifman, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a6ab68e8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a6ab68e8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a6ab68e8
Branch: refs/heads/llap
Commit: a6ab68e8ff889933a6d7d164d8c91ed8d3fa8609
Parents: 0d43e87
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Wed Sep 30 16:07:20 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Wed Sep 30 16:07:20 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/metastore/txn/TxnHandler.java | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/a6ab68e8/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index cc7e2c6..6218a03 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -47,8 +47,9 @@ import java.util.concurrent.TimeUnit;
* A handler to answer transaction related calls that come into the metastore
* server.
*
- * Note on log messages: Please include txnid:X and lockid info
- * {@link org.apache.hadoop.hive.common.JavaUtils#lockIdToString(long)} in all messages.
+ * Note on log messages: Please include txnid:X and lockid info using
+ * {@link org.apache.hadoop.hive.common.JavaUtils#txnIdToString(long)}
+ * and {@link org.apache.hadoop.hive.common.JavaUtils#lockIdToString(long)} in all messages.
* The txnid:X and lockid:Y matches how Thrift object toString() methods are generated,
* so keeping the format consistent makes grep'ing the logs much easier.
*/
@@ -166,7 +167,8 @@ public class TxnHandler {
}
List<TxnInfo> txnInfo = new ArrayList<TxnInfo>();
- s = "select txn_id, txn_state, txn_user, txn_host from TXNS";
+ //need the WHERE clause below to ensure consistent results with READ_COMMITTED
+ s = "select txn_id, txn_state, txn_user, txn_host from TXNS where txn_id <= " + hwm;
LOG.debug("Going to execute query<" + s + ">");
rs = stmt.executeQuery(s);
while (rs.next()) {
@@ -230,7 +232,8 @@ public class TxnHandler {
}
Set<Long> openList = new HashSet<Long>();
- s = "select txn_id from TXNS";
+ //need the WHERE clause below to ensure consistent results with READ_COMMITTED
+ s = "select txn_id from TXNS where txn_id <= " + hwm;
LOG.debug("Going to execute query<" + s + ">");
rs = stmt.executeQuery(s);
while (rs.next()) {
@@ -1459,7 +1462,7 @@ public class TxnHandler {
LockResponse response = new LockResponse();
response.setLockid(extLockId);
- LOG.debug("checkLock(): Setting savepoint. extLockId=" + extLockId);
+ LOG.debug("checkLock(): Setting savepoint. extLockId=" + JavaUtils.lockIdToString(extLockId));
Savepoint save = dbConn.setSavepoint();
StringBuilder query = new StringBuilder("select hl_lock_ext_id, " +
"hl_lock_int_id, hl_db, hl_table, hl_partition, hl_lock_state, " +
@@ -1685,7 +1688,7 @@ public class TxnHandler {
if (rc < 1) {
LOG.debug("Going to rollback");
dbConn.rollback();
- throw new NoSuchLockException("No such lock: (" + extLockId + "," +
+ throw new NoSuchLockException("No such lock: (" + JavaUtils.lockIdToString(extLockId) + "," +
+ intLockId + ")");
}
// We update the database, but we don't commit because there may be other
@@ -1710,7 +1713,7 @@ public class TxnHandler {
if (rc < 1) {
LOG.debug("Going to rollback");
dbConn.rollback();
- throw new NoSuchLockException("No such lock: " + extLockId);
+ throw new NoSuchLockException("No such lock: " + JavaUtils.lockIdToString(extLockId));
}
LOG.debug("Going to commit");
dbConn.commit();
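
Under READ_COMMITTED, transactions that commit between the high-water-mark
read and the TXNS scan would otherwise show up in one result but not the
other; bounding both scans by the same high-water mark keeps them consistent.
A hedged JDBC sketch of the pattern (the TXNS query is from the diff; the
NEXT_TXN_ID read is assumed from surrounding TxnHandler code not shown here):

    import java.sql.Connection;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;
    import java.util.HashSet;
    import java.util.Set;

    public class HwmScanDemo {
      static Set<Long> openTxns(Connection dbConn) throws SQLException {
        Set<Long> open = new HashSet<Long>();
        try (Statement stmt = dbConn.createStatement()) {
          // Read the high-water mark first.
          long hwm;
          try (ResultSet rs = stmt.executeQuery(
              "select ntxn_next - 1 from NEXT_TXN_ID")) {
            rs.next();
            hwm = rs.getLong(1);
          }
          // Then bound the TXNS scan by it, as the patch does, so rows
          // committed between the two reads cannot skew the result.
          try (ResultSet rs = stmt.executeQuery(
              "select txn_id from TXNS where txn_id <= " + hwm)) {
            while (rs.next()) {
              open.add(rs.getLong(1));
            }
          }
        }
        return open;
      }
    }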
[08/22] hive git commit: HIVE-11445 : CBO: Calcite Operator To Hive
Operator (Calcite Return Path) : groupby distinct does not work (Jesus
Camacho Rodriguez, reviewed by Pengcheng Xiong)
Posted by se...@apache.org.
HIVE-11445 : CBO: Calcite Operator To Hive Operator (Calcite Return Path) : groupby distinct does not work (Jesus Camacho Rodriguez, reviewed by Pengcheng Xiong)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/99fa337b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/99fa337b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/99fa337b
Branch: refs/heads/llap
Commit: 99fa337b0b146be984fc49d52ecb1a3494164082
Parents: 2c445cc
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Wed Sep 30 09:32:27 2015 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Oct 1 10:10:05 2015 +0100
----------------------------------------------------------------------
.../calcite/translator/HiveGBOpConvUtil.java | 43 ++--
.../cbo_rp_gby2_map_multi_distinct.q | 38 +++
.../cbo_rp_gby2_map_multi_distinct.q.out | 236 +++++++++++++++++++
3 files changed, 299 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/99fa337b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
index a0e2e67..a129cf3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
@@ -40,15 +40,14 @@ import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
@@ -70,12 +69,17 @@ import com.google.common.collect.ImmutableList;
* external names if possible.<br>
* 3. In ExprNode & in ColumnInfo the tableAlias/VirtualColumn is specified
* differently for different GB/RS in pipeline. Remove the different treatments.
- * 3. VirtualColMap needs to be maintained
+ * 4. VirtualColMap needs to be maintained
*
*/
public class HiveGBOpConvUtil {
+
private static enum HIVEGBPHYSICALMODE {
- MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB, MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB, MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT, MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT, NO_MAP_SIDE_GB_NO_SKEW, NO_MAP_SIDE_GB_SKEW
+ MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB,
+ MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB,
+ MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT,
+ MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT,
+ NO_MAP_SIDE_GB_NO_SKEW, NO_MAP_SIDE_GB_SKEW
};
private static class UDAFAttrs {
@@ -94,8 +98,8 @@ public class HiveGBOpConvUtil {
private final List<ExprNodeDesc> gbKeys = new ArrayList<ExprNodeDesc>();
private final List<Integer> grpSets = new ArrayList<Integer>();
- private boolean grpSetRqrAdditionalMRJob;
- private boolean grpIdFunctionNeeded;
+ private boolean grpSetRqrAdditionalMRJob;
+ private boolean grpIdFunctionNeeded;
private final List<String> distExprNames = new ArrayList<String>();
private final List<TypeInfo> distExprTypes = new ArrayList<TypeInfo>();
@@ -105,12 +109,12 @@ public class HiveGBOpConvUtil {
private final List<ExprNodeDesc> deDupedNonDistIrefs = new ArrayList<ExprNodeDesc>();
private final List<UDAFAttrs> udafAttrs = new ArrayList<UDAFAttrs>();
- private boolean containsDistinctAggr = false;
+ private boolean containsDistinctAggr = false;
- float groupByMemoryUsage;
- float memoryThreshold;
+ float groupByMemoryUsage;
+ float memoryThreshold;
- private HIVEGBPHYSICALMODE gbPhysicalPipelineMode;
+ private HIVEGBPHYSICALMODE gbPhysicalPipelineMode;
};
private static HIVEGBPHYSICALMODE getAggOPMode(HiveConf hc, GBInfo gbInfo) {
@@ -203,11 +207,14 @@ public class HiveGBOpConvUtil {
for (int i = 0; i < argLst.size(); i++) {
if (!distinctRefs.contains(argLst.get(i))) {
distinctRefs.add(argLst.get(i));
- distParamInRefsToOutputPos.put(argLst.get(i), gbInfo.distExprNodes.size());
distinctExpr = HiveCalciteUtil.getExprNode(argLst.get(i), aggInputRel, exprConv);
- gbInfo.distExprNodes.add(distinctExpr);
- gbInfo.distExprNames.add(argNames.get(i));
- gbInfo.distExprTypes.add(distinctExpr.getTypeInfo());
+ // Only distinct nodes that are NOT part of the key should be added to distExprNodes
+ if (ExprNodeDescUtils.indexOf(distinctExpr, gbInfo.gbKeys) < 0) {
+ distParamInRefsToOutputPos.put(argLst.get(i), gbInfo.distExprNodes.size());
+ gbInfo.distExprNodes.add(distinctExpr);
+ gbInfo.distExprNames.add(argNames.get(i));
+ gbInfo.distExprTypes.add(distinctExpr.getTypeInfo());
+ }
}
}
}
@@ -254,10 +261,10 @@ public class HiveGBOpConvUtil {
}
// special handling for count, similar to PlanModifierForASTConv::replaceEmptyGroupAggr()
- udafAttrs.udafEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator(udafAttrs.udafName,
- new ArrayList<ExprNodeDesc>(udafAttrs.udafParams), new ASTNode(),
- udafAttrs.isDistinctUDAF, udafAttrs.udafParams.size() == 0 &&
- "count".equalsIgnoreCase(udafAttrs.udafName) ? true : false);
+ udafAttrs.udafEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator(udafAttrs.udafName,
+ new ArrayList<ExprNodeDesc>(udafAttrs.udafParams), new ASTNode(),
+ udafAttrs.isDistinctUDAF, udafAttrs.udafParams.size() == 0 &&
+ "count".equalsIgnoreCase(udafAttrs.udafName) ? true : false);
gbInfo.udafAttrs.add(udafAttrs);
}
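
The heart of this patch is the new indexOf guard: a DISTINCT argument that
already appears among the group-by keys must not be registered again as a
separate distinct expression, otherwise the reduce-side key layout goes
wrong. A simplified sketch of the rule, with plain strings standing in for
ExprNodeDesc (illustrative only):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class DistinctDedupDemo {
      public static void main(String[] args) {
        // Group-by key and DISTINCT arguments from the new test query.
        List<String> gbKeys = Arrays.asList("substr(key, 1, 1)");
        List<String> distExprNodes = new ArrayList<String>();
        for (String arg : Arrays.asList("substr(key, 1, 1)",
                                        "substr(value, 5)")) {
          // Mirror of the ExprNodeDescUtils.indexOf(...) < 0 guard: a
          // DISTINCT argument that is already a key is not added again.
          if (gbKeys.indexOf(arg) < 0 && !distExprNodes.contains(arg)) {
            distExprNodes.add(arg);
          }
        }
        System.out.println(distExprNodes); // prints [substr(value, 5)]
      }
    }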
http://git-wip-us.apache.org/repos/asf/hive/blob/99fa337b/ql/src/test/queries/clientpositive/cbo_rp_gby2_map_multi_distinct.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cbo_rp_gby2_map_multi_distinct.q b/ql/src/test/queries/clientpositive/cbo_rp_gby2_map_multi_distinct.q
new file mode 100644
index 0000000..28f1f81
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_rp_gby2_map_multi_distinct.q
@@ -0,0 +1,38 @@
+set hive.cbo.enable=true;
+set hive.cbo.returnpath.hiveop=true;
+
+set hive.map.aggr=true;
+set hive.groupby.skewindata=false;
+set mapred.reduce.tasks=31;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE;
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1);
+
+FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1);
+
+SELECT dest1.* FROM dest1;
+
+-- HIVE-5560 when group by key is used in distinct function, invalid results are returned
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1);
+
+FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1);
+
+SELECT dest1.* FROM dest1;
http://git-wip-us.apache.org/repos/asf/hive/blob/99fa337b/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out
new file mode 100644
index 0000000..8592d6c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out
@@ -0,0 +1,236 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string)
+ outputColumnNames: $f0, $f1, $f2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT $f1), sum($f1), sum(DISTINCT $f1), count($f2)
+ keys: $f0 (type: string), $f1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: double), _col5 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col3)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: $f0, $f1, $f2, $f3, $f4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: $f0 (type: string), UDFToInteger($f1) (type: int), concat($f0, $f2) (type: string), UDFToInteger($f3) (type: int), UDFToInteger($f4) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0 1 00.0 0 3
+1 71 116414.0 10044 115
+2 69 225571.0 15780 111
+3 62 332004.0 20119 99
+4 74 452763.0 30965 124
+5 6 5397.0 278 10
+6 5 6398.0 331 6
+7 6 7735.0 447 10
+8 8 8762.0 595 10
+9 7 91047.0 577 12
+PREHOOK: query: -- HIVE-5560 when group by key is used in distinct function, invalid results are returned
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- HIVE-5560 when group by key is used in distinct function, invalid results are returned
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string)
+ outputColumnNames: $f0, $f1, $f2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT $f0), sum($f1), sum(DISTINCT $f1), count($f2)
+ keys: $f0 (type: string), $f1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: double), _col5 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col3)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: $f0, $f1, $f2, $f3, $f4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: $f0 (type: string), UDFToInteger($f1) (type: int), concat($f0, $f2) (type: string), UDFToInteger($f3) (type: int), UDFToInteger($f4) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0 1 00.0 0 3
+1 1 116414.0 10044 115
+2 1 225571.0 15780 111
+3 1 332004.0 20119 99
+4 1 452763.0 30965 124
+5 1 5397.0 278 10
+6 1 6398.0 331 6
+7 1 7735.0 447 10
+8 1 8762.0 595 10
+9 1 91047.0 577 12
[21/22] hive git commit: HIVE-12013 : LLAP: disable most llap tests
before merge (Sergey Shelukhin)
Posted by se...@apache.org.
HIVE-12013 : LLAP: disable most llap tests before merge (Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a1bc2ef4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a1bc2ef4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a1bc2ef4
Branch: refs/heads/llap
Commit: a1bc2ef4cda1fb9a42a58b9433bf60737519d32c
Parents: f272ccb
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Oct 1 19:29:32 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Oct 1 19:29:32 2015 -0700
----------------------------------------------------------------------
itests/qtest/pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/a1bc2ef4/itests/qtest/pom.xml
----------------------------------------------------------------------
diff --git a/itests/qtest/pom.xml b/itests/qtest/pom.xml
index 8c41b5a..bb5b1a1 100644
--- a/itests/qtest/pom.xml
+++ b/itests/qtest/pom.xml
@@ -538,7 +538,7 @@
templatePath="${basedir}/${hive.path.to.root}/ql/src/test/templates/" template="TestCliDriver.vm"
queryDirectory="${basedir}/${hive.path.to.root}/ql/src/test/queries/clientpositive/"
queryFile="${qfile}"
- includeQueryFile="${minitez.query.files},${minitez.query.files.shared}"
+ includeQueryFile="${minitez.query.files}"
queryFileRegex="${qfile_regex}"
clusterMode="llap"
runDisabled="${run_disabled}"
[12/22] hive git commit: HIVE-4243. Fix column names in ORC metadata.
Posted by se...@apache.org.
HIVE-4243. Fix column names in ORC metadata.
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7b1ed3d3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7b1ed3d3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7b1ed3d3
Branch: refs/heads/llap
Commit: 7b1ed3d3037860e2b7fc24b760a993f5e928b816
Parents: 99fa337
Author: Owen O'Malley <om...@apache.org>
Authored: Fri Sep 4 16:11:13 2015 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Thu Oct 1 13:07:03 2015 +0200
----------------------------------------------------------------------
.../hive/ql/io/orc/ColumnStatisticsImpl.java | 55 +-
.../apache/hadoop/hive/ql/io/orc/OrcFile.java | 33 +-
.../hadoop/hive/ql/io/orc/OrcOutputFormat.java | 145 ++++-
.../apache/hadoop/hive/ql/io/orc/OrcUtils.java | 177 +-----
.../hadoop/hive/ql/io/orc/ReaderImpl.java | 2 +-
.../hadoop/hive/ql/io/orc/TypeDescription.java | 466 ++++++++++++++++
.../apache/hadoop/hive/ql/io/orc/Writer.java | 9 +
.../hadoop/hive/ql/io/orc/WriterImpl.java | 550 +++++++++----------
.../hadoop/hive/ql/io/orc/orc_proto.proto | 1 +
.../hive/ql/io/orc/TestColumnStatistics.java | 43 +-
.../hive/ql/io/orc/TestInputOutputFormat.java | 15 +-
.../hadoop/hive/ql/io/orc/TestOrcFile.java | 41 +-
.../hive/ql/io/orc/TestOrcRawRecordMerger.java | 2 +-
.../hadoop/hive/ql/io/orc/TestOrcWideTable.java | 224 +-------
.../hive/ql/io/orc/TestTypeDescription.java | 67 +++
.../resources/orc-file-dump-bloomfilter.out | 2 +-
.../resources/orc-file-dump-bloomfilter2.out | 2 +-
.../orc-file-dump-dictionary-threshold.out | 2 +-
ql/src/test/resources/orc-file-dump.json | 2 +-
ql/src/test/resources/orc-file-dump.out | 2 +-
ql/src/test/resources/orc-file-has-null.out | 2 +-
.../clientpositive/annotate_stats_part.q.out | 6 +-
.../clientpositive/annotate_stats_table.q.out | 4 +-
.../dynpart_sort_opt_vectorization.q.out | 16 +-
.../dynpart_sort_optimization2.q.out | 8 +-
.../extrapolate_part_stats_full.q.out | 24 +-
.../extrapolate_part_stats_partial.q.out | 76 +--
.../extrapolate_part_stats_partial_ndv.q.out | 38 +-
.../results/clientpositive/orc_analyze.q.out | 46 +-
.../results/clientpositive/orc_file_dump.q.out | 18 +-
.../clientpositive/orc_int_type_promotion.q.out | 6 +-
.../clientpositive/spark/vectorized_ptf.q.out | 108 ++--
.../tez/dynpart_sort_opt_vectorization.q.out | 16 +-
.../tez/dynpart_sort_optimization2.q.out | 8 +-
.../clientpositive/tez/orc_analyze.q.out | 46 +-
.../clientpositive/tez/union_fast_stats.q.out | 16 +-
.../clientpositive/tez/vector_outer_join1.q.out | 48 +-
.../clientpositive/tez/vector_outer_join4.q.out | 48 +-
.../clientpositive/tez/vectorized_ptf.q.out | 108 ++--
.../clientpositive/union_fast_stats.q.out | 16 +-
.../results/clientpositive/vectorized_ptf.q.out | 104 ++--
41 files changed, 1468 insertions(+), 1134 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
index 15a3e2c..f39d3e2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
@@ -22,8 +22,6 @@ import java.sql.Timestamp;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
@@ -964,35 +962,30 @@ class ColumnStatisticsImpl implements ColumnStatistics {
return builder;
}
- static ColumnStatisticsImpl create(ObjectInspector inspector) {
- switch (inspector.getCategory()) {
- case PRIMITIVE:
- switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) {
- case BOOLEAN:
- return new BooleanStatisticsImpl();
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- return new IntegerStatisticsImpl();
- case FLOAT:
- case DOUBLE:
- return new DoubleStatisticsImpl();
- case STRING:
- case CHAR:
- case VARCHAR:
- return new StringStatisticsImpl();
- case DECIMAL:
- return new DecimalStatisticsImpl();
- case DATE:
- return new DateStatisticsImpl();
- case TIMESTAMP:
- return new TimestampStatisticsImpl();
- case BINARY:
- return new BinaryStatisticsImpl();
- default:
- return new ColumnStatisticsImpl();
- }
+ static ColumnStatisticsImpl create(TypeDescription schema) {
+ switch (schema.getCategory()) {
+ case BOOLEAN:
+ return new BooleanStatisticsImpl();
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ return new IntegerStatisticsImpl();
+ case FLOAT:
+ case DOUBLE:
+ return new DoubleStatisticsImpl();
+ case STRING:
+ case CHAR:
+ case VARCHAR:
+ return new StringStatisticsImpl();
+ case DECIMAL:
+ return new DecimalStatisticsImpl();
+ case DATE:
+ return new DateStatisticsImpl();
+ case TIMESTAMP:
+ return new TimestampStatisticsImpl();
+ case BINARY:
+ return new BinaryStatisticsImpl();
default:
return new ColumnStatisticsImpl();
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
index a60ebb4..23dec4a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
@@ -25,6 +25,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
/**
* Contains factory methods to read or write ORC files.
@@ -102,7 +103,9 @@ public final class OrcFile {
*/
public enum WriterVersion {
ORIGINAL(0),
- HIVE_8732(1); // corrupted stripe/file maximum column statistics
+ HIVE_8732(1), // corrupted stripe/file maximum column statistics
+ HIVE_4243(2), // use real column names from Hive tables
+ FUTURE(Integer.MAX_VALUE); // a version from a future writer
private final int id;
@@ -205,7 +208,9 @@ public final class OrcFile {
public static class WriterOptions {
private final Configuration configuration;
private FileSystem fileSystemValue = null;
- private ObjectInspector inspectorValue = null;
+ private boolean explicitSchema = false;
+ private TypeDescription schema = null;
+ private ObjectInspector inspector = null;
private long stripeSizeValue;
private long blockSizeValue;
private int rowIndexStrideValue;
@@ -355,11 +360,26 @@ public final class OrcFile {
}
/**
- * A required option that sets the object inspector for the rows. Used
- * to determine the schema for the file.
+ * A required option that sets the object inspector for the rows. If
+ * setSchema is not called, it also defines the schema.
*/
public WriterOptions inspector(ObjectInspector value) {
- inspectorValue = value;
+ this.inspector = value;
+ if (!explicitSchema) {
+ schema = OrcOutputFormat.convertTypeInfo(
+ TypeInfoUtils.getTypeInfoFromObjectInspector(value));
+ }
+ return this;
+ }
+
+ /**
+ * Set the schema for the file. This is a required parameter.
+ * @param schema the schema for the file.
+ * @return this
+ */
+ public WriterOptions setSchema(TypeDescription schema) {
+ this.explicitSchema = true;
+ this.schema = schema;
return this;
}
@@ -426,7 +446,8 @@ public final class OrcFile {
FileSystem fs = opts.fileSystemValue == null ?
path.getFileSystem(opts.configuration) : opts.fileSystemValue;
- return new WriterImpl(fs, path, opts.configuration, opts.inspectorValue,
+ return new WriterImpl(fs, path, opts.configuration, opts.inspector,
+ opts.schema,
opts.stripeSizeValue, opts.compressValue,
opts.bufferSizeValue, opts.rowIndexStrideValue,
opts.memoryManagerValue, opts.blockPaddingValue,
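
With setSchema(), a writer no longer has to derive its ORC schema from the
ObjectInspector's internal column names (_col0, _col1, ...). A hedged usage
sketch (file path and field names invented; addField() is assumed to chain
like the other builder-style factories in this patch, and a real caller
would normally still set inspector(...) for row encoding):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;

    public class ExplicitSchemaDemo {
      static Writer openWriter(Configuration conf) throws IOException {
        TypeDescription schema = TypeDescription.createStruct()
            .addField("id", TypeDescription.createLong())
            .addField("name", TypeDescription.createString());
        // setSchema() flips explicitSchema, so a later inspector(...) call
        // no longer derives (and overwrites) the schema.
        return OrcFile.createWriter(new Path("/tmp/demo.orc"),
            OrcFile.writerOptions(conf).setSchema(schema));
      }
    }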
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
index ea4ebb4..ad24c58 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
@@ -20,12 +20,17 @@ package org.apache.hadoop.hive.ql.io.orc;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
import java.util.Properties;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.ql.io.RecordUpdater;
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
import org.apache.hadoop.hive.ql.io.orc.OrcFile.EncodingStrategy;
@@ -36,6 +41,15 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
@@ -52,6 +66,90 @@ import org.apache.hadoop.util.Progressable;
public class OrcOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow>
implements AcidOutputFormat<NullWritable, OrcSerdeRow> {
+ private static final Log LOG = LogFactory.getLog(OrcOutputFormat.class);
+
+ static TypeDescription convertTypeInfo(TypeInfo info) {
+ switch (info.getCategory()) {
+ case PRIMITIVE: {
+ PrimitiveTypeInfo pinfo = (PrimitiveTypeInfo) info;
+ switch (pinfo.getPrimitiveCategory()) {
+ case BOOLEAN:
+ return TypeDescription.createBoolean();
+ case BYTE:
+ return TypeDescription.createByte();
+ case SHORT:
+ return TypeDescription.createShort();
+ case INT:
+ return TypeDescription.createInt();
+ case LONG:
+ return TypeDescription.createLong();
+ case FLOAT:
+ return TypeDescription.createFloat();
+ case DOUBLE:
+ return TypeDescription.createDouble();
+ case STRING:
+ return TypeDescription.createString();
+ case DATE:
+ return TypeDescription.createDate();
+ case TIMESTAMP:
+ return TypeDescription.createTimestamp();
+ case BINARY:
+ return TypeDescription.createBinary();
+ case DECIMAL: {
+ DecimalTypeInfo dinfo = (DecimalTypeInfo) pinfo;
+ return TypeDescription.createDecimal()
+ .withScale(dinfo.getScale())
+ .withPrecision(dinfo.getPrecision());
+ }
+ case VARCHAR: {
+ BaseCharTypeInfo cinfo = (BaseCharTypeInfo) pinfo;
+ return TypeDescription.createVarchar()
+ .withMaxLength(cinfo.getLength());
+ }
+ case CHAR: {
+ BaseCharTypeInfo cinfo = (BaseCharTypeInfo) pinfo;
+ return TypeDescription.createChar()
+ .withMaxLength(cinfo.getLength());
+ }
+ default:
+ throw new IllegalArgumentException("ORC doesn't handle primitive" +
+ " category " + pinfo.getPrimitiveCategory());
+ }
+ }
+ case LIST: {
+ ListTypeInfo linfo = (ListTypeInfo) info;
+ return TypeDescription.createList
+ (convertTypeInfo(linfo.getListElementTypeInfo()));
+ }
+ case MAP: {
+ MapTypeInfo minfo = (MapTypeInfo) info;
+ return TypeDescription.createMap
+ (convertTypeInfo(minfo.getMapKeyTypeInfo()),
+ convertTypeInfo(minfo.getMapValueTypeInfo()));
+ }
+ case UNION: {
+ UnionTypeInfo minfo = (UnionTypeInfo) info;
+ TypeDescription result = TypeDescription.createUnion();
+ for (TypeInfo child: minfo.getAllUnionObjectTypeInfos()) {
+ result.addUnionChild(convertTypeInfo(child));
+ }
+ return result;
+ }
+ case STRUCT: {
+ StructTypeInfo sinfo = (StructTypeInfo) info;
+ TypeDescription result = TypeDescription.createStruct();
+ for(String fieldName: sinfo.getAllStructFieldNames()) {
+ result.addField(fieldName,
+ convertTypeInfo(sinfo.getStructFieldTypeInfo(fieldName)));
+ }
+ return result;
+ }
+ default:
+ throw new IllegalArgumentException("ORC doesn't handle " +
+ info.getCategory());
+ }
+ }
+
private static class OrcRecordWriter
implements RecordWriter<NullWritable, OrcSerdeRow>,
StatsProvidingRecordWriter {
@@ -115,7 +213,44 @@ public class OrcOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow>
}
private OrcFile.WriterOptions getOptions(JobConf conf, Properties props) {
- return OrcFile.writerOptions(props, conf);
+ OrcFile.WriterOptions result = OrcFile.writerOptions(props, conf);
+ if (props != null) {
+ final String columnNameProperty =
+ props.getProperty(IOConstants.COLUMNS);
+ final String columnTypeProperty =
+ props.getProperty(IOConstants.COLUMNS_TYPES);
+ if (columnNameProperty != null &&
+ !columnNameProperty.isEmpty() &&
+ columnTypeProperty != null &&
+ !columnTypeProperty.isEmpty()) {
+ List<String> columnNames;
+ List<TypeInfo> columnTypes;
+
+ if (columnNameProperty.length() == 0) {
+ columnNames = new ArrayList<String>();
+ } else {
+ columnNames = Arrays.asList(columnNameProperty.split(","));
+ }
+
+ if (columnTypeProperty.length() == 0) {
+ columnTypes = new ArrayList<TypeInfo>();
+ } else {
+ columnTypes =
+ TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+ }
+
+ TypeDescription schema = TypeDescription.createStruct();
+ for (int i = 0; i < columnNames.size(); ++i) {
+ schema.addField(columnNames.get(i),
+ convertTypeInfo(columnTypes.get(i)));
+ }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("ORC schema = " + schema);
+ }
+ result.setSchema(schema);
+ }
+ }
+ return result;
}
@Override
@@ -123,7 +258,7 @@ public class OrcOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow>
getRecordWriter(FileSystem fileSystem, JobConf conf, String name,
Progressable reporter) throws IOException {
return new
- OrcRecordWriter(new Path(name), getOptions(conf,null));
+ OrcRecordWriter(new Path(name), getOptions(conf, null));
}
@@ -135,7 +270,7 @@ public class OrcOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow>
boolean isCompressed,
Properties tableProperties,
Progressable reporter) throws IOException {
- return new OrcRecordWriter(path, getOptions(conf,tableProperties));
+ return new OrcRecordWriter(path, getOptions(conf, tableProperties));
}
private class DummyOrcRecordUpdater implements RecordUpdater {
@@ -229,8 +364,8 @@ public class OrcOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow>
}
@Override
- public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getRawRecordWriter(Path path,
- Options options) throws IOException {
+ public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter
+ getRawRecordWriter(Path path, Options options) throws IOException {
final Path filename = AcidUtils.createFilename(path, options);
final OrcFile.WriterOptions opts =
OrcFile.writerOptions(options.getConfiguration());
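
getOptions() above is where table properties become an ORC schema: COLUMNS
supplies the real field names and COLUMNS_TYPES the Hive types, which
convertTypeInfo() maps onto TypeDescription. A sketch of that path (property
values invented; convertTypeInfo() is package-private, so this assumes code
living in org.apache.hadoop.hive.ql.io.orc):

    import java.util.Arrays;
    import java.util.List;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

    public class SchemaFromPropsDemo {
      static TypeDescription build() {
        // Stand-ins for the COLUMNS / COLUMNS_TYPES table properties.
        List<String> names = Arrays.asList("a", "b", "c");
        List<TypeInfo> types = TypeInfoUtils.getTypeInfosFromTypeString(
            "int,string,map<string,int>");
        TypeDescription schema = TypeDescription.createStruct();
        for (int i = 0; i < names.size(); ++i) {
          schema.addField(names.get(i),
              OrcOutputFormat.convertTypeInfo(types.get(i)));
        }
        return schema; // struct<a:int,b:string,c:map<string,int>>
      }
    }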
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
index db2ca15..3e2af23 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
@@ -18,20 +18,10 @@
package org.apache.hadoop.hive.ql.io.orc;
import java.util.Arrays;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
-
-import com.google.common.collect.Lists;
public class OrcUtils {
private static final Log LOG = LogFactory.getLog(OrcUtils.class);
@@ -49,159 +39,44 @@ public class OrcUtils {
* index 5 correspond to column d. After flattening list<string> gets 2 columns.
*
* @param selectedColumns - comma separated list of selected column names
- * @param allColumns - comma separated list of all column names
- * @param inspector - object inspector
+ * @param schema - object schema
* @return - boolean array with true value set for the specified column names
*/
- public static boolean[] includeColumns(String selectedColumns, String allColumns,
- ObjectInspector inspector) {
- int numFlattenedCols = getFlattenedColumnsCount(inspector);
- boolean[] results = new boolean[numFlattenedCols];
+ public static boolean[] includeColumns(String selectedColumns,
+ TypeDescription schema) {
+ int numFlattenedCols = schema.getMaximumId();
+ boolean[] results = new boolean[numFlattenedCols + 1];
if ("*".equals(selectedColumns)) {
Arrays.fill(results, true);
return results;
}
- if (selectedColumns != null && !selectedColumns.isEmpty()) {
- includeColumnsImpl(results, selectedColumns.toLowerCase(), allColumns, inspector);
- }
- return results;
- }
-
- private static void includeColumnsImpl(boolean[] includeColumns, String selectedColumns,
- String allColumns,
- ObjectInspector inspector) {
- Map<String, List<Integer>> columnSpanMap = getColumnSpan(allColumns, inspector);
- LOG.info("columnSpanMap: " + columnSpanMap);
-
- String[] selCols = selectedColumns.split(",");
- for (String sc : selCols) {
- if (columnSpanMap.containsKey(sc)) {
- List<Integer> colSpan = columnSpanMap.get(sc);
- int start = colSpan.get(0);
- int end = colSpan.get(1);
- for (int i = start; i <= end; i++) {
- includeColumns[i] = true;
+ if (selectedColumns != null &&
+ schema.getCategory() == TypeDescription.Category.STRUCT) {
+ List<String> fieldNames = schema.getFieldNames();
+ List<TypeDescription> fields = schema.getChildren();
+ for (String column: selectedColumns.split((","))) {
+ TypeDescription col = findColumn(column, fieldNames, fields);
+ if (col != null) {
+ for(int i=col.getId(); i <= col.getMaximumId(); ++i) {
+ results[i] = true;
}
}
}
-
- LOG.info("includeColumns: " + Arrays.toString(includeColumns));
}
-
- private static Map<String, List<Integer>> getColumnSpan(String allColumns,
- ObjectInspector inspector) {
- // map that contains the column span for each column. Column span is the number of columns
- // required after flattening. For a given object inspector this map contains the start column
- // id and end column id (both inclusive) after flattening.
- // EXAMPLE:
- // schema: struct<a:int, b:float, c:map<string,int>>
- // column span map for the above struct will be
- // a => [1,1], b => [2,2], c => [3,5]
- Map<String, List<Integer>> columnSpanMap = new HashMap<String, List<Integer>>();
- if (allColumns != null) {
- String[] columns = allColumns.split(",");
- int startIdx = 0;
- int endIdx = 0;
- if (inspector instanceof StructObjectInspector) {
- StructObjectInspector soi = (StructObjectInspector) inspector;
- List<? extends StructField> fields = soi.getAllStructFieldRefs();
- for (int i = 0; i < fields.size(); i++) {
- StructField sf = fields.get(i);
-
- // we get the type (category) from object inspector but column name from the argument.
- // The reason for this is hive (FileSinkOperator) does not pass the actual column names,
- // instead it passes the internal column names (_col1,_col2).
- ObjectInspector sfOI = sf.getFieldObjectInspector();
- String colName = columns[i];
-
- startIdx = endIdx + 1;
- switch (sfOI.getCategory()) {
- case PRIMITIVE:
- endIdx += 1;
- break;
- case STRUCT:
- endIdx += 1;
- StructObjectInspector structInsp = (StructObjectInspector) sfOI;
- List<? extends StructField> structFields = structInsp.getAllStructFieldRefs();
- for (int j = 0; j < structFields.size(); ++j) {
- endIdx += getFlattenedColumnsCount(structFields.get(j).getFieldObjectInspector());
- }
- break;
- case MAP:
- endIdx += 1;
- MapObjectInspector mapInsp = (MapObjectInspector) sfOI;
- endIdx += getFlattenedColumnsCount(mapInsp.getMapKeyObjectInspector());
- endIdx += getFlattenedColumnsCount(mapInsp.getMapValueObjectInspector());
- break;
- case LIST:
- endIdx += 1;
- ListObjectInspector listInsp = (ListObjectInspector) sfOI;
- endIdx += getFlattenedColumnsCount(listInsp.getListElementObjectInspector());
- break;
- case UNION:
- endIdx += 1;
- UnionObjectInspector unionInsp = (UnionObjectInspector) sfOI;
- List<ObjectInspector> choices = unionInsp.getObjectInspectors();
- for (int j = 0; j < choices.size(); ++j) {
- endIdx += getFlattenedColumnsCount(choices.get(j));
- }
- break;
- default:
- throw new IllegalArgumentException("Bad category: " +
- inspector.getCategory());
- }
-
- columnSpanMap.put(colName, Lists.newArrayList(startIdx, endIdx));
- }
- }
- }
- return columnSpanMap;
+ return results;
}
- /**
- * Returns the number of columns after flatting complex types.
- *
- * @param inspector - object inspector
- * @return
- */
- public static int getFlattenedColumnsCount(ObjectInspector inspector) {
- int numWriters = 0;
- switch (inspector.getCategory()) {
- case PRIMITIVE:
- numWriters += 1;
- break;
- case STRUCT:
- numWriters += 1;
- StructObjectInspector structInsp = (StructObjectInspector) inspector;
- List<? extends StructField> fields = structInsp.getAllStructFieldRefs();
- for (int i = 0; i < fields.size(); ++i) {
- numWriters += getFlattenedColumnsCount(fields.get(i).getFieldObjectInspector());
- }
- break;
- case MAP:
- numWriters += 1;
- MapObjectInspector mapInsp = (MapObjectInspector) inspector;
- numWriters += getFlattenedColumnsCount(mapInsp.getMapKeyObjectInspector());
- numWriters += getFlattenedColumnsCount(mapInsp.getMapValueObjectInspector());
- break;
- case LIST:
- numWriters += 1;
- ListObjectInspector listInsp = (ListObjectInspector) inspector;
- numWriters += getFlattenedColumnsCount(listInsp.getListElementObjectInspector());
- break;
- case UNION:
- numWriters += 1;
- UnionObjectInspector unionInsp = (UnionObjectInspector) inspector;
- List<ObjectInspector> choices = unionInsp.getObjectInspectors();
- for (int i = 0; i < choices.size(); ++i) {
- numWriters += getFlattenedColumnsCount(choices.get(i));
- }
- break;
- default:
- throw new IllegalArgumentException("Bad category: " +
- inspector.getCategory());
+ private static TypeDescription findColumn(String columnName,
+ List<String> fieldNames,
+ List<TypeDescription> fields) {
+ int i = 0;
+ for(String fieldName: fieldNames) {
+ if (fieldName.equalsIgnoreCase(columnName)) {
+ return fields.get(i);
+ } else {
+ i += 1;
+ }
}
- return numWriters;
+ return null;
}
-
}
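
The rewritten includeColumns() leans on TypeDescription's pre-assigned column
ids instead of re-walking ObjectInspectors: selecting a column marks the id
range [getId(), getMaximumId()] of that subtree. A hedged sketch (field names
invented; the createMap factory and the pre-order id numbering are assumed
from the full TypeDescription class, which is only partially shown below):

    public class IncludeColumnsDemo {
      public static void main(String[] args) {
        // schema: struct<a:int,b:map<string,int>>
        // assumed ids: 0=struct, 1=a, 2=b, 3=map key, 4=map value
        TypeDescription schema = TypeDescription.createStruct()
            .addField("a", TypeDescription.createInt())
            .addField("b", TypeDescription.createMap(
                TypeDescription.createString(),
                TypeDescription.createInt()));
        boolean[] include = OrcUtils.includeColumns("b", schema);
        // Selecting "b" marks ids 2..4: the map plus both of its children.
        for (int i = 0; i < include.length; ++i) {
          System.out.println(i + " -> " + include[i]);
        }
      }
    }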
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index 23b3b55..36fb858 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -347,7 +347,7 @@ public class ReaderImpl implements Reader {
return version;
}
}
- return OrcFile.WriterVersion.ORIGINAL;
+ return OrcFile.WriterVersion.FUTURE;
}
/** Extracts the necessary metadata from an externally store buffer (fullFooterBuffer). */
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java
new file mode 100644
index 0000000..3481bb3
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java
@@ -0,0 +1,466 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * This is the description of the types in an ORC file.
+ */
+public class TypeDescription {
+ private static final int MAX_PRECISION = 38;
+ private static final int MAX_SCALE = 38;
+ private static final int DEFAULT_PRECISION = 38;
+ private static final int DEFAULT_SCALE = 10;
+ private static final int DEFAULT_LENGTH = 256;
+ public enum Category {
+ BOOLEAN("boolean", true),
+ BYTE("tinyint", true),
+ SHORT("smallint", true),
+ INT("int", true),
+ LONG("bigint", true),
+ FLOAT("float", true),
+ DOUBLE("double", true),
+ STRING("string", true),
+ DATE("date", true),
+ TIMESTAMP("timestamp", true),
+ BINARY("binary", true),
+ DECIMAL("decimal", true),
+ VARCHAR("varchar", true),
+ CHAR("char", true),
+ LIST("array", false),
+ MAP("map", false),
+ STRUCT("struct", false),
+ UNION("union", false);
+
+ Category(String name, boolean isPrimitive) {
+ this.name = name;
+ this.isPrimitive = isPrimitive;
+ }
+
+ final boolean isPrimitive;
+ final String name;
+
+ public boolean isPrimitive() {
+ return isPrimitive;
+ }
+
+ public String getName() {
+ return name;
+ }
+ }
+
+ public static TypeDescription createBoolean() {
+ return new TypeDescription(Category.BOOLEAN);
+ }
+
+ public static TypeDescription createByte() {
+ return new TypeDescription(Category.BYTE);
+ }
+
+ public static TypeDescription createShort() {
+ return new TypeDescription(Category.SHORT);
+ }
+
+ public static TypeDescription createInt() {
+ return new TypeDescription(Category.INT);
+ }
+
+ public static TypeDescription createLong() {
+ return new TypeDescription(Category.LONG);
+ }
+
+ public static TypeDescription createFloat() {
+ return new TypeDescription(Category.FLOAT);
+ }
+
+ public static TypeDescription createDouble() {
+ return new TypeDescription(Category.DOUBLE);
+ }
+
+ public static TypeDescription createString() {
+ return new TypeDescription(Category.STRING);
+ }
+
+ public static TypeDescription createDate() {
+ return new TypeDescription(Category.DATE);
+ }
+
+ public static TypeDescription createTimestamp() {
+ return new TypeDescription(Category.TIMESTAMP);
+ }
+
+ public static TypeDescription createBinary() {
+ return new TypeDescription(Category.BINARY);
+ }
+
+ public static TypeDescription createDecimal() {
+ return new TypeDescription(Category.DECIMAL);
+ }
+
+ /**
+ * For decimal types, set the precision.
+ * @param precision the new precision
+ * @return this
+ */
+ public TypeDescription withPrecision(int precision) {
+ if (category != Category.DECIMAL) {
+ throw new IllegalArgumentException("precision is only allowed on decimal"+
+ " and not " + category.name);
+ } else if (precision < 1 || precision > MAX_PRECISION || scale > precision) {
+ throw new IllegalArgumentException("precision " + precision +
+ " is out of range 1 .. " + MAX_PRECISION);
+ }
+ this.precision = precision;
+ return this;
+ }
+
+ /**
+ * For decimal types, set the scale.
+ * @param scale the new scale
+ * @return this
+ */
+ public TypeDescription withScale(int scale) {
+ if (category != Category.DECIMAL) {
+ throw new IllegalArgumentException("scale is only allowed on decimal"+
+ " and not " + category.name);
+ } else if (scale < 0 || scale > MAX_SCALE || scale > precision) {
+ throw new IllegalArgumentException("scale is out of range at " + scale);
+ }
+ this.scale = scale;
+ return this;
+ }
+
+ public static TypeDescription createVarchar() {
+ return new TypeDescription(Category.VARCHAR);
+ }
+
+ public static TypeDescription createChar() {
+ return new TypeDescription(Category.CHAR);
+ }
+
+ /**
+ * Set the maximum length for char and varchar types.
+ * @param maxLength the maximum value
+ * @return this
+ */
+ public TypeDescription withMaxLength(int maxLength) {
+ if (category != Category.VARCHAR && category != Category.CHAR) {
+ throw new IllegalArgumentException("maxLength is only allowed on char" +
+ " and varchar and not " + category.name);
+ }
+ this.maxLength = maxLength;
+ return this;
+ }
+
+ public static TypeDescription createList(TypeDescription childType) {
+ TypeDescription result = new TypeDescription(Category.LIST);
+ result.children.add(childType);
+ childType.parent = result;
+ return result;
+ }
+
+ public static TypeDescription createMap(TypeDescription keyType,
+ TypeDescription valueType) {
+ TypeDescription result = new TypeDescription(Category.MAP);
+ result.children.add(keyType);
+ result.children.add(valueType);
+ keyType.parent = result;
+ valueType.parent = result;
+ return result;
+ }
+
+ public static TypeDescription createUnion() {
+ return new TypeDescription(Category.UNION);
+ }
+
+ public static TypeDescription createStruct() {
+ return new TypeDescription(Category.STRUCT);
+ }
+
+ /**
+ * Add a child to a union type.
+ * @param child a new child type to add
+ * @return the union type.
+ */
+ public TypeDescription addUnionChild(TypeDescription child) {
+ if (category != Category.UNION) {
+ throw new IllegalArgumentException("Can only add types to union type" +
+ " and not " + category);
+ }
+ children.add(child);
+ child.parent = this;
+ return this;
+ }
+
+ /**
+ * Add a field to a struct type as it is built.
+ * @param field the field name
+ * @param fieldType the type of the field
+ * @return the struct type
+ */
+ public TypeDescription addField(String field, TypeDescription fieldType) {
+ if (category != Category.STRUCT) {
+ throw new IllegalArgumentException("Can only add fields to struct type" +
+ " and not " + category);
+ }
+ fieldNames.add(field);
+ children.add(fieldType);
+ fieldType.parent = this;
+ return this;
+ }
+
+ /**
+ * Get the id for this type.
+ * The first call will cause all of the ids in the tree to be assigned, so
+ * it should not be called before the type is completely built.
+ * @return the sequential id
+ */
+ public int getId() {
+ // if the id hasn't been assigned, assign all of the ids from the root
+ if (id == -1) {
+ TypeDescription root = this;
+ while (root.parent != null) {
+ root = root.parent;
+ }
+ root.assignIds(0);
+ }
+ return id;
+ }
+
+ /**
+ * Get the maximum id assigned to this type or its children.
+ * The first call will cause all of the ids in the tree to be assigned, so
+ * it should not be called before the type is completely built.
+ * @return the maximum id assigned under this type
+ */
+ public int getMaximumId() {
+ // if the id hasn't been assigned, assign all of the ids from the root
+ if (maxId == -1) {
+ TypeDescription root = this;
+ while (root.parent != null) {
+ root = root.parent;
+ }
+ root.assignIds(0);
+ }
+ return maxId;
+ }
+
+ /**
+ * Get the kind of this type.
+ * @return get the category for this type.
+ */
+ public Category getCategory() {
+ return category;
+ }
+
+ /**
+ * Get the maximum length of the type. Only used for char and varchar types.
+ * @return the maximum length of the string type
+ */
+ public int getMaxLength() {
+ return maxLength;
+ }
+
+ /**
+ * Get the precision of the decimal type.
+ * @return the number of digits for the precision.
+ */
+ public int getPrecision() {
+ return precision;
+ }
+
+ /**
+ * Get the scale of the decimal type.
+ * @return the number of digits for the scale.
+ */
+ public int getScale() {
+ return scale;
+ }
+
+ /**
+ * For struct types, get the list of field names.
+ * @return the list of field names.
+ */
+ public List<String> getFieldNames() {
+ return Collections.unmodifiableList(fieldNames);
+ }
+
+ /**
+ * Get the subtypes of this type.
+ * @return the list of children types
+ */
+ public List<TypeDescription> getChildren() {
+ return children == null ? null : Collections.unmodifiableList(children);
+ }
+
+ /**
+ * Assign ids to all of the nodes under this one.
+ * @param startId the lowest id to assign
+ * @return the next available id
+ */
+ private int assignIds(int startId) {
+ id = startId++;
+ if (children != null) {
+ for (TypeDescription child : children) {
+ startId = child.assignIds(startId);
+ }
+ }
+ maxId = startId - 1;
+ return startId;
+ }
+
+ private TypeDescription(Category category) {
+ this.category = category;
+ if (category.isPrimitive) {
+ children = null;
+ } else {
+ children = new ArrayList<>();
+ }
+ if (category == Category.STRUCT) {
+ fieldNames = new ArrayList<>();
+ } else {
+ fieldNames = null;
+ }
+ }
+
+ private int id = -1;
+ private int maxId = -1;
+ private TypeDescription parent;
+ private final Category category;
+ private final List<TypeDescription> children;
+ private final List<String> fieldNames;
+ private int maxLength = DEFAULT_LENGTH;
+ private int precision = DEFAULT_PRECISION;
+ private int scale = DEFAULT_SCALE;
+
+ public void printToBuffer(StringBuilder buffer) {
+ buffer.append(category.name);
+ switch (category) {
+ case DECIMAL:
+ buffer.append('(');
+ buffer.append(precision);
+ buffer.append(',');
+ buffer.append(scale);
+ buffer.append(')');
+ break;
+ case CHAR:
+ case VARCHAR:
+ buffer.append('(');
+ buffer.append(maxLength);
+ buffer.append(')');
+ break;
+ case LIST:
+ case MAP:
+ case UNION:
+ buffer.append('<');
+ for(int i=0; i < children.size(); ++i) {
+ if (i != 0) {
+ buffer.append(',');
+ }
+ children.get(i).printToBuffer(buffer);
+ }
+ buffer.append('>');
+ break;
+ case STRUCT:
+ buffer.append('<');
+ for(int i=0; i < children.size(); ++i) {
+ if (i != 0) {
+ buffer.append(',');
+ }
+ buffer.append(fieldNames.get(i));
+ buffer.append(':');
+ children.get(i).printToBuffer(buffer);
+ }
+ buffer.append('>');
+ break;
+ default:
+ break;
+ }
+ }
+
+ public String toString() {
+ StringBuilder buffer = new StringBuilder();
+ printToBuffer(buffer);
+ return buffer.toString();
+ }
+
+ private void printJsonToBuffer(String prefix, StringBuilder buffer,
+ int indent) {
+ for(int i=0; i < indent; ++i) {
+ buffer.append(' ');
+ }
+ buffer.append(prefix);
+ buffer.append("{\"category\": \"");
+ buffer.append(category.name);
+ buffer.append("\", \"id\": ");
+ buffer.append(getId());
+ buffer.append(", \"max\": ");
+ buffer.append(maxId);
+ switch (category) {
+ case DECIMAL:
+ buffer.append(", \"precision\": ");
+ buffer.append(precision);
+ buffer.append(", \"scale\": ");
+ buffer.append(scale);
+ break;
+ case CHAR:
+ case VARCHAR:
+ buffer.append(", \"length\": ");
+ buffer.append(maxLength);
+ break;
+ case LIST:
+ case MAP:
+ case UNION:
+ buffer.append(", \"children\": [");
+ for(int i=0; i < children.size(); ++i) {
+ buffer.append('\n');
+ children.get(i).printJsonToBuffer("", buffer, indent + 2);
+ if (i != children.size() - 1) {
+ buffer.append(',');
+ }
+ }
+ buffer.append("]");
+ break;
+ case STRUCT:
+ buffer.append(", \"fields\": [");
+ for(int i=0; i < children.size(); ++i) {
+ buffer.append('\n');
+ children.get(i).printJsonToBuffer("\"" + fieldNames.get(i) + "\": ",
+ buffer, indent + 2);
+ if (i != children.size() - 1) {
+ buffer.append(',');
+ }
+ }
+ buffer.append(']');
+ break;
+ default:
+ break;
+ }
+ buffer.append('}');
+ }
+
+ public String toJson() {
+ StringBuilder buffer = new StringBuilder();
+ printJsonToBuffer("", buffer, 0);
+ return buffer.toString();
+ }
+}
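The factory methods above compose arbitrarily nested schemas, and toString() renders them in Hive's type syntax. A short usage sketch (field names are illustrative):

TypeDescription schema = TypeDescription.createStruct()
    .addField("name", TypeDescription.createString())
    .addField("balance", TypeDescription.createDecimal()
        .withPrecision(20).withScale(2))
    .addField("tags", TypeDescription.createList(TypeDescription.createString()));
// Prints: struct<name:string,balance:decimal(20,2),tags:array<string>>
System.out.println(schema);
// Column ids are assigned in pre-order on first use:
// struct=0, name=1, balance=2, tags=3, tags' element=4.
System.out.println(schema.getId() + " .. " + schema.getMaximumId()); // 0 .. 4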
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
index 6411e3f..8991f2d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.io.orc;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.List;
@@ -26,6 +28,13 @@ import java.util.List;
* The interface for writing ORC files.
*/
public interface Writer {
+
+ /**
+ * Get the schema for this writer
+ * @return the file schema
+ */
+ TypeDescription getSchema();
+
/**
* Add arbitrary meta-data to the ORC file. This may be called at any point
* until the Writer is closed. If the same key is passed a second time, the
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
index 7aa8d65..767d3f2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
@@ -40,7 +40,6 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
import org.apache.hadoop.hive.ql.io.orc.CompressionCodec.Modifier;
import org.apache.hadoop.hive.ql.io.orc.OrcFile.CompressionStrategy;
@@ -54,7 +53,6 @@ import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
@@ -72,9 +70,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspect
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.BytesWritable;
@@ -127,6 +122,8 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
private final int bufferSize;
private final long blockSize;
private final double paddingTolerance;
+ private final TypeDescription schema;
+
// the streams that make up the current stripe
private final Map<StreamName, BufferedStream> streams =
new TreeMap<StreamName, BufferedStream>();
@@ -165,6 +162,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
Path path,
Configuration conf,
ObjectInspector inspector,
+ TypeDescription schema,
long stripeSize,
CompressionKind compress,
int bufferSize,
@@ -183,6 +181,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
this.path = path;
this.conf = conf;
this.callback = callback;
+ this.schema = schema;
if (callback != null) {
callbackContext = new OrcFile.WriterContext(){
@@ -207,21 +206,18 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
this.memoryManager = memoryManager;
buildIndex = rowIndexStride > 0;
codec = createCodec(compress);
- String allColumns = conf.get(IOConstants.COLUMNS);
- if (allColumns == null) {
- allColumns = getColumnNamesFromInspector(inspector);
- }
- this.bufferSize = getEstimatedBufferSize(allColumns, bufferSize);
+ int numColumns = schema.getMaximumId() + 1;
+ this.bufferSize = getEstimatedBufferSize(getMemoryAvailableForORC(),
+ codec != null, numColumns, bufferSize);
if (version == OrcFile.Version.V_0_11) {
/* do not write bloom filters for ORC v11 */
- this.bloomFilterColumns =
- OrcUtils.includeColumns(null, allColumns, inspector);
+ this.bloomFilterColumns = new boolean[schema.getMaximumId() + 1];
} else {
this.bloomFilterColumns =
- OrcUtils.includeColumns(bloomFilterColumnNames, allColumns, inspector);
+ OrcUtils.includeColumns(bloomFilterColumnNames, schema);
}
this.bloomFilterFpp = bloomFilterFpp;
- treeWriter = createTreeWriter(inspector, streamFactory, false);
+ treeWriter = createTreeWriter(inspector, schema, streamFactory, false);
if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
throw new IllegalArgumentException("Row stride must be at least " +
MIN_ROW_INDEX_STRIDE);
@@ -231,62 +227,42 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
memoryManager.addWriter(path, stripeSize, this);
}
- private String getColumnNamesFromInspector(ObjectInspector inspector) {
- List<String> fieldNames = Lists.newArrayList();
- Joiner joiner = Joiner.on(",");
- if (inspector instanceof StructObjectInspector) {
- StructObjectInspector soi = (StructObjectInspector) inspector;
- List<? extends StructField> fields = soi.getAllStructFieldRefs();
- for(StructField sf : fields) {
- fieldNames.add(sf.getFieldName());
- }
- }
- return joiner.join(fieldNames);
- }
+ static int getEstimatedBufferSize(long availableMem,
+ boolean isCompressed,
+ int columnCount, int bs) {
+ if (columnCount > COLUMN_COUNT_THRESHOLD) {
+ // In BufferedStream, there are 3 outstream buffers (compressed,
+ // uncompressed and overflow) and list of previously compressed buffers.
+ // Since overflow buffer is rarely used, let's consider only 2 allocations.
+ // Also, initially, the list of compression buffers will be empty.
+ final int outStreamBuffers = isCompressed ? 2 : 1;
- @VisibleForTesting
- int getEstimatedBufferSize(int bs) {
- return getEstimatedBufferSize(conf.get(IOConstants.COLUMNS), bs);
- }
+ // max possible streams per column is 5. For string columns, there are
+ // ROW_INDEX, PRESENT, DATA, LENGTH, DICTIONARY_DATA streams.
+ final int maxStreams = 5;
- int getEstimatedBufferSize(String colNames, int bs) {
- long availableMem = getMemoryAvailableForORC();
- if (colNames != null) {
- final int numCols = colNames.split(",").length;
- if (numCols > COLUMN_COUNT_THRESHOLD) {
- // In BufferedStream, there are 3 outstream buffers (compressed,
- // uncompressed and overflow) and list of previously compressed buffers.
- // Since overflow buffer is rarely used, lets consider only 2 allocation.
- // Also, initially, the list of compression buffers will be empty.
- final int outStreamBuffers = codec == null ? 1 : 2;
-
- // max possible streams per column is 5. For string columns, there is
- // ROW_INDEX, PRESENT, DATA, LENGTH, DICTIONARY_DATA streams.
- final int maxStreams = 5;
-
- // Lets assume 10% memory for holding dictionary in memory and other
- // object allocations
- final long miscAllocation = (long) (0.1f * availableMem);
-
- // compute the available memory
- final long remainingMem = availableMem - miscAllocation;
-
- int estBufferSize = (int) (remainingMem /
- (maxStreams * outStreamBuffers * numCols));
- estBufferSize = getClosestBufferSize(estBufferSize, bs);
- if (estBufferSize > bs) {
- estBufferSize = bs;
- }
+ // Let's assume 10% of memory for holding dictionaries in memory and other
+ // object allocations
+ final long miscAllocation = (long) (0.1f * availableMem);
- LOG.info("WIDE TABLE - Number of columns: " + numCols +
- " Chosen compression buffer size: " + estBufferSize);
- return estBufferSize;
+ // compute the available memory
+ final long remainingMem = availableMem - miscAllocation;
+
+ int estBufferSize = (int) (remainingMem /
+ (maxStreams * outStreamBuffers * columnCount));
+ estBufferSize = getClosestBufferSize(estBufferSize);
+ if (estBufferSize > bs) {
+ estBufferSize = bs;
}
+
+ LOG.info("WIDE TABLE - Number of columns: " + columnCount +
+ " Chosen compression buffer size: " + estBufferSize);
+ return estBufferSize;
}
return bs;
}
- private int getClosestBufferSize(int estBufferSize, int bs) {
+ private static int getClosestBufferSize(int estBufferSize) {
final int kb4 = 4 * 1024;
final int kb8 = 8 * 1024;
final int kb16 = 16 * 1024;
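To make the sizing arithmetic in the hunk above concrete: with roughly 2 GB available to ORC, compression on, and a 3,000-column table, the estimate comes out near 64 KB per buffer before rounding (illustrative numbers only; getClosestBufferSize then snaps the value to a standard size, and the configured default acts as a cap):

long availableMem = 2L * 1024 * 1024 * 1024;        // memory available to ORC
int columnCount = 3000;                             // a wide table
int outStreamBuffers = 2;                           // compressed + uncompressed
int maxStreams = 5;                                 // worst case per column
long miscAllocation = (long) (0.1f * availableMem); // 10% for dictionaries etc.
long remainingMem = availableMem - miscAllocation;
int estBufferSize = (int) (remainingMem / (maxStreams * outStreamBuffers * columnCount));
// estBufferSize ~= 64,424 bytes here, before rounding.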
@@ -546,15 +522,6 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
}
/**
- * Get the current column id. After creating all tree writers this count should tell how many
- * columns (including columns within nested complex objects) are created in total.
- * @return current column id
- */
- public int getCurrentColumnId() {
- return columnCount;
- }
-
- /**
* Get the stride rate of the row index.
*/
public int getRowIndexStride() {
@@ -666,11 +633,13 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
* Create a tree writer.
* @param columnId the column id of the column to write
* @param inspector the object inspector to use
+ * @param schema the row schema
* @param streamFactory limited access to the Writer's data.
* @param nullable can the value be null?
* @throws IOException
*/
TreeWriter(int columnId, ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory streamFactory,
boolean nullable) throws IOException {
this.streamFactory = streamFactory;
@@ -686,9 +655,9 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
}
this.foundNulls = false;
createBloomFilter = streamFactory.getBloomFilterColumns()[columnId];
- indexStatistics = ColumnStatisticsImpl.create(inspector);
- stripeColStatistics = ColumnStatisticsImpl.create(inspector);
- fileStatistics = ColumnStatisticsImpl.create(inspector);
+ indexStatistics = ColumnStatisticsImpl.create(schema);
+ stripeColStatistics = ColumnStatisticsImpl.create(schema);
+ fileStatistics = ColumnStatisticsImpl.create(schema);
childrenWriters = new TreeWriter[0];
rowIndex = OrcProto.RowIndex.newBuilder();
rowIndexEntry = OrcProto.RowIndexEntry.newBuilder();
@@ -749,7 +718,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
/**
* Add a new value to the column.
- * @param obj
+ * @param obj the object to write
* @throws IOException
*/
void write(Object obj) throws IOException {
@@ -919,9 +888,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
BooleanTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
PositionedOutputStream out = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
this.writer = new BitFieldWriter(out, 1);
@@ -958,9 +928,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
ByteTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
this.writer = new RunLengthByteWriter(writer.createStream(id,
OrcProto.Stream.Kind.DATA));
recordPosition(rowIndexPosition);
@@ -1003,9 +974,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
IntegerTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
OutStream out = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
this.isDirectV2 = isNewWriteFormat(writer);
@@ -1079,9 +1051,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
FloatTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
this.stream = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
this.utils = new SerializationUtils();
@@ -1123,9 +1096,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
DoubleTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
this.stream = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
this.utils = new SerializationUtils();
@@ -1184,9 +1158,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
StringTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
this.isDirectV2 = isNewWriteFormat(writer);
stringOutput = writer.createStream(id,
OrcProto.Stream.Kind.DICTIONARY_DATA);
@@ -1423,9 +1398,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
CharTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
}
/**
@@ -1445,9 +1421,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
VarcharTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
}
/**
@@ -1467,9 +1444,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
BinaryTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
this.stream = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
this.isDirectV2 = isNewWriteFormat(writer);
@@ -1531,9 +1509,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
TimestampTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
this.isDirectV2 = isNewWriteFormat(writer);
this.seconds = createIntegerWriter(writer.createStream(id,
OrcProto.Stream.Kind.DATA), true, isDirectV2, writer);
@@ -1610,9 +1589,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
DateTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
OutStream out = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
this.isDirectV2 = isNewWriteFormat(writer);
@@ -1666,9 +1646,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
DecimalTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
this.isDirectV2 = isNewWriteFormat(writer);
valueStream = writer.createStream(id, OrcProto.Stream.Kind.DATA);
this.scaleStream = createIntegerWriter(writer.createStream(id,
@@ -1726,16 +1707,21 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
private final List<? extends StructField> fields;
StructTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
+ List<TypeDescription> children = schema.getChildren();
StructObjectInspector structObjectInspector =
(StructObjectInspector) inspector;
fields = structObjectInspector.getAllStructFieldRefs();
- childrenWriters = new TreeWriter[fields.size()];
+ childrenWriters = new TreeWriter[children.size()];
for(int i=0; i < childrenWriters.length; ++i) {
+ ObjectInspector childOI = i < fields.size() ?
+ fields.get(i).getFieldObjectInspector() : null;
childrenWriters[i] = createTreeWriter(
- fields.get(i).getFieldObjectInspector(), writer, true);
+ childOI, children.get(i), writer,
+ true);
}
recordPosition(rowIndexPosition);
}
@@ -1770,15 +1756,16 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
ListTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
this.isDirectV2 = isNewWriteFormat(writer);
- ListObjectInspector listObjectInspector = (ListObjectInspector) inspector;
+ ObjectInspector childOI =
+ ((ListObjectInspector) inspector).getListElementObjectInspector();
childrenWriters = new TreeWriter[1];
childrenWriters[0] =
- createTreeWriter(listObjectInspector.getListElementObjectInspector(),
- writer, true);
+ createTreeWriter(childOI, schema.getChildren().get(0), writer, true);
lengths = createIntegerWriter(writer.createStream(columnId,
OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
recordPosition(rowIndexPosition);
@@ -1834,16 +1821,20 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
MapTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
this.isDirectV2 = isNewWriteFormat(writer);
MapObjectInspector insp = (MapObjectInspector) inspector;
childrenWriters = new TreeWriter[2];
+ List<TypeDescription> children = schema.getChildren();
childrenWriters[0] =
- createTreeWriter(insp.getMapKeyObjectInspector(), writer, true);
+ createTreeWriter(insp.getMapKeyObjectInspector(), children.get(0),
+ writer, true);
childrenWriters[1] =
- createTreeWriter(insp.getMapValueObjectInspector(), writer, true);
+ createTreeWriter(insp.getMapValueObjectInspector(), children.get(1),
+ writer, true);
lengths = createIntegerWriter(writer.createStream(columnId,
OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
recordPosition(rowIndexPosition);
@@ -1901,14 +1892,17 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
UnionTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
UnionObjectInspector insp = (UnionObjectInspector) inspector;
List<ObjectInspector> choices = insp.getObjectInspectors();
- childrenWriters = new TreeWriter[choices.size()];
+ List<TypeDescription> children = schema.getChildren();
+ childrenWriters = new TreeWriter[children.size()];
for(int i=0; i < childrenWriters.length; ++i) {
- childrenWriters[i] = createTreeWriter(choices.get(i), writer, true);
+ childrenWriters[i] = createTreeWriter(choices.get(i),
+ children.get(i), writer, true);
}
tags =
new RunLengthByteWriter(writer.createStream(columnId,
@@ -1949,168 +1943,151 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
}
private static TreeWriter createTreeWriter(ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory streamFactory,
boolean nullable) throws IOException {
- switch (inspector.getCategory()) {
- case PRIMITIVE:
- switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) {
- case BOOLEAN:
- return new BooleanTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case BYTE:
- return new ByteTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case SHORT:
- case INT:
- case LONG:
- return new IntegerTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case FLOAT:
- return new FloatTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case DOUBLE:
- return new DoubleTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case STRING:
- return new StringTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case CHAR:
- return new CharTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case VARCHAR:
- return new VarcharTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case BINARY:
- return new BinaryTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case TIMESTAMP:
- return new TimestampTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case DATE:
- return new DateTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case DECIMAL:
- return new DecimalTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- default:
- throw new IllegalArgumentException("Bad primitive category " +
- ((PrimitiveObjectInspector) inspector).getPrimitiveCategory());
- }
+ switch (schema.getCategory()) {
+ case BOOLEAN:
+ return new BooleanTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case BYTE:
+ return new ByteTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case SHORT:
+ case INT:
+ case LONG:
+ return new IntegerTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case FLOAT:
+ return new FloatTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case DOUBLE:
+ return new DoubleTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case STRING:
+ return new StringTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case CHAR:
+ return new CharTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case VARCHAR:
+ return new VarcharTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case BINARY:
+ return new BinaryTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case TIMESTAMP:
+ return new TimestampTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case DATE:
+ return new DateTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case DECIMAL:
+ return new DecimalTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
case STRUCT:
- return new StructTreeWriter(streamFactory.getNextColumnId(), inspector,
- streamFactory, nullable);
+ return new StructTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
case MAP:
return new MapTreeWriter(streamFactory.getNextColumnId(), inspector,
- streamFactory, nullable);
+ schema, streamFactory, nullable);
case LIST:
return new ListTreeWriter(streamFactory.getNextColumnId(), inspector,
- streamFactory, nullable);
+ schema, streamFactory, nullable);
case UNION:
return new UnionTreeWriter(streamFactory.getNextColumnId(), inspector,
- streamFactory, nullable);
+ schema, streamFactory, nullable);
default:
throw new IllegalArgumentException("Bad category: " +
- inspector.getCategory());
+ schema.getCategory());
}
}
private static void writeTypes(OrcProto.Footer.Builder builder,
- TreeWriter treeWriter) {
+ TypeDescription schema) {
OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
- switch (treeWriter.inspector.getCategory()) {
- case PRIMITIVE:
- switch (((PrimitiveObjectInspector) treeWriter.inspector).
- getPrimitiveCategory()) {
- case BOOLEAN:
- type.setKind(OrcProto.Type.Kind.BOOLEAN);
- break;
- case BYTE:
- type.setKind(OrcProto.Type.Kind.BYTE);
- break;
- case SHORT:
- type.setKind(OrcProto.Type.Kind.SHORT);
- break;
- case INT:
- type.setKind(OrcProto.Type.Kind.INT);
- break;
- case LONG:
- type.setKind(OrcProto.Type.Kind.LONG);
- break;
- case FLOAT:
- type.setKind(OrcProto.Type.Kind.FLOAT);
- break;
- case DOUBLE:
- type.setKind(OrcProto.Type.Kind.DOUBLE);
- break;
- case STRING:
- type.setKind(OrcProto.Type.Kind.STRING);
- break;
- case CHAR:
- // The char length needs to be written to file and should be available
- // from the object inspector
- CharTypeInfo charTypeInfo = (CharTypeInfo) ((PrimitiveObjectInspector) treeWriter.inspector).getTypeInfo();
- type.setKind(Type.Kind.CHAR);
- type.setMaximumLength(charTypeInfo.getLength());
- break;
- case VARCHAR:
- // The varchar length needs to be written to file and should be available
- // from the object inspector
- VarcharTypeInfo typeInfo = (VarcharTypeInfo) ((PrimitiveObjectInspector) treeWriter.inspector).getTypeInfo();
- type.setKind(Type.Kind.VARCHAR);
- type.setMaximumLength(typeInfo.getLength());
- break;
- case BINARY:
- type.setKind(OrcProto.Type.Kind.BINARY);
- break;
- case TIMESTAMP:
- type.setKind(OrcProto.Type.Kind.TIMESTAMP);
- break;
- case DATE:
- type.setKind(OrcProto.Type.Kind.DATE);
- break;
- case DECIMAL:
- DecimalTypeInfo decTypeInfo = (DecimalTypeInfo)((PrimitiveObjectInspector)treeWriter.inspector).getTypeInfo();
- type.setKind(OrcProto.Type.Kind.DECIMAL);
- type.setPrecision(decTypeInfo.precision());
- type.setScale(decTypeInfo.scale());
- break;
- default:
- throw new IllegalArgumentException("Unknown primitive category: " +
- ((PrimitiveObjectInspector) treeWriter.inspector).
- getPrimitiveCategory());
- }
+ List<TypeDescription> children = schema.getChildren();
+ switch (schema.getCategory()) {
+ case BOOLEAN:
+ type.setKind(OrcProto.Type.Kind.BOOLEAN);
+ break;
+ case BYTE:
+ type.setKind(OrcProto.Type.Kind.BYTE);
+ break;
+ case SHORT:
+ type.setKind(OrcProto.Type.Kind.SHORT);
+ break;
+ case INT:
+ type.setKind(OrcProto.Type.Kind.INT);
+ break;
+ case LONG:
+ type.setKind(OrcProto.Type.Kind.LONG);
+ break;
+ case FLOAT:
+ type.setKind(OrcProto.Type.Kind.FLOAT);
+ break;
+ case DOUBLE:
+ type.setKind(OrcProto.Type.Kind.DOUBLE);
+ break;
+ case STRING:
+ type.setKind(OrcProto.Type.Kind.STRING);
+ break;
+ case CHAR:
+ type.setKind(OrcProto.Type.Kind.CHAR);
+ type.setMaximumLength(schema.getMaxLength());
+ break;
+ case VARCHAR:
+ type.setKind(Type.Kind.VARCHAR);
+ type.setMaximumLength(schema.getMaxLength());
+ break;
+ case BINARY:
+ type.setKind(OrcProto.Type.Kind.BINARY);
+ break;
+ case TIMESTAMP:
+ type.setKind(OrcProto.Type.Kind.TIMESTAMP);
+ break;
+ case DATE:
+ type.setKind(OrcProto.Type.Kind.DATE);
+ break;
+ case DECIMAL:
+ type.setKind(OrcProto.Type.Kind.DECIMAL);
+ type.setPrecision(schema.getPrecision());
+ type.setScale(schema.getScale());
break;
case LIST:
type.setKind(OrcProto.Type.Kind.LIST);
- type.addSubtypes(treeWriter.childrenWriters[0].id);
+ type.addSubtypes(children.get(0).getId());
break;
case MAP:
type.setKind(OrcProto.Type.Kind.MAP);
- type.addSubtypes(treeWriter.childrenWriters[0].id);
- type.addSubtypes(treeWriter.childrenWriters[1].id);
+ for(TypeDescription t: children) {
+ type.addSubtypes(t.getId());
+ }
break;
case STRUCT:
type.setKind(OrcProto.Type.Kind.STRUCT);
- for(TreeWriter child: treeWriter.childrenWriters) {
- type.addSubtypes(child.id);
+ for(TypeDescription t: children) {
+ type.addSubtypes(t.getId());
}
- for(StructField field: ((StructTreeWriter) treeWriter).fields) {
- type.addFieldNames(field.getFieldName());
+ for(String field: schema.getFieldNames()) {
+ type.addFieldNames(field);
}
break;
case UNION:
type.setKind(OrcProto.Type.Kind.UNION);
- for(TreeWriter child: treeWriter.childrenWriters) {
- type.addSubtypes(child.id);
+ for(TypeDescription t: children) {
+ type.addSubtypes(t.getId());
}
break;
default:
throw new IllegalArgumentException("Unknown category: " +
- treeWriter.inspector.getCategory());
+ schema.getCategory());
}
builder.addTypes(type);
- for(TreeWriter child: treeWriter.childrenWriters) {
- writeTypes(builder, child);
+ if (children != null) {
+ for(TypeDescription child: children) {
+ writeTypes(builder, child);
+ }
}
}
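writeTypes now walks the TypeDescription tree directly, emitting one OrcProto.Type per node in the same pre-order used by assignIds, so the subtype ids recorded in the footer line up with column ids. A hedged illustration with a hypothetical row type:

// Hypothetical row type: struct<s:string,m:map<string,int>>
TypeDescription row = TypeDescription.createStruct()
    .addField("s", TypeDescription.createString())
    .addField("m", TypeDescription.createMap(
        TypeDescription.createString(), TypeDescription.createInt()));
// Flattened footer types in id order:
//   0: STRUCT subtypes=[1,2] fieldNames=[s,m]
//   1: STRING
//   2: MAP    subtypes=[3,4]
//   3: STRING (key)
//   4: INT    (value)
assert row.getChildren().get(1).getChildren().get(1).getId() == 4;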
@@ -2243,73 +2220,58 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
}
private long computeRawDataSize() {
- long result = 0;
- for (TreeWriter child : treeWriter.getChildrenWriters()) {
- result += getRawDataSizeFromInspectors(child, child.inspector);
- }
- return result;
+ return getRawDataSize(treeWriter, schema);
}
- private long getRawDataSizeFromInspectors(TreeWriter child, ObjectInspector oi) {
+ private long getRawDataSize(TreeWriter child,
+ TypeDescription schema) {
long total = 0;
- switch (oi.getCategory()) {
- case PRIMITIVE:
- total += getRawDataSizeFromPrimitives(child, oi);
- break;
- case LIST:
- case MAP:
- case UNION:
- case STRUCT:
- for (TreeWriter tw : child.childrenWriters) {
- total += getRawDataSizeFromInspectors(tw, tw.inspector);
- }
- break;
- default:
- LOG.debug("Unknown object inspector category.");
- break;
- }
- return total;
- }
-
- private long getRawDataSizeFromPrimitives(TreeWriter child, ObjectInspector oi) {
- long result = 0;
long numVals = child.fileStatistics.getNumberOfValues();
- switch (((PrimitiveObjectInspector) oi).getPrimitiveCategory()) {
- case BOOLEAN:
- case BYTE:
- case SHORT:
- case INT:
- case FLOAT:
- return numVals * JavaDataModel.get().primitive1();
- case LONG:
- case DOUBLE:
- return numVals * JavaDataModel.get().primitive2();
- case STRING:
- case VARCHAR:
- case CHAR:
- // ORC strings are converted to java Strings. so use JavaDataModel to
- // compute the overall size of strings
- child = (StringTreeWriter) child;
- StringColumnStatistics scs = (StringColumnStatistics) child.fileStatistics;
- numVals = numVals == 0 ? 1 : numVals;
- int avgStringLen = (int) (scs.getSum() / numVals);
- return numVals * JavaDataModel.get().lengthForStringOfLength(avgStringLen);
- case DECIMAL:
- return numVals * JavaDataModel.get().lengthOfDecimal();
- case DATE:
- return numVals * JavaDataModel.get().lengthOfDate();
- case BINARY:
- // get total length of binary blob
- BinaryColumnStatistics bcs = (BinaryColumnStatistics) child.fileStatistics;
- return bcs.getSum();
- case TIMESTAMP:
- return numVals * JavaDataModel.get().lengthOfTimestamp();
- default:
- LOG.debug("Unknown primitive category.");
- break;
+ switch (schema.getCategory()) {
+ case BOOLEAN:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case FLOAT:
+ return numVals * JavaDataModel.get().primitive1();
+ case LONG:
+ case DOUBLE:
+ return numVals * JavaDataModel.get().primitive2();
+ case STRING:
+ case VARCHAR:
+ case CHAR:
+ // ORC strings are converted to Java Strings, so use JavaDataModel to
+ // compute the overall size of strings
+ StringColumnStatistics scs = (StringColumnStatistics) child.fileStatistics;
+ numVals = numVals == 0 ? 1 : numVals;
+ int avgStringLen = (int) (scs.getSum() / numVals);
+ return numVals * JavaDataModel.get().lengthForStringOfLength(avgStringLen);
+ case DECIMAL:
+ return numVals * JavaDataModel.get().lengthOfDecimal();
+ case DATE:
+ return numVals * JavaDataModel.get().lengthOfDate();
+ case BINARY:
+ // get total length of binary blob
+ BinaryColumnStatistics bcs = (BinaryColumnStatistics) child.fileStatistics;
+ return bcs.getSum();
+ case TIMESTAMP:
+ return numVals * JavaDataModel.get().lengthOfTimestamp();
+ case LIST:
+ case MAP:
+ case UNION:
+ case STRUCT: {
+ TreeWriter[] childWriters = child.getChildrenWriters();
+ List<TypeDescription> childTypes = schema.getChildren();
+ for (int i=0; i < childWriters.length; ++i) {
+ total += getRawDataSize(childWriters[i], childTypes.get(i));
+ }
+ break;
+ }
+ default:
+ LOG.debug("Unknown object inspector category.");
+ break;
}
-
- return result;
+ return total;
}
private OrcProto.CompressionKind writeCompressionKind(CompressionKind kind) {
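The raw-data-size accounting above charges each primitive by its JavaDataModel footprint and lets complex types simply sum their children. A hedged back-of-the-envelope example (exact byte counts depend on the JVM data model):

JavaDataModel m = JavaDataModel.get();
long intCol    = 1000L * m.primitive1();                    // 1,000 int values
long doubleCol = 1000L * m.primitive2();                    // 1,000 double values
int avgLen     = 12000 / 1000;                              // sum of lengths / count, from stats
long stringCol = 1000L * m.lengthForStringOfLength(avgLen); // java.lang.String overhead included
long structTotal = intCol + doubleCol + stringCol;          // struct = sum of children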
@@ -2356,7 +2318,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
// populate raw data size
rawDataSize = computeRawDataSize();
// serialize the types
- writeTypes(builder, treeWriter);
+ writeTypes(builder, schema);
// add the stripe information
for(OrcProto.StripeInformation stripe: stripes) {
builder.addStripes(stripe);
@@ -2385,7 +2347,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
.setMagic(OrcFile.MAGIC)
.addVersion(version.getMajor())
.addVersion(version.getMinor())
- .setWriterVersion(OrcFile.WriterVersion.HIVE_8732.getId());
+ .setWriterVersion(OrcFile.WriterVersion.HIVE_4243.getId());
if (compress != CompressionKind.NONE) {
builder.setCompressionBlockSize(bufferSize);
}
@@ -2410,6 +2372,11 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
}
@Override
+ public TypeDescription getSchema() {
+ return schema;
+ }
+
+ @Override
public void addUserMetadata(String name, ByteBuffer value) {
userMetadata.put(name, ByteString.copyFrom(value));
}
@@ -2493,12 +2460,11 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
getStream();
long start = rawWriter.getPos();
- long stripeLen = length;
long availBlockSpace = blockSize - (start % blockSize);
// see if stripe can fit in the current hdfs block, else pad the remaining
// space in the block
- if (stripeLen < blockSize && stripeLen > availBlockSpace &&
+ if (length < blockSize && length > availBlockSpace &&
addBlockPadding) {
byte[] pad = new byte[(int) Math.min(HDFS_BUFFER_SIZE, availBlockSpace)];
LOG.info(String.format("Padding ORC by %d bytes while merging..",
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
----------------------------------------------------------------------
diff --git a/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto b/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
index 3b7a9b3..acadef9 100644
--- a/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
+++ b/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
@@ -213,6 +213,7 @@ message PostScript {
// Version of the writer:
// 0 (or missing) = original
// 1 = HIVE-8732 fixed
+ // 2 = HIVE-4243 fixed
optional uint32 writerVersion = 6;
// Leave this last in the record
optional string magic = 8000;
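Putting the PostScript comment above together with the OrcFile.WriterVersion values used elsewhere in this patch, the id-to-version mapping a reader applies looks roughly like this (a hedged sketch; the method is hypothetical):

static OrcFile.WriterVersion fromId(int id) {
  switch (id) {
    case 0:  return OrcFile.WriterVersion.ORIGINAL;  // 0 (or missing) = original
    case 1:  return OrcFile.WriterVersion.HIVE_8732; // 1 = HIVE-8732 fixed
    case 2:  return OrcFile.WriterVersion.HIVE_4243; // 2 = HIVE-4243 fixed
    default: return OrcFile.WriterVersion.FUTURE;    // newer than this reader
  }
}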
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
index 4d30377..4e3bc90 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
@@ -48,11 +48,10 @@ public class TestColumnStatistics {
@Test
public void testLongMerge() throws Exception {
- ObjectInspector inspector =
- PrimitiveObjectInspectorFactory.javaIntObjectInspector;
+ TypeDescription schema = TypeDescription.createInt();
- ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(inspector);
- ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(inspector);
+ ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+ ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
stats1.updateInteger(10);
stats1.updateInteger(10);
stats2.updateInteger(1);
@@ -71,11 +70,10 @@ public class TestColumnStatistics {
@Test
public void testDoubleMerge() throws Exception {
- ObjectInspector inspector =
- PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
+ TypeDescription schema = TypeDescription.createDouble();
- ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(inspector);
- ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(inspector);
+ ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+ ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
stats1.updateDouble(10.0);
stats1.updateDouble(100.0);
stats2.updateDouble(1.0);
@@ -95,11 +93,10 @@ public class TestColumnStatistics {
@Test
public void testStringMerge() throws Exception {
- ObjectInspector inspector =
- PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+ TypeDescription schema = TypeDescription.createString();
- ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(inspector);
- ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(inspector);
+ ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+ ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
stats1.updateString(new Text("bob"));
stats1.updateString(new Text("david"));
stats1.updateString(new Text("charles"));
@@ -119,11 +116,10 @@ public class TestColumnStatistics {
@Test
public void testDateMerge() throws Exception {
- ObjectInspector inspector =
- PrimitiveObjectInspectorFactory.javaDateObjectInspector;
+ TypeDescription schema = TypeDescription.createDate();
- ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(inspector);
- ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(inspector);
+ ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+ ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
stats1.updateDate(new DateWritable(1000));
stats1.updateDate(new DateWritable(100));
stats2.updateDate(new DateWritable(10));
@@ -142,11 +138,10 @@ public class TestColumnStatistics {
@Test
public void testTimestampMerge() throws Exception {
- ObjectInspector inspector =
- PrimitiveObjectInspectorFactory.javaTimestampObjectInspector;
+ TypeDescription schema = TypeDescription.createTimestamp();
- ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(inspector);
- ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(inspector);
+ ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+ ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
stats1.updateTimestamp(new Timestamp(10));
stats1.updateTimestamp(new Timestamp(100));
stats2.updateTimestamp(new Timestamp(1));
@@ -165,11 +160,11 @@ public class TestColumnStatistics {
@Test
public void testDecimalMerge() throws Exception {
- ObjectInspector inspector =
- PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector;
+ TypeDescription schema = TypeDescription.createDecimal()
+ .withPrecision(38).withScale(16);
- ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(inspector);
- ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(inspector);
+ ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+ ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
stats1.updateDecimal(HiveDecimal.create(10));
stats1.updateDecimal(HiveDecimal.create(100));
stats2.updateDecimal(HiveDecimal.create(1));
[14/22] hive git commit: HIVE-11972 : [Refactor] Improve
determination of dynamic partitioning columns in FileSink Operator (Ashutosh
Chauhan via Prasanth J)
Posted by se...@apache.org.
HIVE-11972 : [Refactor] Improve determination of dynamic partitioning columns in FileSink Operator (Ashutosh Chauhan via Prasanth J)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/24988f77
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/24988f77
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/24988f77
Branch: refs/heads/llap
Commit: 24988f77f2898bbcd91f5665b865bcc251e3cade
Parents: 522bb60
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Sat Sep 26 12:19:00 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Thu Oct 1 11:41:53 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/exec/FileSinkOperator.java | 19 +-
.../apache/hadoop/hive/ql/exec/Utilities.java | 17 +
.../optimizer/ConstantPropagateProcFactory.java | 11 +-
.../hive/ql/optimizer/GenMapRedUtils.java | 10 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 30 +-
.../hive/ql/plan/DynamicPartitionCtx.java | 27 --
.../hive/ql/exec/TestFileSinkOperator.java | 384 ++++++++++++-------
7 files changed, 284 insertions(+), 214 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/24988f77/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index 2604d5d..39944a9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -493,24 +493,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
assert inputObjInspectors.length == 1 : "FileSinkOperator should have 1 parent, but it has "
+ inputObjInspectors.length;
StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[0];
- // remove the last dpMapping.size() columns from the OI
- List<? extends StructField> fieldOI = soi.getAllStructFieldRefs();
- ArrayList<ObjectInspector> newFieldsOI = new ArrayList<ObjectInspector>();
- ArrayList<String> newFieldsName = new ArrayList<String>();
- this.dpStartCol = 0;
- for (StructField sf : fieldOI) {
- String fn = sf.getFieldName();
- if (!dpCtx.getInputToDPCols().containsKey(fn)) {
- newFieldsOI.add(sf.getFieldObjectInspector());
- newFieldsName.add(sf.getFieldName());
- this.dpStartCol++;
- } else {
- // once we found the start column for partition column we are done
- break;
- }
- }
- assert newFieldsOI.size() > 0 : "new Fields ObjectInspector is empty";
-
+ this.dpStartCol = Utilities.getDPColOffset(conf);
this.subSetOI = new SubStructObjectInspector(soi, 0, this.dpStartCol);
this.dpVals = new ArrayList<String>(numDynParts);
this.dpWritables = new ArrayList<Object>(numDynParts);
http://git-wip-us.apache.org/repos/asf/hive/blob/24988f77/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index bcf85a4..5b21af9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -119,6 +119,7 @@ import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
import org.apache.hadoop.hive.ql.exec.tez.DagUtils;
import org.apache.hadoop.hive.ql.exec.tez.TezTask;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
@@ -3916,4 +3917,20 @@ public final class Utilities {
HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PASSWORD, "");
}
}
+
+ public static int getDPColOffset(FileSinkDesc conf) {
+
+ if (conf.getWriteType() == AcidUtils.Operation.DELETE) {
+ // For deletes, there is only ROW__ID in non-partitioning, non-bucketing columns.
+ // See UpdateDeleteSemanticAnalyzer::reparseAndSuperAnalyze() for details.
+ return 1;
+ } else if (conf.getWriteType() == AcidUtils.Operation.UPDATE) {
+ // For updates, ROW__ID is an extra column at index 0.
+ // See UpdateDeleteSemanticAnalyzer::reparseAndSuperAnalyze() for details.
+ return getColumnNames(conf.getTableInfo().getProperties()).size() + 1;
+ } else {
+ return getColumnNames(conf.getTableInfo().getProperties()).size();
+ }
+
+ }
}
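A hedged illustration of the three offsets getDPColOffset can return, for a hypothetical table with regular columns (a, b, c) followed by dynamic-partition columns:

// INSERT: the DP columns start right after the table columns.
//   getDPColOffset(conf) == 3                     // a, b, c
// UPDATE: ROW__ID is prepended at index 0, shifting everything right.
//   getDPColOffset(conf) == 3 + 1
// DELETE: only ROW__ID precedes the DP columns.
//   getDPColOffset(conf) == 1
int dpStartCol = Utilities.getDPColOffset(fileSinkDesc); // fileSinkDesc is a FileSinkDesc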
http://git-wip-us.apache.org/repos/asf/hive/blob/24988f77/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
index 5c6a6df..25156b2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
@@ -843,7 +843,7 @@ public final class ConstantPropagateProcFactory {
}
}
if (constant.getTypeInfo().getCategory() != Category.PRIMITIVE) {
- // nested complex types cannot be folded cleanly
+ // nested complex types cannot be folded cleanly
return null;
}
Object value = constant.getValue();
@@ -1163,16 +1163,15 @@ public final class ConstantPropagateProcFactory {
DynamicPartitionCtx dpCtx = fsdesc.getDynPartCtx();
if (dpCtx != null) {
- // If all dynamic partitions are propagated as constant, remove DP.
- Set<String> inputs = dpCtx.getInputToDPCols().keySet();
-
// Assume only 1 parent for FS operator
Operator<? extends Serializable> parent = op.getParentOperators().get(0);
Map<ColumnInfo, ExprNodeDesc> parentConstants = cppCtx.getPropagatedConstants(parent);
RowSchema rs = parent.getSchema();
boolean allConstant = true;
- for (String input : inputs) {
- ColumnInfo ci = rs.getColumnInfo(input);
+ int dpColStartIdx = Utilities.getDPColOffset(fsdesc);
+ List<ColumnInfo> colInfos = rs.getSignature();
+ for (int i = dpColStartIdx; i < colInfos.size(); i++) {
+ ColumnInfo ci = colInfos.get(i);
if (parentConstants.get(ci) == null) {
allConstant = false;
break;
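The rewritten loop above no longer resolves partition columns by name through inputToDPCols; it relies on the invariant that the dynamic-partition columns occupy the trailing slots of the FileSink input schema, starting at Utilities.getDPColOffset(fsdesc). A generic, simplified sketch of that positional test (types reduced to placeholders; not the actual optimizer code):

import java.util.List;
import java.util.Map;

final class AllDpConstantSketch {
  // True when every trailing (dynamic-partition) column has a propagated
  // constant, i.e. dynamic partitioning could be rewritten as static.
  static <C, E> boolean allDpColumnsConstant(List<C> colInfos,
                                             int dpColStartIdx,
                                             Map<C, E> constants) {
    for (int i = dpColStartIdx; i < colInfos.size(); i++) {
      if (constants.get(colInfos.get(i)) == null) {
        return false;
      }
    }
    return true;
  }
}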
http://git-wip-us.apache.org/repos/asf/hive/blob/24988f77/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index 02fbdfe..c696fd5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -497,9 +497,6 @@ public final class GenMapRedUtils {
partsList = PartitionPruner.prune(tsOp, parseCtx, alias_id);
} catch (SemanticException e) {
throw e;
- } catch (HiveException e) {
- LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
- throw new SemanticException(e.getMessage(), e);
}
}
@@ -990,7 +987,7 @@ public final class GenMapRedUtils {
fileSinkOp.setParentOperators(Utilities.makeList(parent));
// Create a dummy TableScanOperator for the file generated through fileSinkOp
- TableScanOperator tableScanOp = (TableScanOperator) createTemporaryTableScanOperator(
+ TableScanOperator tableScanOp = createTemporaryTableScanOperator(
parent.getSchema());
// Connect this TableScanOperator to child.
@@ -1235,19 +1232,16 @@ public final class GenMapRedUtils {
// adding DP ColumnInfo to the RowSchema signature
ArrayList<ColumnInfo> signature = inputRS.getSignature();
String tblAlias = fsInputDesc.getTableInfo().getTableName();
- LinkedHashMap<String, String> colMap = new LinkedHashMap<String, String>();
for (String dpCol : dpCtx.getDPColNames()) {
ColumnInfo colInfo = new ColumnInfo(dpCol,
TypeInfoFactory.stringTypeInfo, // all partition column type should be string
tblAlias, true); // partition column is virtual column
signature.add(colInfo);
- colMap.put(dpCol, dpCol); // input and output have the same column name
}
inputRS.setSignature(signature);
// create another DynamicPartitionCtx, which has a different input-to-DP column mapping
DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
- dpCtx2.setInputToDPCols(colMap);
fsOutputDesc.setDynPartCtx(dpCtx2);
// update the FileSinkOperator to include partition columns
@@ -1896,7 +1890,7 @@ public final class GenMapRedUtils {
"Partition Names, " + Arrays.toString(partNames) + " don't match partition Types, "
+ Arrays.toString(partTypes));
- Map<String, String> typeMap = new HashMap();
+ Map<String, String> typeMap = new HashMap<>();
for (int i = 0; i < partNames.length; i++) {
String previousValue = typeMap.put(partNames[i], partTypes[i]);
Preconditions.checkArgument(previousValue == null, "Partition columns configuration is inconsistent. "
http://git-wip-us.apache.org/repos/asf/hive/blob/24988f77/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index dbc6d8f..4bec228 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -736,7 +736,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
Path dataDir = null;
if(!qb.getEncryptedTargetTablePaths().isEmpty()) {
//currently only Insert into T values(...) is supported thus only 1 values clause
- //and only 1 target table are possible. If/when support for
+ //and only 1 target table are possible. If/when support for
//select ... from values(...) is added an insert statement may have multiple
//encrypted target tables.
dataDir = ctx.getMRTmpPath(qb.getEncryptedTargetTablePaths().get(0).toUri());
@@ -1556,7 +1556,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
for (String alias : tabAliases) {
String tab_name = qb.getTabNameForAlias(alias);
-
+
// we first look for this alias from CTE, and then from catalog.
/*
* if this s a CTE reference: Add its AST as a SubQuery to this QB.
@@ -6830,30 +6830,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
.getColumnInfos()), input), rowResolver);
input.setColumnExprMap(colExprMap);
}
-
- rowFields = opParseCtx.get(input).getRowResolver()
- .getColumnInfos();
- if (deleting()) {
- // Figure out if we have partition columns in the list or not. If so,
- // add them into the mapping. Partition columns will be located after the row id.
- if (rowFields.size() > 1) {
- // This means we have partition columns to deal with, so set up the mapping from the
- // input to the partition columns.
- dpCtx.mapInputToDP(rowFields.subList(1, rowFields.size()));
- }
- } else if (updating()) {
- // In this case we expect the number of in fields to exceed the number of out fields by one
- // (for the ROW__ID virtual column). If there are more columns than this,
- // then the extras are for dynamic partitioning
- if (dynPart && dpCtx != null) {
- dpCtx.mapInputToDP(rowFields.subList(tableFields.size() + 1, rowFields.size()));
- }
- } else {
- if (dynPart && dpCtx != null) {
- // create the mapping from input ExprNode to dest table DP column
- dpCtx.mapInputToDP(rowFields.subList(tableFields.size(), rowFields.size()));
- }
- }
return input;
}
@@ -10105,7 +10081,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
return;
}
for (Node child : node.getChildren()) {
- //each insert of multi insert looks like
+ //each insert of multi insert looks like
//(TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME T1)))
if (((ASTNode) child).getToken().getType() != HiveParser.TOK_INSERT) {
continue;
http://git-wip-us.apache.org/repos/asf/hive/blob/24988f77/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicPartitionCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicPartitionCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicPartitionCtx.java
index 24db7d0..95d5635 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicPartitionCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicPartitionCtx.java
@@ -19,14 +19,11 @@ package org.apache.hadoop.hive.ql.plan;
import java.io.Serializable;
import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.Warehouse;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.metadata.Table;
public class DynamicPartitionCtx implements Serializable {
@@ -43,8 +40,6 @@ public class DynamicPartitionCtx implements Serializable {
private Path rootPath; // the root path DP columns paths start from
private int numBuckets; // number of buckets in each partition
- private Map<String, String> inputToDPCols; // mapping from input column names to DP columns
-
private List<String> spNames; // sp column names
private List<String> dpNames; // dp column names
private String defaultPartName; // default partition name in case of null or empty value
@@ -71,7 +66,6 @@ public class DynamicPartitionCtx implements Serializable {
}
this.numDPCols = dpNames.size();
this.numSPCols = spNames.size();
- this.inputToDPCols = new HashMap<String, String>();
if (this.numSPCols > 0) {
this.spPath = Warehouse.makeDynamicPartName(partSpec);
} else {
@@ -86,25 +80,12 @@ public class DynamicPartitionCtx implements Serializable {
this.spPath = dp.spPath;
this.rootPath = dp.rootPath;
this.numBuckets = dp.numBuckets;
- this.inputToDPCols = dp.inputToDPCols;
this.spNames = dp.spNames;
this.dpNames = dp.dpNames;
this.defaultPartName = dp.defaultPartName;
this.maxPartsPerNode = dp.maxPartsPerNode;
}
- public void mapInputToDP(List<ColumnInfo> fs) {
-
- assert fs.size() == this.numDPCols: "input DP column size != numDPCols";
-
- Iterator<ColumnInfo> itr1 = fs.iterator();
- Iterator<String> itr2 = dpNames.iterator();
-
- while (itr1.hasNext() && itr2.hasNext()) {
- inputToDPCols.put(itr1.next().getInternalName(), itr2.next());
- }
- }
-
public int getMaxPartitionsPerNode() {
return this.maxPartsPerNode;
}
@@ -161,14 +142,6 @@ public class DynamicPartitionCtx implements Serializable {
this.spNames = sp;
}
- public Map<String, String> getInputToDPCols() {
- return this.inputToDPCols;
- }
-
- public void setInputToDPCols(Map<String, String> map) {
- this.inputToDPCols = map;
- }
-
public void setNumDPCols(int dp) {
this.numDPCols = dp;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/24988f77/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
index c6ae030..9e89376 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
@@ -28,7 +28,6 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.ValidTxnList;
-import org.apache.hadoop.hive.common.ValidReadTxnList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.io.AcidInputFormat;
@@ -45,12 +44,11 @@ import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.stats.StatsAggregator;
import org.apache.hadoop.hive.ql.stats.StatsPublisher;
import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
@@ -77,7 +75,6 @@ import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -94,8 +91,7 @@ public class TestFileSinkOperator {
private static TableDesc nonAcidTableDescriptor;
private static TableDesc acidTableDescriptor;
private static ObjectInspector inspector;
- private static List<TFSORow> rows;
- private static ValidTxnList txnList;
+ private static List<Row> rows;
private Path basePath;
private JobConf jc;
@@ -105,34 +101,33 @@ public class TestFileSinkOperator {
Properties properties = new Properties();
properties.setProperty(serdeConstants.SERIALIZATION_LIB, TFSOSerDe.class.getName());
nonAcidTableDescriptor = new TableDesc(TFSOInputFormat.class, TFSOOutputFormat.class, properties);
+ properties.setProperty(serdeConstants.LIST_COLUMNS,"data");
properties = new Properties(properties);
properties.setProperty(hive_metastoreConstants.BUCKET_COUNT, "1");
acidTableDescriptor = new TableDesc(TFSOInputFormat.class, TFSOOutputFormat.class, properties);
-
tmpdir = new File(System.getProperty("java.io.tmpdir") + System.getProperty("file.separator") +
"testFileSinkOperator");
tmpdir.mkdir();
tmpdir.deleteOnExit();
- txnList = new ValidReadTxnList(new long[]{}, 2);
}
@Test
public void testNonAcidWrite() throws Exception {
setBasePath("write");
- setupData(DataFormat.SIMPLE);
+ setupData(DataFormat.WITH_PARTITION_VALUE);
FileSinkOperator op = getFileSink(AcidUtils.Operation.NOT_ACID, false, 0);
processRows(op);
- confirmOutput();
+ confirmOutput(DataFormat.WITH_PARTITION_VALUE);
}
@Test
public void testInsert() throws Exception {
setBasePath("insert");
- setupData(DataFormat.SIMPLE);
+ setupData(DataFormat.WITH_PARTITION_VALUE);
FileSinkOperator op = getFileSink(AcidUtils.Operation.INSERT, false, 1);
processRows(op);
Assert.assertEquals("10", TFSOStatsPublisher.stats.get(StatsSetupConst.ROW_COUNT));
- confirmOutput();
+ confirmOutput(DataFormat.WITH_PARTITION_VALUE);
}
@Test
@@ -142,7 +137,7 @@ public class TestFileSinkOperator {
FileSinkOperator op = getFileSink(AcidUtils.Operation.UPDATE, false, 2);
processRows(op);
Assert.assertEquals("0", TFSOStatsPublisher.stats.get(StatsSetupConst.ROW_COUNT));
- confirmOutput();
+ confirmOutput(DataFormat.WITH_RECORD_ID);
}
@Test
@@ -152,7 +147,7 @@ public class TestFileSinkOperator {
FileSinkOperator op = getFileSink(AcidUtils.Operation.DELETE, false, 2);
processRows(op);
Assert.assertEquals("-10", TFSOStatsPublisher.stats.get(StatsSetupConst.ROW_COUNT));
- confirmOutput();
+ confirmOutput(DataFormat.WITH_RECORD_ID);
}
@Test
@@ -161,7 +156,7 @@ public class TestFileSinkOperator {
setupData(DataFormat.WITH_PARTITION_VALUE);
FileSinkOperator op = getFileSink(AcidUtils.Operation.NOT_ACID, true, 0);
processRows(op);
- confirmOutput();
+ confirmOutput(DataFormat.WITH_PARTITION_VALUE);
}
@@ -174,7 +169,7 @@ public class TestFileSinkOperator {
// We only expect 5 here because we'll get whichever of the partitions published its stats
// last.
Assert.assertEquals("5", TFSOStatsPublisher.stats.get(StatsSetupConst.ROW_COUNT));
- confirmOutput();
+ confirmOutput(DataFormat.WITH_PARTITION_VALUE);
}
@Test
@@ -184,19 +179,19 @@ public class TestFileSinkOperator {
FileSinkOperator op = getFileSink(AcidUtils.Operation.UPDATE, true, 2);
processRows(op);
Assert.assertEquals("0", TFSOStatsPublisher.stats.get(StatsSetupConst.ROW_COUNT));
- confirmOutput();
+ confirmOutput(DataFormat.WITH_RECORD_ID_AND_PARTITION_VALUE);
}
@Test
public void testDeleteDynamicPartitioning() throws Exception {
setBasePath("deleteDP");
- setupData(DataFormat.WITH_RECORD_ID_AND_PARTITION_VALUE);
+ setupData(DataFormat.WITH_RECORD_ID);
FileSinkOperator op = getFileSink(AcidUtils.Operation.DELETE, true, 2);
processRows(op);
// We only expect -5 here because we'll get whichever of the partitions published its stats
// last.
Assert.assertEquals("-5", TFSOStatsPublisher.stats.get(StatsSetupConst.ROW_COUNT));
- confirmOutput();
+ confirmOutput(DataFormat.WITH_RECORD_ID);
}
@@ -217,64 +212,52 @@ public class TestFileSinkOperator {
}
- private enum DataFormat {SIMPLE, WITH_RECORD_ID, WITH_PARTITION_VALUE,
- WITH_RECORD_ID_AND_PARTITION_VALUE};
+ private enum DataFormat {WITH_RECORD_ID, WITH_PARTITION_VALUE, WITH_RECORD_ID_AND_PARTITION_VALUE};
private void setupData(DataFormat format) {
- // Build object inspector
- inspector = ObjectInspectorFactory.getReflectionObjectInspector
- (TFSORow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- rows = new ArrayList<TFSORow>();
-
+ Class<?> rType;
switch (format) {
- case SIMPLE:
- // Build rows
- for (int i = 0; i < 10; i++) {
- rows.add(
- new TFSORow(
- new Text("mary had a little lamb")
- )
- );
- }
+ case WITH_PARTITION_VALUE:
+ rType = RowWithPartVal.class;
break;
-
case WITH_RECORD_ID:
- for (int i = 0; i < 10; i++) {
- rows.add(
- new TFSORow(
- new Text("its fleect was white as snow"),
- new RecordIdentifier(1, 1, i)
- )
- );
- }
+ rType = RowWithRecID.class;
break;
-
- case WITH_PARTITION_VALUE:
- for (int i = 0; i < 10; i++) {
- rows.add(
- new TFSORow(
- new Text("its fleect was white as snow"),
- (i < 5) ? new Text("Monday") : new Text("Tuesday")
- )
- );
- }
- break;
-
case WITH_RECORD_ID_AND_PARTITION_VALUE:
- for (int i = 0; i < 10; i++) {
- rows.add(
- new TFSORow(
- new Text("its fleect was white as snow"),
- (i < 5) ? new Text("Monday") : new Text("Tuesday"),
- new RecordIdentifier(1, 1, i)
- )
- );
- }
+ rType = RowWithPartNRecID.class;
break;
-
default:
- throw new RuntimeException("Unknown option!");
+ throw new RuntimeException("Unknown type");
+ }
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector
+ (rType, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+
+ rows = new ArrayList<Row>();
+ Row r;
+ for (int i = 0; i < 10; i++) {
+ switch (format) {
+ case WITH_PARTITION_VALUE:
+ r =
+ new RowWithPartVal(
+ new Text("mary had a little lamb"),
+ (i < 5) ? new Text("Monday") : new Text("Tuesday"));
+ break;
+ case WITH_RECORD_ID:
+ r = new RowWithRecID(new RecordIdentifier(1, 1, i),
+ (i < 5) ? new Text("Monday") : new Text("Tuesday"));
+ break;
+ case WITH_RECORD_ID_AND_PARTITION_VALUE:
+ r = new RowWithPartNRecID(
+ new Text("its fleect was white as snow"),
+ (i < 5) ? new Text("Monday") : new Text("Tuesday"),
+ new RecordIdentifier(1, 1, i));
+ break;
+ default:
+ throw new RuntimeException("Unknown data format");
+ }
+ rows.add(r);
+
}
}
@@ -300,9 +283,6 @@ public class TestFileSinkOperator {
Map<String, String> partColMap= new LinkedHashMap<String, String>(1);
partColMap.put(PARTCOL_NAME, null);
DynamicPartitionCtx dpCtx = new DynamicPartitionCtx(null, partColMap, "Sunday", 100);
- Map<String, String> partColNames = new HashMap<String, String>(1);
- partColNames.put(PARTCOL_NAME, PARTCOL_NAME);
- dpCtx.setInputToDPCols(partColNames);
//todo: does this need the finalDestination?
desc = new FileSinkDesc(basePath, tableDesc, false, 1, false, false, 1, 1, partCols, dpCtx, null);
} else {
@@ -320,27 +300,27 @@ public class TestFileSinkOperator {
}
private void processRows(FileSinkOperator op) throws HiveException {
- for (TFSORow r : rows) op.process(r, 0);
+ for (Object r : rows) op.process(r, 0);
op.jobCloseOp(jc, true);
op.close(false);
}
- private void confirmOutput() throws IOException, SerDeException {
+ private void confirmOutput(DataFormat rType) throws IOException, SerDeException, CloneNotSupportedException {
Path[] paths = findFilesInBasePath();
- TFSOInputFormat input = new TFSOInputFormat();
+ TFSOInputFormat input = new TFSOInputFormat(rType);
FileInputFormat.setInputPaths(jc, paths);
InputSplit[] splits = input.getSplits(jc, 1);
- RecordReader<NullWritable, TFSORow> reader = input.getRecordReader(splits[0], jc,
+ RecordReader<NullWritable, Row> reader = input.getRecordReader(splits[0], jc,
Mockito.mock(Reporter.class));
NullWritable key = reader.createKey();
- TFSORow value = reader.createValue();
- List<TFSORow> results = new ArrayList<TFSORow>(rows.size());
- List<TFSORow> sortedRows = new ArrayList<TFSORow>(rows.size());
+ Row value = reader.createValue();
+ List<Row> results = new ArrayList<Row>(rows.size());
+ List<Row> sortedRows = new ArrayList<Row>(rows.size());
for (int i = 0; i < rows.size(); i++) {
Assert.assertTrue(reader.next(key, value));
- results.add(new TFSORow(value));
- sortedRows.add(new TFSORow(rows.get(i)));
+ results.add(value.clone());
+ sortedRows.add(rows.get(i));
}
Assert.assertFalse(reader.next(key, value));
Collections.sort(results);
@@ -370,36 +350,172 @@ public class TestFileSinkOperator {
}
}
- private static class TFSORow implements WritableComparable<TFSORow> {
+ public static interface Row extends WritableComparable<Row> {
+
+ Row clone() throws CloneNotSupportedException;
+ }
+
+ private static class RowWithRecID implements Row {
+
private RecordIdentifier recId;
- private Text data;
private Text partVal;
- TFSORow() {
- this(null, null, null);
+ public RowWithRecID() {
+ }
+ public RowWithRecID(RecordIdentifier recId, Text partVal) {
+ super();
+ this.recId = recId;
+ this.partVal = partVal;
}
- TFSORow(Text t) {
- this(t, null, null);
+ @Override
+ public Row clone() throws CloneNotSupportedException {
+ return new RowWithRecID(this.recId, this.partVal);
}
- TFSORow(Text t, Text pv) {
- this(t, pv, null);
+ @Override
+ public void write(DataOutput dataOutput) throws IOException {
+ if (partVal == null) {
+ dataOutput.writeBoolean(false);
+ } else {
+ dataOutput.writeBoolean(true);
+ partVal.write(dataOutput);
+ }
+ if (recId == null) {
+ dataOutput.writeBoolean(false);
+ } else {
+ dataOutput.writeBoolean(true);
+ recId.write(dataOutput);
+ }
}
+ @Override
+ public void readFields(DataInput dataInput) throws IOException {
+ boolean notNull = dataInput.readBoolean();
+ if (notNull) {
+ partVal = new Text();
+ partVal.readFields(dataInput);
+ }
+ notNull = dataInput.readBoolean();
+ if (notNull) {
+ recId = new RecordIdentifier();
+ recId.readFields(dataInput);
+ }
- TFSORow(Text t, RecordIdentifier ri) {
- this(t, null, ri);
}
+ @Override
+ public int compareTo(Row row) {
+ RowWithRecID other = (RowWithRecID) row;
+ if (recId == null && other.recId == null) {
+ return comparePartVal(other);
+ } else if (recId == null) {
+ return -1;
+ } else {
+ int rc = recId.compareTo(other.recId);
+ if (rc == 0) return comparePartVal(other);
+ else return rc;
+ }
+ }
+ private int comparePartVal(RowWithRecID other) {
- TFSORow(Text t, Text pv, RecordIdentifier ri) {
+ return partVal.compareTo(other.partVal);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ return compareTo((RowWithRecID)obj) == 0;
+ }
+ }
+ private static class RowWithPartVal implements Row {
+
+ public RowWithPartVal(Text data, Text partVal) {
+ super();
+ this.data = data;
+ this.partVal = partVal;
+ }
+
+ public RowWithPartVal() {
+ }
+
+ private Text data;
+ private Text partVal;
+
+ @Override
+ public Row clone() throws CloneNotSupportedException {
+ return new RowWithPartVal(this.data, this.partVal);
+ }
+
+ @Override
+ public void write(DataOutput dataOutput) throws IOException {
+ data.write(dataOutput);
+ if (partVal == null) {
+ dataOutput.writeBoolean(false);
+ } else {
+ dataOutput.writeBoolean(true);
+ partVal.write(dataOutput);
+ }
+ }
+
+ @Override
+ public void readFields(DataInput dataInput) throws IOException {
+ data = new Text();
+ data.readFields(dataInput);
+ boolean notNull = dataInput.readBoolean();
+ if (notNull) {
+ partVal = new Text();
+ partVal.readFields(dataInput);
+ }
+ }
+
+ @Override
+ public int compareTo(Row row) {
+ RowWithPartVal other = (RowWithPartVal) row;
+ if (partVal == null && other.partVal == null) {
+ return compareData(other);
+ } else if (partVal == null) {
+ return -1;
+ } else {
+ int rc = partVal.compareTo(other.partVal);
+ if (rc == 0) return compareData(other);
+ else return rc;
+ }
+ }
+
+ private int compareData(RowWithPartVal other) {
+ if (data == null && other.data == null) return 0;
+ else if (data == null) return -1;
+ else return data.compareTo(other.data);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (obj instanceof RowWithPartVal) {
+ RowWithPartVal other = (RowWithPartVal) obj;
+ return compareTo(other) == 0;
+
+ } else {
+ return false;
+ }
+ }
+ }
+ private static class RowWithPartNRecID implements Row {
+ private RecordIdentifier recId;
+ private Text data;
+ private Text partVal;
+
+ RowWithPartNRecID() {
+ this(null, null, null);
+ }
+
+ RowWithPartNRecID(Text t, Text pv, RecordIdentifier ri) {
data = t;
partVal = pv;
recId = ri;
-
}
- TFSORow(TFSORow other) {
- this(other.data, other.partVal, other.recId);
+ @Override
+ public RowWithPartNRecID clone() throws CloneNotSupportedException {
+ return new RowWithPartNRecID(this.data, this.partVal, this.recId);
}
@Override
@@ -437,8 +553,8 @@ public class TestFileSinkOperator {
@Override
public boolean equals(Object obj) {
- if (obj instanceof TFSORow) {
- TFSORow other = (TFSORow) obj;
+ if (obj instanceof RowWithPartNRecID) {
+ RowWithPartNRecID other = (RowWithPartNRecID) obj;
if (data == null && other.data == null) return checkPartVal(other);
else if (data == null) return false;
else if (data.equals(other.data)) return checkPartVal(other);
@@ -448,21 +564,22 @@ public class TestFileSinkOperator {
}
}
- private boolean checkPartVal(TFSORow other) {
+ private boolean checkPartVal(RowWithPartNRecID other) {
if (partVal == null && other.partVal == null) return checkRecId(other);
else if (partVal == null) return false;
else if (partVal.equals(other.partVal)) return checkRecId(other);
else return false;
}
- private boolean checkRecId(TFSORow other) {
+ private boolean checkRecId(RowWithPartNRecID other) {
if (recId == null && other.recId == null) return true;
else if (recId == null) return false;
else return recId.equals(other.recId);
}
@Override
- public int compareTo(TFSORow other) {
+ public int compareTo(Row row) {
+ RowWithPartNRecID other = (RowWithPartNRecID) row;
if (recId == null && other.recId == null) {
return comparePartVal(other);
} else if (recId == null) {
@@ -474,7 +591,7 @@ public class TestFileSinkOperator {
}
}
- private int comparePartVal(TFSORow other) {
+ private int comparePartVal(RowWithPartNRecID other) {
if (partVal == null && other.partVal == null) {
return compareData(other);
} else if (partVal == null) {
@@ -486,21 +603,26 @@ public class TestFileSinkOperator {
}
}
- private int compareData(TFSORow other) {
+ private int compareData(RowWithPartNRecID other) {
if (data == null && other.data == null) return 0;
else if (data == null) return -1;
else return data.compareTo(other.data);
}
}
- private static class TFSOInputFormat extends FileInputFormat<NullWritable, TFSORow>
- implements AcidInputFormat<NullWritable, TFSORow> {
+ private static class TFSOInputFormat extends FileInputFormat<NullWritable, Row>
+ implements AcidInputFormat<NullWritable, Row> {
FSDataInputStream in[] = null;
int readingFrom = -1;
+ DataFormat rType;
+
+ public TFSOInputFormat(DataFormat rType) {
+ this.rType = rType;
+ }
@Override
- public RecordReader<NullWritable, TFSORow> getRecordReader(
+ public RecordReader<NullWritable, Row> getRecordReader(
InputSplit inputSplit, JobConf entries, Reporter reporter) throws IOException {
if (in == null) {
Path paths[] = FileInputFormat.getInputPaths(entries);
@@ -511,10 +633,10 @@ public class TestFileSinkOperator {
}
readingFrom = 0;
}
- return new RecordReader<NullWritable, TFSORow>() {
+ return new RecordReader<NullWritable, Row>() {
@Override
- public boolean next(NullWritable nullWritable, TFSORow tfsoRecord) throws
+ public boolean next(NullWritable nullWritable, Row tfsoRecord) throws
IOException {
try {
tfsoRecord.readFields(in[readingFrom]);
@@ -532,8 +654,18 @@ public class TestFileSinkOperator {
}
@Override
- public TFSORow createValue() {
- return new TFSORow();
+ public Row createValue() {
+ switch (rType) {
+ case WITH_RECORD_ID_AND_PARTITION_VALUE:
+ return new RowWithPartNRecID();
+ case WITH_PARTITION_VALUE:
+ return new RowWithPartVal();
+ case WITH_RECORD_ID:
+ return new RowWithRecID();
+
+ default:
+ throw new RuntimeException("Unknown row Type");
+ }
}
@Override
@@ -554,14 +686,14 @@ public class TestFileSinkOperator {
}
@Override
- public RowReader<TFSORow> getReader(InputSplit split,
+ public RowReader<Row> getReader(InputSplit split,
Options options) throws
IOException {
return null;
}
@Override
- public RawReader<TFSORow> getRawReader(Configuration conf,
+ public RawReader<Row> getRawReader(Configuration conf,
boolean collapseEvents,
int bucket,
ValidTxnList validTxnList,
@@ -578,9 +710,9 @@ public class TestFileSinkOperator {
}
}
- public static class TFSOOutputFormat extends FileOutputFormat<NullWritable, TFSORow>
- implements AcidOutputFormat<NullWritable, TFSORow> {
- List<TFSORow> records = new ArrayList<TFSORow>();
+ public static class TFSOOutputFormat extends FileOutputFormat<NullWritable, Row>
+ implements AcidOutputFormat<NullWritable, Row> {
+ List<Row> records = new ArrayList<>();
long numRecordsAdded = 0;
FSDataOutputStream out = null;
@@ -588,7 +720,6 @@ public class TestFileSinkOperator {
public RecordUpdater getRecordUpdater(final Path path, final Options options) throws
IOException {
- final StructObjectInspector inspector = (StructObjectInspector)options.getInspector();
return new RecordUpdater() {
@Override
public void insert(long currentTransaction, Object row) throws IOException {
@@ -608,9 +739,8 @@ public class TestFileSinkOperator {
}
private void addRow(Object row) {
- assert row instanceof TFSORow : "Expected TFSORow but got " +
- row.getClass().getName();
- records.add((TFSORow)row);
+ assert row instanceof Row : "Expected Row but got " + row.getClass().getName();
+ records.add((Row)row);
}
@Override
@@ -619,7 +749,7 @@ public class TestFileSinkOperator {
FileSystem fs = path.getFileSystem(options.getConfiguration());
out = fs.create(path);
}
- for (TFSORow r : records) r.write(out);
+ for (Writable r : records) r.write(out);
records.clear();
out.flush();
}
@@ -657,8 +787,8 @@ public class TestFileSinkOperator {
return new FileSinkOperator.RecordWriter() {
@Override
public void write(Writable w) throws IOException {
- Assert.assertTrue(w instanceof TFSORow);
- records.add((TFSORow) w);
+ Assert.assertTrue(w instanceof Row);
+ records.add((Row)w);
}
@Override
@@ -667,7 +797,7 @@ public class TestFileSinkOperator {
FileSystem fs = finalOutPath.getFileSystem(jc);
out = fs.create(finalOutPath);
}
- for (TFSORow r : records) r.write(out);
+ for (Writable r : records) r.write(out);
records.clear();
out.flush();
out.close();
@@ -676,7 +806,7 @@ public class TestFileSinkOperator {
}
@Override
- public RecordWriter<NullWritable, TFSORow> getRecordWriter(
+ public RecordWriter<NullWritable, Row> getRecordWriter(
FileSystem fileSystem, JobConf entries, String s, Progressable progressable) throws
IOException {
return null;
@@ -688,7 +818,7 @@ public class TestFileSinkOperator {
}
}
- public static class TFSOSerDe implements SerDe {
+ public static class TFSOSerDe extends AbstractSerDe {
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
@@ -697,20 +827,18 @@ public class TestFileSinkOperator {
@Override
public Class<? extends Writable> getSerializedClass() {
- return TFSORow.class;
+ return RowWithPartNRecID.class;
}
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
- assert obj instanceof TFSORow : "Expected TFSORow or decendent, got "
- + obj.getClass().getName();
- return (TFSORow)obj;
+ assert obj instanceof Row : "Expected Row or descendant, got " + obj.getClass().getName();
+ return (Row)obj;
}
@Override
public Object deserialize(Writable blob) throws SerDeException {
- assert blob instanceof TFSORow : "Expected TFSORow or decendent, got "
- + blob.getClass().getName();
+ assert blob instanceof Row : "Expected Row or descendant, got " + blob.getClass().getName();
return blob;
}
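All three refactored row classes above serialize their nullable fields the same way: a boolean presence flag is written ahead of each optional field, and readFields() only materializes a field when its flag is set. A self-contained sketch of that pattern (OptionalFieldWritable is an illustrative name, not part of the test):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

class OptionalFieldWritable implements Writable {
  private Text value; // may legitimately be null

  @Override
  public void write(DataOutput out) throws IOException {
    out.writeBoolean(value != null); // presence flag first
    if (value != null) {
      value.write(out);
    }
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    if (in.readBoolean()) {
      value = new Text();
      value.readFields(in);
    } else {
      value = null;
    }
  }
}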
[22/22] hive git commit: HIVE-12015 : LLAP: merge master into branch
(Sergey Shelukhin)
Posted by se...@apache.org.
HIVE-12015 : LLAP: merge master into branch (Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c5ccf669
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c5ccf669
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c5ccf669
Branch: refs/heads/llap
Commit: c5ccf6694490a63329b3f4d9040dd976abd9d790
Parents: a1bc2ef 5074423
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Oct 1 19:38:52 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Oct 1 19:38:52 2015 -0700
----------------------------------------------------------------------
.../common/metrics/common/MetricsConstant.java | 5 +
.../hcatalog/pig/TestHCatLoaderEncryption.java | 3 +
.../hive/ql/security/FolderPermissionBase.java | 53 +-
.../upgrade/derby/021-HIVE-11970.derby.sql | 6 +
.../upgrade/derby/hive-schema-1.3.0.derby.sql | 12 +-
.../upgrade/derby/hive-schema-2.0.0.derby.sql | 12 +-
.../derby/upgrade-1.2.0-to-1.3.0.derby.sql | 1 +
.../derby/upgrade-1.2.0-to-2.0.0.derby.sql | 3 +-
.../upgrade/mssql/007-HIVE-11970.mssql.sql | 6 +
.../upgrade/mssql/hive-schema-1.3.0.mssql.sql | 12 +-
.../upgrade/mssql/hive-schema-2.0.0.mssql.sql | 12 +-
.../mssql/upgrade-1.2.0-to-1.3.0.mssql.sql | 1 +
.../mssql/upgrade-1.2.0-to-2.0.0.mssql.sql | 7 +-
.../upgrade/mysql/022-HIVE-11970.mysql.sql | 6 +
.../upgrade/mysql/hive-schema-1.3.0.mysql.sql | 12 +-
.../upgrade/mysql/hive-schema-2.0.0.mysql.sql | 12 +-
.../mysql/upgrade-1.2.0-to-1.3.0.mysql.sql | 1 +
.../mysql/upgrade-1.2.0-to-2.0.0.mysql.sql | 2 +
.../upgrade/oracle/022-HIVE-11970.oracle.sql | 23 +
.../upgrade/oracle/hive-schema-1.3.0.oracle.sql | 12 +-
.../upgrade/oracle/hive-schema-2.0.0.oracle.sql | 12 +-
.../oracle/upgrade-1.2.0-to-1.3.0.oracle.sql | 2 +
.../oracle/upgrade-1.2.0-to-2.0.0.oracle.sql | 2 +
.../postgres/021-HIVE-11970.postgres.sql | 6 +
.../postgres/hive-schema-1.3.0.postgres.sql | 12 +-
.../postgres/hive-schema-2.0.0.postgres.sql | 12 +-
.../upgrade-1.2.0-to-1.3.0.postgres.sql | 1 +
.../upgrade-1.2.0-to-2.0.0.postgres.sql | 1 +
.../hive/metastore/MetaStoreDirectSql.java | 34 +-
.../hadoop/hive/metastore/txn/TxnHandler.java | 159 +++---
.../hadoop/hive/ql/exec/FileSinkOperator.java | 19 +-
.../apache/hadoop/hive/ql/exec/Utilities.java | 16 +
.../hadoop/hive/ql/io/HiveFileFormatUtils.java | 95 +++-
.../hadoop/hive/ql/io/InputFormatChecker.java | 5 +-
.../hadoop/hive/ql/io/RCFileInputFormat.java | 3 +-
.../ql/io/SequenceFileInputFormatChecker.java | 3 +-
.../hive/ql/io/VectorizedRCFileInputFormat.java | 3 +-
.../hive/ql/io/orc/ColumnStatisticsImpl.java | 55 +-
.../apache/hadoop/hive/ql/io/orc/InStream.java | 25 +
.../hive/ql/io/orc/MetadataReaderImpl.java | 2 +-
.../apache/hadoop/hive/ql/io/orc/OrcFile.java | 33 +-
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 4 +-
.../hadoop/hive/ql/io/orc/OrcOutputFormat.java | 145 ++++-
.../apache/hadoop/hive/ql/io/orc/OrcUtils.java | 177 +-----
.../hadoop/hive/ql/io/orc/ReaderImpl.java | 45 +-
.../hadoop/hive/ql/io/orc/TypeDescription.java | 466 ++++++++++++++++
.../ql/io/orc/VectorizedOrcInputFormat.java | 2 +-
.../apache/hadoop/hive/ql/io/orc/Writer.java | 9 +
.../hadoop/hive/ql/io/orc/WriterImpl.java | 550 +++++++++----------
.../hadoop/hive/ql/lockmgr/DbLockManager.java | 21 +
.../zookeeper/ZooKeeperHiveLockManager.java | 41 ++
.../apache/hadoop/hive/ql/metadata/Hive.java | 108 +---
.../hive/ql/optimizer/ColumnPrunerProcCtx.java | 2 +-
.../optimizer/ConstantPropagateProcFactory.java | 11 +-
.../hive/ql/optimizer/GenMapRedUtils.java | 10 +-
.../calcite/translator/HiveGBOpConvUtil.java | 43 +-
.../hadoop/hive/ql/parse/FromClauseParser.g | 30 +-
.../apache/hadoop/hive/ql/parse/HiveParser.g | 7 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 33 +-
.../hive/ql/plan/DynamicPartitionCtx.java | 27 -
.../hadoop/hive/ql/io/orc/orc_proto.proto | 1 +
.../hive/ql/exec/TestFileSinkOperator.java | 386 ++++++++-----
.../hive/ql/io/orc/TestColumnStatistics.java | 43 +-
.../hive/ql/io/orc/TestInputOutputFormat.java | 15 +-
.../hadoop/hive/ql/io/orc/TestOrcFile.java | 41 +-
.../hive/ql/io/orc/TestOrcRawRecordMerger.java | 2 +-
.../hadoop/hive/ql/io/orc/TestOrcWideTable.java | 224 +-------
.../hive/ql/io/orc/TestTypeDescription.java | 67 +++
.../zookeeper/TestZookeeperLockManager.java | 50 ++
.../hive/ql/txn/compactor/CompactorTest.java | 2 +-
.../cbo_rp_gby2_map_multi_distinct.q | 38 ++
ql/src/test/queries/clientpositive/join_parse.q | 20 +
.../queries/clientpositive/update_all_types.q | 2 +-
.../clientpositive/windowing_windowspec2.q | 16 +-
.../resources/orc-file-dump-bloomfilter.out | 2 +-
.../resources/orc-file-dump-bloomfilter2.out | 2 +-
.../orc-file-dump-dictionary-threshold.out | 2 +-
ql/src/test/resources/orc-file-dump.json | 2 +-
ql/src/test/resources/orc-file-dump.out | 2 +-
ql/src/test/resources/orc-file-has-null.out | 2 +-
.../clientnegative/cte_with_in_subquery.q.out | 2 +-
.../clientpositive/annotate_stats_part.q.out | 6 +-
.../clientpositive/annotate_stats_table.q.out | 4 +-
.../cbo_rp_gby2_map_multi_distinct.q.out | 236 ++++++++
.../dynpart_sort_opt_vectorization.q.out | 16 +-
.../dynpart_sort_optimization2.q.out | 8 +-
.../extrapolate_part_stats_full.q.out | 24 +-
.../extrapolate_part_stats_partial.q.out | 76 +--
.../extrapolate_part_stats_partial_ndv.q.out | 38 +-
.../results/clientpositive/join_parse.q.out | 516 +++++++++++++++++
.../results/clientpositive/orc_analyze.q.out | 46 +-
.../results/clientpositive/orc_file_dump.q.out | 18 +-
.../clientpositive/orc_int_type_promotion.q.out | 6 +-
.../clientpositive/spark/vectorized_ptf.q.out | 108 ++--
.../tez/dynpart_sort_opt_vectorization.q.out | 16 +-
.../tez/dynpart_sort_optimization2.q.out | 8 +-
.../clientpositive/tez/orc_analyze.q.out | 46 +-
.../clientpositive/tez/union_fast_stats.q.out | 16 +-
.../clientpositive/tez/update_all_types.q.out | 4 +-
.../clientpositive/tez/vector_outer_join1.q.out | 48 +-
.../clientpositive/tez/vector_outer_join4.q.out | 48 +-
.../clientpositive/tez/vectorized_ptf.q.out | 108 ++--
.../clientpositive/union_fast_stats.q.out | 16 +-
.../clientpositive/update_all_types.q.out | 4 +-
.../results/clientpositive/vectorized_ptf.q.out | 104 ++--
.../clientpositive/windowing_windowspec2.q.out | 198 +++----
106 files changed, 3256 insertions(+), 1815 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 1c350db,5b21af9..1d79aff
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@@ -3920,38 -3918,19 +3921,53 @@@ public final class Utilities
}
}
+ /**
+ * Returns the full path to the Jar containing the class. It always returns a JAR.
+ *
+ * @param klass
+ * class.
+ *
+ * @return path to the Jar containing the class.
+ */
+ @SuppressWarnings("rawtypes")
+ public static String jarFinderGetJar(Class klass) {
+ Preconditions.checkNotNull(klass, "klass");
+ ClassLoader loader = klass.getClassLoader();
+ if (loader != null) {
+ String class_file = klass.getName().replaceAll("\\.", "/") + ".class";
+ try {
+ for (Enumeration itr = loader.getResources(class_file); itr.hasMoreElements();) {
+ URL url = (URL) itr.nextElement();
+ String path = url.getPath();
+ if (path.startsWith("file:")) {
+ path = path.substring("file:".length());
+ }
+ path = URLDecoder.decode(path, "UTF-8");
+ if ("jar".equals(url.getProtocol())) {
+ path = URLDecoder.decode(path, "UTF-8");
+ return path.replaceAll("!.*$", "");
+ }
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ return null;
+ }
+
+ public static int getDPColOffset(FileSinkDesc conf) {
+
+ if (conf.getWriteType() == AcidUtils.Operation.DELETE) {
+ // For deletes, there is only ROW__ID in non-partitioning, non-bucketing columns.
+ //See : UpdateDeleteSemanticAnalyzer::reparseAndSuperAnalyze() for details.
+ return 1;
+ } else if (conf.getWriteType() == AcidUtils.Operation.UPDATE) {
+ // For updates, ROW__ID is an extra column at index 0.
+ //See : UpdateDeleteSemanticAnalyzer::reparseAndSuperAnalyze() for details.
+ return getColumnNames(conf.getTableInfo().getProperties()).size() + 1;
+ } else {
+ return getColumnNames(conf.getTableInfo().getProperties()).size();
+ }
+
+ }
}
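A hypothetical caller of the merged-in jarFinderGetJar helper, assuming hive-exec is on the classpath. Per the code above, the method returns the enclosing JAR path for classes loaded from a jar: URL; for anything else this excerpt falls through and may return null, so callers should check:

// Illustrative only; HiveConf is used merely as a class that normally
// lives inside a JAR on a deployed cluster.
public class JarFinderDemo {
  public static void main(String[] args) {
    String jar = org.apache.hadoop.hive.ql.exec.Utilities.jarFinderGetJar(
        org.apache.hadoop.hive.conf.HiveConf.class);
    System.out.println(jar == null ? "not loaded from a jar" : jar);
  }
}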
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
index 8f0824b,6fec8b7..3dde0c4
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
@@@ -34,13 -35,12 +35,14 @@@ import com.google.protobuf.CodedInputSt
public abstract class InStream extends InputStream {
private static final Log LOG = LogFactory.getLog(InStream.class);
+ private static final int PROTOBUF_MESSAGE_MAX_LIMIT = 1024 << 20; // 1GB
+ protected final Long fileId;
protected final String name;
- protected final long length;
+ protected long length;
- public InStream(String name, long length) {
+ public InStream(Long fileId, String name, long length) {
+ this.fileId = fileId;
this.name = name;
this.length = length;
}
@@@ -468,9 -444,31 +470,32 @@@
CompressionCodec codec,
int bufferSize) throws IOException {
if (codec == null) {
- return new UncompressedStream(name, input, length);
+ return new UncompressedStream(fileId, name, input, length);
} else {
- return new CompressedStream(name, input, length, codec, bufferSize);
+ return new CompressedStream(fileId, name, input, length, codec, bufferSize);
}
}
+
+ /**
+ * Creates coded input stream (used for protobuf message parsing) with higher message size limit.
+ *
+ * @param name the name of the stream
+ * @param input the list of ranges of bytes for the stream; from disk or cache
+ * @param length the length in bytes of the stream
+ * @param codec the compression codec
+ * @param bufferSize the compression buffer size
+ * @return coded input stream
+ * @throws IOException
+ */
- public static CodedInputStream createCodedInputStream(String name,
++ public static CodedInputStream createCodedInputStream(Long fileId,
++ String name,
+ List<DiskRange> input,
+ long length,
+ CompressionCodec codec,
+ int bufferSize) throws IOException {
- InStream inStream = create(name, input, length, codec, bufferSize);
++ InStream inStream = create(fileId, name, input, length, codec, bufferSize);
+ CodedInputStream codedInputStream = CodedInputStream.newInstance(inStream);
+ codedInputStream.setSizeLimit(PROTOBUF_MESSAGE_MAX_LIMIT);
+ return codedInputStream;
+ }
}
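The motivation for createCodedInputStream() is that protobuf's CodedInputStream enforces a 64MB message size limit by default, which large ORC footer and metadata sections can exceed; raising the limit once replaces the parse-retry-and-double loop removed from ReaderImpl further below. A minimal sketch of the idea using plain protobuf rather than Hive's InStream:

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import com.google.protobuf.CodedInputStream;

final class CodedStreamSketch {
  private static final int PROTOBUF_MESSAGE_MAX_LIMIT = 1024 << 20; // 1GB

  // Wrap a stream and raise the default size limit before parsing a
  // potentially large protobuf message from it.
  static CodedInputStream withRaisedLimit(InputStream in) {
    CodedInputStream cis = CodedInputStream.newInstance(in);
    cis.setSizeLimit(PROTOBUF_MESSAGE_MAX_LIMIT);
    return cis;
  }

  public static void main(String[] args) {
    CodedInputStream cis =
        withRaisedLimit(new ByteArrayInputStream(new byte[0]));
    // SomeOrcMessage.parseFrom(cis) would now accept messages up to 1GB.
    System.out.println("size limit raised to 1GB");
  }
}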
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReaderImpl.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReaderImpl.java
index 1456df3,0000000..5afba51
mode 100644,000000..100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReaderImpl.java
@@@ -1,123 -1,0 +1,123 @@@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.List;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.io.DiskRange;
+import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.BufferChunk;
+
+import com.google.common.collect.Lists;
+
+public class MetadataReaderImpl implements MetadataReader {
+ private final FSDataInputStream file;
+ private final CompressionCodec codec;
+ private final int bufferSize;
+ private final int typeCount;
+
+ public MetadataReaderImpl(FileSystem fileSystem, Path path,
+ CompressionCodec codec, int bufferSize, int typeCount) throws IOException {
+ this(fileSystem.open(path), codec, bufferSize, typeCount);
+ }
+
+ public MetadataReaderImpl(FSDataInputStream file,
+ CompressionCodec codec, int bufferSize, int typeCount) {
+ this.file = file;
+ this.codec = codec;
+ this.bufferSize = bufferSize;
+ this.typeCount = typeCount;
+ }
+
+ @Override
+ public RecordReaderImpl.Index readRowIndex(StripeInformation stripe,
+ OrcProto.StripeFooter footer, boolean[] included, OrcProto.RowIndex[] indexes,
+ boolean[] sargColumns, OrcProto.BloomFilterIndex[] bloomFilterIndices) throws IOException {
+ if (footer == null) {
+ footer = readStripeFooter(stripe);
+ }
+ if (indexes == null) {
+ indexes = new OrcProto.RowIndex[typeCount];
+ }
+ if (bloomFilterIndices == null) {
+ bloomFilterIndices = new OrcProto.BloomFilterIndex[typeCount];
+ }
+ long offset = stripe.getOffset();
+ List<OrcProto.Stream> streams = footer.getStreamsList();
+ for (int i = 0; i < streams.size(); i++) {
+ OrcProto.Stream stream = streams.get(i);
+ OrcProto.Stream nextStream = null;
+ if (i < streams.size() - 1) {
+ nextStream = streams.get(i+1);
+ }
+ int col = stream.getColumn();
+ int len = (int) stream.getLength();
+ // row index stream and bloom filter are interlaced, check if the sarg column contains bloom
+ // filter and combine the io to read row index and bloom filters for that column together
+ if (stream.hasKind() && (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX)) {
+ boolean readBloomFilter = false;
+ if (sargColumns != null && sargColumns[col] &&
+ nextStream.getKind() == OrcProto.Stream.Kind.BLOOM_FILTER) {
+ len += nextStream.getLength();
+ i += 1;
+ readBloomFilter = true;
+ }
+ if ((included == null || included[col]) && indexes[col] == null) {
+ byte[] buffer = new byte[len];
+ file.readFully(offset, buffer, 0, buffer.length);
+ ByteBuffer bb = ByteBuffer.wrap(buffer);
+ indexes[col] = OrcProto.RowIndex.parseFrom(InStream.create(null, "index",
+ Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), stream.getLength(),
+ codec, bufferSize));
+ if (readBloomFilter) {
+ bb.position((int) stream.getLength());
+ bloomFilterIndices[col] = OrcProto.BloomFilterIndex.parseFrom(InStream.create(
+ null, "bloom_filter", Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)),
+ nextStream.getLength(), codec, bufferSize));
+ }
+ }
+ }
+ offset += len;
+ }
+
+ RecordReaderImpl.Index index = new RecordReaderImpl.Index(indexes, bloomFilterIndices);
+ return index;
+ }
+
+ @Override
+ public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException {
+ long offset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength();
+ int tailLength = (int) stripe.getFooterLength();
+
+ // read the footer
+ ByteBuffer tailBuf = ByteBuffer.allocate(tailLength);
+ file.readFully(offset, tailBuf.array(), tailBuf.arrayOffset(), tailLength);
- return OrcProto.StripeFooter.parseFrom(InStream.create(null, "footer",
++ return OrcProto.StripeFooter.parseFrom(InStream.createCodedInputStream(null, "footer",
+ Lists.<DiskRange>newArrayList(new BufferChunk(tailBuf, 0)),
+ tailLength, codec, bufferSize));
+ }
+
+ @Override
+ public void close() throws IOException {
+ file.close();
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 04654bc,57bde3e..8941db1
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@@ -107,8 -106,8 +107,8 @@@ import com.google.common.util.concurren
* that added this event. Insert and update events include the entire row, while
* delete events have null for row.
*/
- public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
+ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
- InputFormatChecker, VectorizedInputFormatInterface,
+ InputFormatChecker, VectorizedInputFormatInterface, LlapWrappableInputFormatInterface,
AcidInputFormat<NullWritable, OrcStruct>, CombineHiveInputFormat.AvoidSplitCombination {
static enum SplitStrategyKind{
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index ce630bb,3bac48a..f3689fe
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@@ -423,47 -383,16 +419,16 @@@ public class ReaderImpl implements Read
int footerSize, CompressionCodec codec, int bufferSize) throws IOException {
bb.position(footerAbsPos);
bb.limit(footerAbsPos + footerSize);
- InputStream instream = InStream.create(null, "footer", Lists.<DiskRange>newArrayList(
- new BufferChunk(bb, 0)), footerSize, codec, bufferSize);
- return OrcProto.Footer.parseFrom(instream);
- return OrcProto.Footer.parseFrom(InStream.createCodedInputStream("footer",
++ return OrcProto.Footer.parseFrom(InStream.createCodedInputStream(null, "footer",
+ Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), footerSize, codec, bufferSize));
}
private static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
int metadataSize, CompressionCodec codec, int bufferSize) throws IOException {
bb.position(metadataAbsPos);
bb.limit(metadataAbsPos + metadataSize);
- InputStream instream = InStream.create(null, "metadata", Lists.<DiskRange>newArrayList(
- new BufferChunk(bb, 0)), metadataSize, codec, bufferSize);
- CodedInputStream in = CodedInputStream.newInstance(instream);
- int msgLimit = DEFAULT_PROTOBUF_MESSAGE_LIMIT;
- OrcProto.Metadata meta = null;
- do {
- try {
- in.setSizeLimit(msgLimit);
- meta = OrcProto.Metadata.parseFrom(in);
- } catch (InvalidProtocolBufferException e) {
- if (e.getMessage().contains("Protocol message was too large")) {
- LOG.warn("Metadata section is larger than " + msgLimit + " bytes. Increasing the max" +
- " size of the coded input stream." );
-
- msgLimit = msgLimit << 1;
- if (msgLimit > PROTOBUF_MESSAGE_MAX_LIMIT) {
- LOG.error("Metadata section exceeds max protobuf message size of " +
- PROTOBUF_MESSAGE_MAX_LIMIT + " bytes.");
- throw e;
- }
-
- // we must have failed in the middle of reading instream and instream doesn't support
- // resetting the stream
- instream = InStream.create(null, "metadata", Lists.<DiskRange>newArrayList(
- new BufferChunk(bb, 0)), metadataSize, codec, bufferSize);
- in = CodedInputStream.newInstance(instream);
- } else {
- throw e;
- }
- }
- } while (meta == null);
- return meta;
- return OrcProto.Metadata.parseFrom(InStream.createCodedInputStream("metadata",
++ return OrcProto.Metadata.parseFrom(InStream.createCodedInputStream(null, "metadata",
+ Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), metadataSize, codec, bufferSize));
}
private static OrcProto.PostScript extractPostScript(ByteBuffer bb, Path path,
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
----------------------------------------------------------------------
diff --cc ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
index 7e81615,06e3362..e78f7aa
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
@@@ -656,17 -657,18 +657,18 @@@ public class TestOrcFile
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
- assertEquals(3, OrcUtils.getFlattenedColumnsCount(inspector));
+ TypeDescription schema = writer.getSchema();
+ assertEquals(2, schema.getMaximumId());
boolean[] expected = new boolean[] {false, true, false};
- boolean[] included = OrcUtils.includeColumns("int1", "int1,string1", inspector);
+ boolean[] included = OrcUtils.includeColumns("int1", schema);
assertEquals(true, Arrays.equals(expected, included));
- Metadata metadata = reader.getMetadata();
- int numStripes = metadata.getStripeStatistics().size();
+ List<StripeStatistics> stats = reader.getStripeStatistics();
+ int numStripes = stats.size();
assertEquals(3, numStripes);
- StripeStatistics ss1 = metadata.getStripeStatistics().get(0);
- StripeStatistics ss2 = metadata.getStripeStatistics().get(1);
- StripeStatistics ss3 = metadata.getStripeStatistics().get(2);
+ StripeStatistics ss1 = stats.get(0);
+ StripeStatistics ss2 = stats.get(1);
+ StripeStatistics ss3 = stats.get(2);
assertEquals(5000, ss1.getColumnStatistics()[0].getNumberOfValues());
assertEquals(5000, ss2.getColumnStatistics()[0].getNumberOfValues());
@@@ -777,9 -777,11 +777,9 @@@
true, true, true, true};
included = OrcUtils.includeColumns(
"boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map",
- "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector);
+ schema);
assertEquals(true, Arrays.equals(expected, included));
- Metadata metadata = reader.getMetadata();
-
// check the stats
ColumnStatistics[] stats = reader.getStatistics();
assertEquals(2, stats[1].getNumberOfValues());
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/results/clientpositive/tez/vector_outer_join1.q.out
----------------------------------------------------------------------
diff --cc ql/src/test/results/clientpositive/tez/vector_outer_join1.q.out
index 46a25e7,92e7163..c94c3f2
--- a/ql/src/test/results/clientpositive/tez/vector_outer_join1.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_outer_join1.q.out
@@@ -562,10 -562,9 +562,10 @@@ STAGE PLANS
key expressions: _col0 (type: tinyint)
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Reducer 2
+ Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), sum(VALUE._col1)
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out
----------------------------------------------------------------------
diff --cc ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out
index f0a9185,aa201ad..f66d5a4
--- a/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out
@@@ -931,10 -931,9 +931,10 @@@ STAGE PLANS
key expressions: _col0 (type: tinyint)
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Reducer 2
+ Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/results/clientpositive/vectorized_ptf.q.out
----------------------------------------------------------------------