Posted to commits@hive.apache.org by se...@apache.org on 2015/10/02 04:37:36 UTC
[01/22] hive git commit: HIVE-11910: TestHCatLoaderEncryption should shutdown created MiniDFS instance (Jason Dere, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/llap f272ccb25 -> c5ccf6694
HIVE-11910: TestHCatLoaderEncryption should shutdown created MiniDFS instance (Jason Dere, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b36cb379
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b36cb379
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b36cb379
Branch: refs/heads/llap
Commit: b36cb37963eb7f69621543f89eaa21ef1458e031
Parents: 064e37c
Author: Jason Dere <jd...@hortonworks.com>
Authored: Wed Sep 30 12:12:47 2015 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Wed Sep 30 12:12:47 2015 -0700
----------------------------------------------------------------------
.../org/apache/hive/hcatalog/pig/TestHCatLoaderEncryption.java | 3 +++
1 file changed, 3 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/b36cb379/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderEncryption.java
----------------------------------------------------------------------
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderEncryption.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderEncryption.java
index 3b8076b..df3b72a 100644
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderEncryption.java
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderEncryption.java
@@ -426,6 +426,9 @@ public class TestHCatLoaderEncryption {
       }
     } finally {
       FileUtils.deleteDirectory(new File(TEST_DATA_DIR));
+      if (dfs != null) {
+        dfs.shutdown();
+      }
     }
   }
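For context, the pattern this commit applies is the standard MiniDFS lifecycle guard: any test that builds a MiniDFSCluster should shut it down in a finally block or @After method, so later tests do not inherit a running cluster. A minimal sketch, assuming Hadoop's MiniDFSCluster test API; the class and method names are illustrative, not from the commit:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.MiniDFSCluster;
    import org.junit.After;
    import org.junit.Before;

    public class MiniDfsLifecycleSketch {
      private MiniDFSCluster dfs;

      @Before
      public void setUp() throws Exception {
        // Start a single-node in-process HDFS for the test.
        dfs = new MiniDFSCluster.Builder(new Configuration()).numDataNodes(1).build();
      }

      @After
      public void tearDown() {
        // Same guard as the fix above: setUp may have failed before dfs was assigned.
        if (dfs != null) {
          dfs.shutdown();
        }
      }
    }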
[02/22] hive git commit: HIVE-11962 : Improve windowing_windowspec2.q tests to return consistent results (Aihua Xu via Szehon)
Posted by se...@apache.org.
HIVE-11962 : Improve windowing_windowspec2.q tests to return consistent results (Aihua Xu via Szehon)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/274847e2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/274847e2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/274847e2
Branch: refs/heads/llap
Commit: 274847e2a38164637701ac4c62802ac91cca4432
Parents: b36cb37
Author: Szehon Ho <sz...@cloudera.com>
Authored: Wed Sep 30 12:17:06 2015 -0700
Committer: Szehon Ho <sz...@cloudera.com>
Committed: Wed Sep 30 12:18:27 2015 -0700
----------------------------------------------------------------------
.../clientpositive/windowing_windowspec2.q | 16 +-
.../clientpositive/windowing_windowspec2.q.out | 198 +++++++++----------
2 files changed, 107 insertions(+), 107 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/274847e2/ql/src/test/queries/clientpositive/windowing_windowspec2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/windowing_windowspec2.q b/ql/src/test/queries/clientpositive/windowing_windowspec2.q
index 3c5bc3d..0ec1e99 100644
--- a/ql/src/test/queries/clientpositive/windowing_windowspec2.q
+++ b/ql/src/test/queries/clientpositive/windowing_windowspec2.q
@@ -36,16 +36,16 @@ select ts, f, count(f) over (partition by ts order by f rows between 1 following
select ts, f, count(f) over (partition by ts order by f rows between unbounded preceding and 1 following) from over10k limit 100;
-- max
-select ts, f, max(f) over (partition by ts order by t rows between 2 preceding and 1 preceding) from over10k limit 100;
-select ts, f, max(f) over (partition by ts order by t rows between unbounded preceding and 1 preceding) from over10k limit 100;
-select ts, f, max(f) over (partition by ts order by t rows between 1 following and 2 following) from over10k limit 100;
-select ts, f, max(f) over (partition by ts order by t rows between unbounded preceding and 1 following) from over10k limit 100;
+select ts, f, max(f) over (partition by ts order by t,f rows between 2 preceding and 1 preceding) from over10k limit 100;
+select ts, f, max(f) over (partition by ts order by t,f rows between unbounded preceding and 1 preceding) from over10k limit 100;
+select ts, f, max(f) over (partition by ts order by t,f rows between 1 following and 2 following) from over10k limit 100;
+select ts, f, max(f) over (partition by ts order by t,f rows between unbounded preceding and 1 following) from over10k limit 100;
-- min
-select ts, f, min(f) over (partition by ts order by t rows between 2 preceding and 1 preceding) from over10k limit 100;
-select ts, f, min(f) over (partition by ts order by t rows between unbounded preceding and 1 preceding) from over10k limit 100;
-select ts, f, min(f) over (partition by ts order by t rows between 1 following and 2 following) from over10k limit 100;
-select ts, f, min(f) over (partition by ts order by t rows between unbounded preceding and 1 following) from over10k limit 100;
+select ts, f, min(f) over (partition by ts order by t,f rows between 2 preceding and 1 preceding) from over10k limit 100;
+select ts, f, min(f) over (partition by ts order by t,f rows between unbounded preceding and 1 preceding) from over10k limit 100;
+select ts, f, min(f) over (partition by ts order by t,f rows between 1 following and 2 following) from over10k limit 100;
+select ts, f, min(f) over (partition by ts order by t,f rows between unbounded preceding and 1 following) from over10k limit 100;
-- first_value
select ts, f, first_value(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100;
http://git-wip-us.apache.org/repos/asf/hive/blob/274847e2/ql/src/test/results/clientpositive/windowing_windowspec2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/windowing_windowspec2.q.out b/ql/src/test/results/clientpositive/windowing_windowspec2.q.out
index e71a03f..a88eb6c 100644
--- a/ql/src/test/results/clientpositive/windowing_windowspec2.q.out
+++ b/ql/src/test/results/clientpositive/windowing_windowspec2.q.out
@@ -1347,12 +1347,12 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703072 79.46 24
2013-03-01 09:11:58.703072 80.02 25
PREHOOK: query: -- max
-select ts, f, max(f) over (partition by ts order by t rows between 2 preceding and 1 preceding) from over10k limit 100
+select ts, f, max(f) over (partition by ts order by t,f rows between 2 preceding and 1 preceding) from over10k limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k
#### A masked pattern was here ####
POSTHOOK: query: -- max
-select ts, f, max(f) over (partition by ts order by t rows between 2 preceding and 1 preceding) from over10k limit 100
+select ts, f, max(f) over (partition by ts order by t,f rows between 2 preceding and 1 preceding) from over10k limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k
#### A masked pattern was here ####
@@ -1363,8 +1363,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.70307 31.17 39.48
2013-03-01 09:11:58.70307 56.94 31.17
2013-03-01 09:11:58.70307 78.58 56.94
-2013-03-01 09:11:58.70307 38.61 78.58
2013-03-01 09:11:58.70307 14.78 78.58
+2013-03-01 09:11:58.70307 38.61 78.58
2013-03-01 09:11:58.70307 91.36 38.61
2013-03-01 09:11:58.70307 28.69 91.36
2013-03-01 09:11:58.70307 73.52 91.36
@@ -1397,10 +1397,10 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 80.43 68.62
2013-03-01 09:11:58.703071 54.09 80.43
2013-03-01 09:11:58.703071 42.08 80.43
-2013-03-01 09:11:58.703071 64.55 54.09
-2013-03-01 09:11:58.703071 48.89 64.55
+2013-03-01 09:11:58.703071 48.89 54.09
+2013-03-01 09:11:58.703071 64.55 48.89
2013-03-01 09:11:58.703071 56.45 64.55
-2013-03-01 09:11:58.703071 1.99 56.45
+2013-03-01 09:11:58.703071 1.99 64.55
2013-03-01 09:11:58.703071 94.27 56.45
2013-03-01 09:11:58.703071 35.32 94.27
2013-03-01 09:11:58.703071 10.62 94.27
@@ -1419,23 +1419,23 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 31.84 83.98
2013-03-01 09:11:58.703071 50.28 83.98
2013-03-01 09:11:58.703071 3.73 50.28
-2013-03-01 09:11:58.703071 53.26 50.28
-2013-03-01 09:11:58.703071 29.71 53.26
+2013-03-01 09:11:58.703071 29.71 50.28
+2013-03-01 09:11:58.703071 53.26 29.71
2013-03-01 09:11:58.703071 8.86 53.26
-2013-03-01 09:11:58.703071 21.01 29.71
+2013-03-01 09:11:58.703071 21.01 53.26
2013-03-01 09:11:58.703071 84.21 21.01
2013-03-01 09:11:58.703071 19.1 84.21
2013-03-01 09:11:58.703071 31.94 84.21
2013-03-01 09:11:58.703071 88.93 31.94
2013-03-01 09:11:58.703071 12.83 88.93
2013-03-01 09:11:58.703071 29.07 88.93
-2013-03-01 09:11:58.703071 61.88 29.07
-2013-03-01 09:11:58.703071 61.41 61.88
+2013-03-01 09:11:58.703071 61.41 29.07
+2013-03-01 09:11:58.703071 61.88 61.41
2013-03-01 09:11:58.703071 46.84 61.88
-2013-03-01 09:11:58.703072 95.01 NULL
-2013-03-01 09:11:58.703072 62.09 95.01
+2013-03-01 09:11:58.703072 62.09 NULL
+2013-03-01 09:11:58.703072 95.01 62.09
2013-03-01 09:11:58.703072 29.01 95.01
-2013-03-01 09:11:58.703072 79.46 62.09
+2013-03-01 09:11:58.703072 79.46 95.01
2013-03-01 09:11:58.703072 4.48 79.46
2013-03-01 09:11:58.703072 99.26 79.46
2013-03-01 09:11:58.703072 58.77 99.26
@@ -1453,14 +1453,14 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703072 25.91 56.7
2013-03-01 09:11:58.703072 88.08 39.3
2013-03-01 09:11:58.703072 0.48 88.08
-2013-03-01 09:11:58.703072 88.83 88.08
-2013-03-01 09:11:58.703072 9.0 88.83
-2013-03-01 09:11:58.703072 54.1 88.83
-PREHOOK: query: select ts, f, max(f) over (partition by ts order by t rows between unbounded preceding and 1 preceding) from over10k limit 100
+2013-03-01 09:11:58.703072 9.0 88.08
+2013-03-01 09:11:58.703072 88.83 9.0
+2013-03-01 09:11:58.703072 45.91 88.83
+PREHOOK: query: select ts, f, max(f) over (partition by ts order by t,f rows between unbounded preceding and 1 preceding) from over10k limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k
#### A masked pattern was here ####
-POSTHOOK: query: select ts, f, max(f) over (partition by ts order by t rows between unbounded preceding and 1 preceding) from over10k limit 100
+POSTHOOK: query: select ts, f, max(f) over (partition by ts order by t,f rows between unbounded preceding and 1 preceding) from over10k limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k
#### A masked pattern was here ####
@@ -1471,8 +1471,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.70307 31.17 39.48
2013-03-01 09:11:58.70307 56.94 39.48
2013-03-01 09:11:58.70307 78.58 56.94
-2013-03-01 09:11:58.70307 38.61 78.58
2013-03-01 09:11:58.70307 14.78 78.58
+2013-03-01 09:11:58.70307 38.61 78.58
2013-03-01 09:11:58.70307 91.36 78.58
2013-03-01 09:11:58.70307 28.69 91.36
2013-03-01 09:11:58.70307 73.52 91.36
@@ -1505,8 +1505,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 80.43 99.45
2013-03-01 09:11:58.703071 54.09 99.45
2013-03-01 09:11:58.703071 42.08 99.45
-2013-03-01 09:11:58.703071 64.55 99.45
2013-03-01 09:11:58.703071 48.89 99.45
+2013-03-01 09:11:58.703071 64.55 99.45
2013-03-01 09:11:58.703071 56.45 99.45
2013-03-01 09:11:58.703071 1.99 99.45
2013-03-01 09:11:58.703071 94.27 99.45
@@ -1527,8 +1527,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 31.84 99.45
2013-03-01 09:11:58.703071 50.28 99.45
2013-03-01 09:11:58.703071 3.73 99.45
-2013-03-01 09:11:58.703071 53.26 99.45
2013-03-01 09:11:58.703071 29.71 99.45
+2013-03-01 09:11:58.703071 53.26 99.45
2013-03-01 09:11:58.703071 8.86 99.45
2013-03-01 09:11:58.703071 21.01 99.45
2013-03-01 09:11:58.703071 84.21 99.45
@@ -1537,11 +1537,11 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 88.93 99.45
2013-03-01 09:11:58.703071 12.83 99.45
2013-03-01 09:11:58.703071 29.07 99.45
-2013-03-01 09:11:58.703071 61.88 99.45
2013-03-01 09:11:58.703071 61.41 99.45
+2013-03-01 09:11:58.703071 61.88 99.45
2013-03-01 09:11:58.703071 46.84 99.45
-2013-03-01 09:11:58.703072 95.01 NULL
-2013-03-01 09:11:58.703072 62.09 95.01
+2013-03-01 09:11:58.703072 62.09 NULL
+2013-03-01 09:11:58.703072 95.01 62.09
2013-03-01 09:11:58.703072 29.01 95.01
2013-03-01 09:11:58.703072 79.46 95.01
2013-03-01 09:11:58.703072 4.48 95.01
@@ -1561,14 +1561,14 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703072 25.91 99.26
2013-03-01 09:11:58.703072 88.08 99.26
2013-03-01 09:11:58.703072 0.48 99.26
-2013-03-01 09:11:58.703072 88.83 99.26
2013-03-01 09:11:58.703072 9.0 99.26
-2013-03-01 09:11:58.703072 54.1 99.26
-PREHOOK: query: select ts, f, max(f) over (partition by ts order by t rows between 1 following and 2 following) from over10k limit 100
+2013-03-01 09:11:58.703072 88.83 99.26
+2013-03-01 09:11:58.703072 45.91 99.26
+PREHOOK: query: select ts, f, max(f) over (partition by ts order by t,f rows between 1 following and 2 following) from over10k limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k
#### A masked pattern was here ####
-POSTHOOK: query: select ts, f, max(f) over (partition by ts order by t rows between 1 following and 2 following) from over10k limit 100
+POSTHOOK: query: select ts, f, max(f) over (partition by ts order by t,f rows between 1 following and 2 following) from over10k limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k
#### A masked pattern was here ####
@@ -1579,8 +1579,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.70307 31.17 78.58
2013-03-01 09:11:58.70307 56.94 78.58
2013-03-01 09:11:58.70307 78.58 38.61
-2013-03-01 09:11:58.70307 38.61 91.36
2013-03-01 09:11:58.70307 14.78 91.36
+2013-03-01 09:11:58.70307 38.61 91.36
2013-03-01 09:11:58.70307 91.36 73.52
2013-03-01 09:11:58.70307 28.69 92.96
2013-03-01 09:11:58.70307 73.52 95.04
@@ -1611,10 +1611,10 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 37.32 80.43
2013-03-01 09:11:58.703071 68.62 80.43
2013-03-01 09:11:58.703071 80.43 54.09
-2013-03-01 09:11:58.703071 54.09 64.55
+2013-03-01 09:11:58.703071 54.09 48.89
2013-03-01 09:11:58.703071 42.08 64.55
+2013-03-01 09:11:58.703071 48.89 64.55
2013-03-01 09:11:58.703071 64.55 56.45
-2013-03-01 09:11:58.703071 48.89 56.45
2013-03-01 09:11:58.703071 56.45 94.27
2013-03-01 09:11:58.703071 1.99 94.27
2013-03-01 09:11:58.703071 94.27 35.32
@@ -1633,23 +1633,23 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 27.23 83.98
2013-03-01 09:11:58.703071 83.98 50.28
2013-03-01 09:11:58.703071 31.84 50.28
-2013-03-01 09:11:58.703071 50.28 53.26
+2013-03-01 09:11:58.703071 50.28 29.71
2013-03-01 09:11:58.703071 3.73 53.26
-2013-03-01 09:11:58.703071 53.26 29.71
-2013-03-01 09:11:58.703071 29.71 21.01
+2013-03-01 09:11:58.703071 29.71 53.26
+2013-03-01 09:11:58.703071 53.26 21.01
2013-03-01 09:11:58.703071 8.86 84.21
2013-03-01 09:11:58.703071 21.01 84.21
2013-03-01 09:11:58.703071 84.21 31.94
2013-03-01 09:11:58.703071 19.1 88.93
2013-03-01 09:11:58.703071 31.94 88.93
2013-03-01 09:11:58.703071 88.93 29.07
-2013-03-01 09:11:58.703071 12.83 61.88
+2013-03-01 09:11:58.703071 12.83 61.41
2013-03-01 09:11:58.703071 29.07 61.88
-2013-03-01 09:11:58.703071 61.88 61.41
-2013-03-01 09:11:58.703071 61.41 46.84
+2013-03-01 09:11:58.703071 61.41 61.88
+2013-03-01 09:11:58.703071 61.88 46.84
2013-03-01 09:11:58.703071 46.84 NULL
-2013-03-01 09:11:58.703072 95.01 62.09
-2013-03-01 09:11:58.703072 62.09 79.46
+2013-03-01 09:11:58.703072 62.09 95.01
+2013-03-01 09:11:58.703072 95.01 79.46
2013-03-01 09:11:58.703072 29.01 79.46
2013-03-01 09:11:58.703072 79.46 99.26
2013-03-01 09:11:58.703072 4.48 99.26
@@ -1667,16 +1667,16 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703072 56.7 39.3
2013-03-01 09:11:58.703072 39.3 88.08
2013-03-01 09:11:58.703072 25.91 88.08
-2013-03-01 09:11:58.703072 88.08 88.83
+2013-03-01 09:11:58.703072 88.08 9.0
2013-03-01 09:11:58.703072 0.48 88.83
+2013-03-01 09:11:58.703072 9.0 88.83
2013-03-01 09:11:58.703072 88.83 54.1
-2013-03-01 09:11:58.703072 9.0 54.1
-2013-03-01 09:11:58.703072 54.1 45.91
-PREHOOK: query: select ts, f, max(f) over (partition by ts order by t rows between unbounded preceding and 1 following) from over10k limit 100
+2013-03-01 09:11:58.703072 45.91 54.1
+PREHOOK: query: select ts, f, max(f) over (partition by ts order by t,f rows between unbounded preceding and 1 following) from over10k limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k
#### A masked pattern was here ####
-POSTHOOK: query: select ts, f, max(f) over (partition by ts order by t rows between unbounded preceding and 1 following) from over10k limit 100
+POSTHOOK: query: select ts, f, max(f) over (partition by ts order by t,f rows between unbounded preceding and 1 following) from over10k limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k
#### A masked pattern was here ####
@@ -1687,8 +1687,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.70307 31.17 56.94
2013-03-01 09:11:58.70307 56.94 78.58
2013-03-01 09:11:58.70307 78.58 78.58
-2013-03-01 09:11:58.70307 38.61 78.58
-2013-03-01 09:11:58.70307 14.78 91.36
+2013-03-01 09:11:58.70307 14.78 78.58
+2013-03-01 09:11:58.70307 38.61 91.36
2013-03-01 09:11:58.70307 91.36 91.36
2013-03-01 09:11:58.70307 28.69 91.36
2013-03-01 09:11:58.70307 73.52 92.96
@@ -1721,8 +1721,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 80.43 99.45
2013-03-01 09:11:58.703071 54.09 99.45
2013-03-01 09:11:58.703071 42.08 99.45
-2013-03-01 09:11:58.703071 64.55 99.45
2013-03-01 09:11:58.703071 48.89 99.45
+2013-03-01 09:11:58.703071 64.55 99.45
2013-03-01 09:11:58.703071 56.45 99.45
2013-03-01 09:11:58.703071 1.99 99.45
2013-03-01 09:11:58.703071 94.27 99.45
@@ -1743,8 +1743,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 31.84 99.45
2013-03-01 09:11:58.703071 50.28 99.45
2013-03-01 09:11:58.703071 3.73 99.45
-2013-03-01 09:11:58.703071 53.26 99.45
2013-03-01 09:11:58.703071 29.71 99.45
+2013-03-01 09:11:58.703071 53.26 99.45
2013-03-01 09:11:58.703071 8.86 99.45
2013-03-01 09:11:58.703071 21.01 99.45
2013-03-01 09:11:58.703071 84.21 99.45
@@ -1753,11 +1753,11 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 88.93 99.45
2013-03-01 09:11:58.703071 12.83 99.45
2013-03-01 09:11:58.703071 29.07 99.45
-2013-03-01 09:11:58.703071 61.88 99.45
2013-03-01 09:11:58.703071 61.41 99.45
+2013-03-01 09:11:58.703071 61.88 99.45
2013-03-01 09:11:58.703071 46.84 99.45
-2013-03-01 09:11:58.703072 95.01 95.01
2013-03-01 09:11:58.703072 62.09 95.01
+2013-03-01 09:11:58.703072 95.01 95.01
2013-03-01 09:11:58.703072 29.01 95.01
2013-03-01 09:11:58.703072 79.46 95.01
2013-03-01 09:11:58.703072 4.48 99.26
@@ -1777,16 +1777,16 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703072 25.91 99.26
2013-03-01 09:11:58.703072 88.08 99.26
2013-03-01 09:11:58.703072 0.48 99.26
-2013-03-01 09:11:58.703072 88.83 99.26
2013-03-01 09:11:58.703072 9.0 99.26
-2013-03-01 09:11:58.703072 54.1 99.26
+2013-03-01 09:11:58.703072 88.83 99.26
+2013-03-01 09:11:58.703072 45.91 99.26
PREHOOK: query: -- min
-select ts, f, min(f) over (partition by ts order by t rows between 2 preceding and 1 preceding) from over10k limit 100
+select ts, f, min(f) over (partition by ts order by t,f rows between 2 preceding and 1 preceding) from over10k limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k
#### A masked pattern was here ####
POSTHOOK: query: -- min
-select ts, f, min(f) over (partition by ts order by t rows between 2 preceding and 1 preceding) from over10k limit 100
+select ts, f, min(f) over (partition by ts order by t,f rows between 2 preceding and 1 preceding) from over10k limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k
#### A masked pattern was here ####
@@ -1797,10 +1797,10 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.70307 31.17 17.85
2013-03-01 09:11:58.70307 56.94 17.85
2013-03-01 09:11:58.70307 78.58 31.17
-2013-03-01 09:11:58.70307 38.61 56.94
-2013-03-01 09:11:58.70307 14.78 38.61
+2013-03-01 09:11:58.70307 14.78 56.94
+2013-03-01 09:11:58.70307 38.61 14.78
2013-03-01 09:11:58.70307 91.36 14.78
-2013-03-01 09:11:58.70307 28.69 14.78
+2013-03-01 09:11:58.70307 28.69 38.61
2013-03-01 09:11:58.70307 73.52 28.69
2013-03-01 09:11:58.70307 92.96 28.69
2013-03-01 09:11:58.70307 95.04 73.52
@@ -1831,10 +1831,10 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 80.43 37.32
2013-03-01 09:11:58.703071 54.09 68.62
2013-03-01 09:11:58.703071 42.08 54.09
-2013-03-01 09:11:58.703071 64.55 42.08
2013-03-01 09:11:58.703071 48.89 42.08
+2013-03-01 09:11:58.703071 64.55 42.08
2013-03-01 09:11:58.703071 56.45 48.89
-2013-03-01 09:11:58.703071 1.99 48.89
+2013-03-01 09:11:58.703071 1.99 56.45
2013-03-01 09:11:58.703071 94.27 1.99
2013-03-01 09:11:58.703071 35.32 1.99
2013-03-01 09:11:58.703071 10.62 35.32
@@ -1853,8 +1853,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 31.84 27.23
2013-03-01 09:11:58.703071 50.28 31.84
2013-03-01 09:11:58.703071 3.73 31.84
-2013-03-01 09:11:58.703071 53.26 3.73
2013-03-01 09:11:58.703071 29.71 3.73
+2013-03-01 09:11:58.703071 53.26 3.73
2013-03-01 09:11:58.703071 8.86 29.71
2013-03-01 09:11:58.703071 21.01 8.86
2013-03-01 09:11:58.703071 84.21 8.86
@@ -1863,11 +1863,11 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 88.93 19.1
2013-03-01 09:11:58.703071 12.83 31.94
2013-03-01 09:11:58.703071 29.07 12.83
-2013-03-01 09:11:58.703071 61.88 12.83
-2013-03-01 09:11:58.703071 61.41 29.07
+2013-03-01 09:11:58.703071 61.41 12.83
+2013-03-01 09:11:58.703071 61.88 29.07
2013-03-01 09:11:58.703071 46.84 61.41
-2013-03-01 09:11:58.703072 95.01 NULL
-2013-03-01 09:11:58.703072 62.09 95.01
+2013-03-01 09:11:58.703072 62.09 NULL
+2013-03-01 09:11:58.703072 95.01 62.09
2013-03-01 09:11:58.703072 29.01 62.09
2013-03-01 09:11:58.703072 79.46 29.01
2013-03-01 09:11:58.703072 4.48 29.01
@@ -1887,14 +1887,14 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703072 25.91 39.3
2013-03-01 09:11:58.703072 88.08 25.91
2013-03-01 09:11:58.703072 0.48 25.91
-2013-03-01 09:11:58.703072 88.83 0.48
2013-03-01 09:11:58.703072 9.0 0.48
-2013-03-01 09:11:58.703072 54.1 9.0
-PREHOOK: query: select ts, f, min(f) over (partition by ts order by t rows between unbounded preceding and 1 preceding) from over10k limit 100
+2013-03-01 09:11:58.703072 88.83 0.48
+2013-03-01 09:11:58.703072 45.91 9.0
+PREHOOK: query: select ts, f, min(f) over (partition by ts order by t,f rows between unbounded preceding and 1 preceding) from over10k limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k
#### A masked pattern was here ####
-POSTHOOK: query: select ts, f, min(f) over (partition by ts order by t rows between unbounded preceding and 1 preceding) from over10k limit 100
+POSTHOOK: query: select ts, f, min(f) over (partition by ts order by t,f rows between unbounded preceding and 1 preceding) from over10k limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k
#### A masked pattern was here ####
@@ -1905,8 +1905,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.70307 31.17 14.54
2013-03-01 09:11:58.70307 56.94 14.54
2013-03-01 09:11:58.70307 78.58 14.54
-2013-03-01 09:11:58.70307 38.61 14.54
2013-03-01 09:11:58.70307 14.78 14.54
+2013-03-01 09:11:58.70307 38.61 14.54
2013-03-01 09:11:58.70307 91.36 14.54
2013-03-01 09:11:58.70307 28.69 14.54
2013-03-01 09:11:58.70307 73.52 14.54
@@ -1939,8 +1939,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 80.43 0.83
2013-03-01 09:11:58.703071 54.09 0.83
2013-03-01 09:11:58.703071 42.08 0.83
-2013-03-01 09:11:58.703071 64.55 0.83
2013-03-01 09:11:58.703071 48.89 0.83
+2013-03-01 09:11:58.703071 64.55 0.83
2013-03-01 09:11:58.703071 56.45 0.83
2013-03-01 09:11:58.703071 1.99 0.83
2013-03-01 09:11:58.703071 94.27 0.83
@@ -1961,8 +1961,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 31.84 0.83
2013-03-01 09:11:58.703071 50.28 0.83
2013-03-01 09:11:58.703071 3.73 0.83
-2013-03-01 09:11:58.703071 53.26 0.83
2013-03-01 09:11:58.703071 29.71 0.83
+2013-03-01 09:11:58.703071 53.26 0.83
2013-03-01 09:11:58.703071 8.86 0.83
2013-03-01 09:11:58.703071 21.01 0.83
2013-03-01 09:11:58.703071 84.21 0.83
@@ -1971,11 +1971,11 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 88.93 0.83
2013-03-01 09:11:58.703071 12.83 0.83
2013-03-01 09:11:58.703071 29.07 0.83
-2013-03-01 09:11:58.703071 61.88 0.83
2013-03-01 09:11:58.703071 61.41 0.83
+2013-03-01 09:11:58.703071 61.88 0.83
2013-03-01 09:11:58.703071 46.84 0.83
-2013-03-01 09:11:58.703072 95.01 NULL
-2013-03-01 09:11:58.703072 62.09 95.01
+2013-03-01 09:11:58.703072 62.09 NULL
+2013-03-01 09:11:58.703072 95.01 62.09
2013-03-01 09:11:58.703072 29.01 62.09
2013-03-01 09:11:58.703072 79.46 29.01
2013-03-01 09:11:58.703072 4.48 29.01
@@ -1995,14 +1995,14 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703072 25.91 0.79
2013-03-01 09:11:58.703072 88.08 0.79
2013-03-01 09:11:58.703072 0.48 0.79
-2013-03-01 09:11:58.703072 88.83 0.48
2013-03-01 09:11:58.703072 9.0 0.48
-2013-03-01 09:11:58.703072 54.1 0.48
-PREHOOK: query: select ts, f, min(f) over (partition by ts order by t rows between 1 following and 2 following) from over10k limit 100
+2013-03-01 09:11:58.703072 88.83 0.48
+2013-03-01 09:11:58.703072 45.91 0.48
+PREHOOK: query: select ts, f, min(f) over (partition by ts order by t,f rows between 1 following and 2 following) from over10k limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k
#### A masked pattern was here ####
-POSTHOOK: query: select ts, f, min(f) over (partition by ts order by t rows between 1 following and 2 following) from over10k limit 100
+POSTHOOK: query: select ts, f, min(f) over (partition by ts order by t,f rows between 1 following and 2 following) from over10k limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k
#### A masked pattern was here ####
@@ -2011,10 +2011,10 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.70307 39.48 17.85
2013-03-01 09:11:58.70307 17.85 31.17
2013-03-01 09:11:58.70307 31.17 56.94
-2013-03-01 09:11:58.70307 56.94 38.61
+2013-03-01 09:11:58.70307 56.94 14.78
2013-03-01 09:11:58.70307 78.58 14.78
-2013-03-01 09:11:58.70307 38.61 14.78
-2013-03-01 09:11:58.70307 14.78 28.69
+2013-03-01 09:11:58.70307 14.78 38.61
+2013-03-01 09:11:58.70307 38.61 28.69
2013-03-01 09:11:58.70307 91.36 28.69
2013-03-01 09:11:58.70307 28.69 73.52
2013-03-01 09:11:58.70307 73.52 92.96
@@ -2047,8 +2047,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 80.43 42.08
2013-03-01 09:11:58.703071 54.09 42.08
2013-03-01 09:11:58.703071 42.08 48.89
-2013-03-01 09:11:58.703071 64.55 48.89
-2013-03-01 09:11:58.703071 48.89 1.99
+2013-03-01 09:11:58.703071 48.89 56.45
+2013-03-01 09:11:58.703071 64.55 1.99
2013-03-01 09:11:58.703071 56.45 1.99
2013-03-01 09:11:58.703071 1.99 35.32
2013-03-01 09:11:58.703071 94.27 10.62
@@ -2069,8 +2069,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 31.84 3.73
2013-03-01 09:11:58.703071 50.28 3.73
2013-03-01 09:11:58.703071 3.73 29.71
-2013-03-01 09:11:58.703071 53.26 8.86
2013-03-01 09:11:58.703071 29.71 8.86
+2013-03-01 09:11:58.703071 53.26 8.86
2013-03-01 09:11:58.703071 8.86 21.01
2013-03-01 09:11:58.703071 21.01 19.1
2013-03-01 09:11:58.703071 84.21 19.1
@@ -2079,11 +2079,11 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 88.93 12.83
2013-03-01 09:11:58.703071 12.83 29.07
2013-03-01 09:11:58.703071 29.07 61.41
-2013-03-01 09:11:58.703071 61.88 46.84
2013-03-01 09:11:58.703071 61.41 46.84
+2013-03-01 09:11:58.703071 61.88 46.84
2013-03-01 09:11:58.703071 46.84 NULL
-2013-03-01 09:11:58.703072 95.01 29.01
2013-03-01 09:11:58.703072 62.09 29.01
+2013-03-01 09:11:58.703072 95.01 29.01
2013-03-01 09:11:58.703072 29.01 4.48
2013-03-01 09:11:58.703072 79.46 4.48
2013-03-01 09:11:58.703072 4.48 58.77
@@ -2103,14 +2103,14 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703072 25.91 0.48
2013-03-01 09:11:58.703072 88.08 0.48
2013-03-01 09:11:58.703072 0.48 9.0
-2013-03-01 09:11:58.703072 88.83 9.0
2013-03-01 09:11:58.703072 9.0 45.91
-2013-03-01 09:11:58.703072 54.1 0.36
-PREHOOK: query: select ts, f, min(f) over (partition by ts order by t rows between unbounded preceding and 1 following) from over10k limit 100
+2013-03-01 09:11:58.703072 88.83 45.91
+2013-03-01 09:11:58.703072 45.91 0.36
+PREHOOK: query: select ts, f, min(f) over (partition by ts order by t,f rows between unbounded preceding and 1 following) from over10k limit 100
PREHOOK: type: QUERY
PREHOOK: Input: default@over10k
#### A masked pattern was here ####
-POSTHOOK: query: select ts, f, min(f) over (partition by ts order by t rows between unbounded preceding and 1 following) from over10k limit 100
+POSTHOOK: query: select ts, f, min(f) over (partition by ts order by t,f rows between unbounded preceding and 1 following) from over10k limit 100
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over10k
#### A masked pattern was here ####
@@ -2121,8 +2121,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.70307 31.17 14.54
2013-03-01 09:11:58.70307 56.94 14.54
2013-03-01 09:11:58.70307 78.58 14.54
-2013-03-01 09:11:58.70307 38.61 14.54
2013-03-01 09:11:58.70307 14.78 14.54
+2013-03-01 09:11:58.70307 38.61 14.54
2013-03-01 09:11:58.70307 91.36 14.54
2013-03-01 09:11:58.70307 28.69 14.54
2013-03-01 09:11:58.70307 73.52 14.54
@@ -2155,8 +2155,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 80.43 0.83
2013-03-01 09:11:58.703071 54.09 0.83
2013-03-01 09:11:58.703071 42.08 0.83
-2013-03-01 09:11:58.703071 64.55 0.83
2013-03-01 09:11:58.703071 48.89 0.83
+2013-03-01 09:11:58.703071 64.55 0.83
2013-03-01 09:11:58.703071 56.45 0.83
2013-03-01 09:11:58.703071 1.99 0.83
2013-03-01 09:11:58.703071 94.27 0.83
@@ -2177,8 +2177,8 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 31.84 0.83
2013-03-01 09:11:58.703071 50.28 0.83
2013-03-01 09:11:58.703071 3.73 0.83
-2013-03-01 09:11:58.703071 53.26 0.83
2013-03-01 09:11:58.703071 29.71 0.83
+2013-03-01 09:11:58.703071 53.26 0.83
2013-03-01 09:11:58.703071 8.86 0.83
2013-03-01 09:11:58.703071 21.01 0.83
2013-03-01 09:11:58.703071 84.21 0.83
@@ -2187,11 +2187,11 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703071 88.93 0.83
2013-03-01 09:11:58.703071 12.83 0.83
2013-03-01 09:11:58.703071 29.07 0.83
-2013-03-01 09:11:58.703071 61.88 0.83
2013-03-01 09:11:58.703071 61.41 0.83
+2013-03-01 09:11:58.703071 61.88 0.83
2013-03-01 09:11:58.703071 46.84 0.83
-2013-03-01 09:11:58.703072 95.01 62.09
-2013-03-01 09:11:58.703072 62.09 29.01
+2013-03-01 09:11:58.703072 62.09 62.09
+2013-03-01 09:11:58.703072 95.01 29.01
2013-03-01 09:11:58.703072 29.01 29.01
2013-03-01 09:11:58.703072 79.46 4.48
2013-03-01 09:11:58.703072 4.48 4.48
@@ -2211,9 +2211,9 @@ POSTHOOK: Input: default@over10k
2013-03-01 09:11:58.703072 25.91 0.79
2013-03-01 09:11:58.703072 88.08 0.48
2013-03-01 09:11:58.703072 0.48 0.48
-2013-03-01 09:11:58.703072 88.83 0.48
2013-03-01 09:11:58.703072 9.0 0.48
-2013-03-01 09:11:58.703072 54.1 0.48
+2013-03-01 09:11:58.703072 88.83 0.48
+2013-03-01 09:11:58.703072 45.91 0.48
PREHOOK: query: -- first_value
select ts, f, first_value(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100
PREHOOK: type: QUERY
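The underlying issue this commit addresses: with ROWS-based frames, rows that tie on the ORDER BY key (here, equal values of t within a ts partition) can arrive in any order, so a physical frame such as "2 preceding and 1 preceding" may contain different rows from run to run. Adding f as a tie-breaker fixes the row order and therefore the frame contents. A hedged sketch of exercising such a query over JDBC; the HiveServer2 URL is a placeholder, not from the commit:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class WindowDeterminismSketch {
      public static void main(String[] args) throws Exception {
        // Placeholder URL; adjust host/port/database for a real cluster.
        try (Connection conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
             Statement stmt = conn.createStatement();
             // "order by t, f": f breaks ties in t, pinning down the physical
             // row order that the ROWS frame is evaluated against.
             ResultSet rs = stmt.executeQuery(
                 "select ts, f, max(f) over (partition by ts order by t, f "
                     + "rows between 2 preceding and 1 preceding) from over10k limit 100")) {
          while (rs.next()) {
            System.out.println(rs.getString(1) + "\t" + rs.getFloat(2) + "\t" + rs.getFloat(3));
          }
        }
      }
    }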
[03/22] hive git commit: HIVE-11903 : Add lock metrics to HS2 (Yongzhi Chen via Szehon)
Posted by se...@apache.org.
HIVE-11903 : Add lock metrics to HS2 (Yongzhi Chen via Szehon)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/50b6d0c6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/50b6d0c6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/50b6d0c6
Branch: refs/heads/llap
Commit: 50b6d0c6bbf3ba838d3851ccc7cad0575c39732c
Parents: 274847e
Author: Szehon Ho <sz...@cloudera.com>
Authored: Wed Sep 30 12:21:30 2015 -0700
Committer: Szehon Ho <sz...@cloudera.com>
Committed: Wed Sep 30 12:21:30 2015 -0700
----------------------------------------------------------------------
.../common/metrics/common/MetricsConstant.java | 5 ++
.../hadoop/hive/ql/lockmgr/DbLockManager.java | 21 ++++++++
.../zookeeper/ZooKeeperHiveLockManager.java | 41 ++++++++++++++++
.../zookeeper/TestZookeeperLockManager.java | 50 ++++++++++++++++++++
4 files changed, 117 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/50b6d0c6/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java b/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
index 13c3cf9..88a3c29 100644
--- a/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
+++ b/common/src/java/org/apache/hadoop/hive/common/metrics/common/MetricsConstant.java
@@ -33,4 +33,9 @@ public class MetricsConstant {
public static String JDO_ROLLBACK_TRANSACTIONS = "rollbacked_jdo_transactions";
public static String JDO_COMMIT_TRANSACTIONS = "committed_jdo_transactions";
public static String JDO_OPEN_TRANSACTIONS = "opened_jdo_transactions";
+
+ public static String METASTORE_HIVE_LOCKS = "metastore_hive_locks";
+ public static String ZOOKEEPER_HIVE_SHAREDLOCKS = "zookeeper_hive_sharedlocks";
+ public static String ZOOKEEPER_HIVE_EXCLUSIVELOCKS = "zookeeper_hive_exclusivelocks";
+ public static String ZOOKEEPER_HIVE_SEMISHAREDLOCKS = "zookeeper_hive_semisharedlocks";
}
http://git-wip-us.apache.org/repos/asf/hive/blob/50b6d0c6/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
index 82e227f..bb9da9d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java
@@ -20,6 +20,9 @@ package org.apache.hadoop.hive.ql.lockmgr;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.JavaUtils;
+import org.apache.hadoop.hive.common.metrics.common.Metrics;
+import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
+import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.*;
@@ -99,6 +102,16 @@ public class DbLockManager implements HiveLockManager{
throw new LockException(ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg());
}
acquiredLocks.add(hl);
+
+ Metrics metrics = MetricsFactory.getInstance();
+ if (metrics != null) {
+ try {
+ metrics.incrementCounter(MetricsConstant.METASTORE_HIVE_LOCKS);
+ } catch (Exception e) {
+ LOG.warn("Error Reporting hive client metastore lock operation to Metrics system", e);
+ }
+ }
+
return res.getState();
} catch (NoSuchTxnException e) {
LOG.error("Metastore could not find txnid " + lock.getTxnid());
@@ -133,6 +146,14 @@ public class DbLockManager implements HiveLockManager{
LOG.debug("Unlocking " + hiveLock);
client.unlock(lockId);
boolean removed = locks.remove(hiveLock);
+ Metrics metrics = MetricsFactory.getInstance();
+ if (metrics != null) {
+ try {
+ metrics.decrementCounter(MetricsConstant.METASTORE_HIVE_LOCKS);
+ } catch (Exception e) {
+ LOG.warn("Error Reporting hive client metastore unlock operation to Metrics system", e);
+ }
+ }
LOG.debug("Removed a lock " + removed);
} catch (NoSuchLockException e) {
LOG.error("Metastore could find no record of lock " + JavaUtils.lockIdToString(lockId));
http://git-wip-us.apache.org/repos/asf/hive/blob/50b6d0c6/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java
index fb954d8..7c7a8d1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/zookeeper/ZooKeeperHiveLockManager.java
@@ -19,8 +19,12 @@
package org.apache.hadoop.hive.ql.lockmgr.zookeeper;
import com.google.common.annotations.VisibleForTesting;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.metrics.common.Metrics;
+import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
+import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.lockmgr.*;
@@ -402,7 +406,25 @@ public class ZooKeeperHiveLockManager implements HiveLockManager {
return null;
}
}
+ Metrics metrics = MetricsFactory.getInstance();
+ if (metrics != null) {
+ try {
+ switch(mode) {
+ case EXCLUSIVE:
+ metrics.incrementCounter(MetricsConstant.ZOOKEEPER_HIVE_EXCLUSIVELOCKS);
+ break;
+ case SEMI_SHARED:
+ metrics.incrementCounter(MetricsConstant.ZOOKEEPER_HIVE_SEMISHAREDLOCKS);
+ break;
+ default:
+ metrics.incrementCounter(MetricsConstant.ZOOKEEPER_HIVE_SHAREDLOCKS);
+ break;
+ }
+ } catch (Exception e) {
+ LOG.warn("Error Reporting hive client zookeeper lock operation to Metrics system", e);
+ }
+ }
return new ZooKeeperHiveLock(res, key, mode);
}
@@ -438,6 +460,7 @@ public class ZooKeeperHiveLockManager implements HiveLockManager {
@VisibleForTesting
static void unlockPrimitive(HiveLock hiveLock, String parent, CuratorFramework curatorFramework) throws LockException {
ZooKeeperHiveLock zLock = (ZooKeeperHiveLock)hiveLock;
+ HiveLockMode lMode = hiveLock.getHiveLockMode();
HiveLockObject obj = zLock.getHiveLockObject();
String name = getLastObjectName(parent, obj);
try {
@@ -448,6 +471,24 @@ public class ZooKeeperHiveLockManager implements HiveLockManager {
if (children == null || children.isEmpty()) {
curatorFramework.delete().forPath(name);
}
+ Metrics metrics = MetricsFactory.getInstance();
+ if (metrics != null) {
+ try {
+ switch(lMode) {
+ case EXCLUSIVE:
+ metrics.decrementCounter(MetricsConstant.ZOOKEEPER_HIVE_EXCLUSIVELOCKS);
+ break;
+ case SEMI_SHARED:
+ metrics.decrementCounter(MetricsConstant.ZOOKEEPER_HIVE_SEMISHAREDLOCKS);
+ break;
+ default:
+ metrics.decrementCounter(MetricsConstant.ZOOKEEPER_HIVE_SHAREDLOCKS);
+ break;
+ }
+ } catch (Exception e) {
+ LOG.warn("Error Reporting hive client zookeeper unlock operation to Metrics system", e);
+ }
+ }
} catch (KeeperException.NoNodeException nne) {
//can happen in retrying deleting the zLock after exceptions like InterruptedException
//or in a race condition where parent has already been deleted by other process when it
http://git-wip-us.apache.org/repos/asf/hive/blob/50b6d0c6/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/zookeeper/TestZookeeperLockManager.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/zookeeper/TestZookeeperLockManager.java b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/zookeeper/TestZookeeperLockManager.java
index 4a1ef2e..7fcaa22 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/zookeeper/TestZookeeperLockManager.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/lockmgr/zookeeper/TestZookeeperLockManager.java
@@ -18,7 +18,14 @@
package org.apache.hadoop.hive.ql.lockmgr.zookeeper;
+import java.io.File;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
+import org.apache.hadoop.hive.common.metrics.metrics2.MetricsReporting;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockManagerCtx;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockMode;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
@@ -33,6 +40,9 @@ import org.junit.Before;
import org.junit.After;
import org.junit.Test;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
public class TestZookeeperLockManager {
private HiveConf conf;
@@ -110,5 +120,45 @@ public class TestZookeeperLockManager {
conf.setVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CLIENT_PORT, "9999");
Assert.assertEquals("node1:5666,node2:9999,node3:9999", ZooKeeperHiveHelper.getQuorumServers(conf));
}
+
+ @Test
+ public void testMetrics() throws Exception{
+ conf.setVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_QUORUM, "localhost");
+ conf.setVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_CLIENT_PORT, String.valueOf(server.getPort()));
+ File workDir = new File(System.getProperty("test.tmp.dir"));
+ File jsonReportFile = new File(workDir, "json_reportingzk1");
+ jsonReportFile.delete();
+ conf.setBoolVar(HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED, true);
+ conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
+ conf.setVar(HiveConf.ConfVars.HIVE_METRICS_REPORTER, MetricsReporting.JSON_FILE.name() + "," + MetricsReporting.JMX.name());
+ conf.setVar(HiveConf.ConfVars.HIVE_METRICS_JSON_FILE_LOCATION, jsonReportFile.toString());
+ conf.setVar(HiveConf.ConfVars.HIVE_METRICS_JSON_FILE_INTERVAL, "100ms");
+ MetricsFactory.init(conf);
+
+ HiveLockManagerCtx ctx = new HiveLockManagerCtx(conf);
+ ZooKeeperHiveLockManager zMgr= new ZooKeeperHiveLockManager();
+ zMgr.setContext(ctx);
+ ZooKeeperHiveLock curLock = zMgr.lock(hiveLock, HiveLockMode.SHARED, false);
+ Thread.sleep(2000);
+ byte[] jsonData = Files.readAllBytes(Paths.get(jsonReportFile.getAbsolutePath()));
+ ObjectMapper objectMapper = new ObjectMapper();
+ JsonNode rootNode = objectMapper.readTree(jsonData);
+ JsonNode countersNode = rootNode.path("counters");
+ JsonNode zkLockNode = countersNode.path("zookeeper_hive_sharedlocks");
+ JsonNode zkLockCountNode = zkLockNode.path("count");
+ Assert.assertTrue(zkLockCountNode.asInt() == 1);
+
+ zMgr.unlock(curLock);
+ Thread.sleep(2000);
+ jsonData = Files.readAllBytes(Paths.get(jsonReportFile.getAbsolutePath()));
+ objectMapper = new ObjectMapper();
+ rootNode = objectMapper.readTree(jsonData);
+ countersNode = rootNode.path("counters");
+ zkLockNode = countersNode.path("zookeeper_hive_sharedlocks");
+ zkLockCountNode = zkLockNode.path("count");
+ Assert.assertTrue(zkLockCountNode.asInt() == 0);
+ zMgr.close();
+ }
+
}
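Operationally, the counters this commit adds surface through whatever reporters are enabled (the test above configures both JSON_FILE and JMX). A condensed sketch of polling one of the new counters from the JSON dump file, following the same Jackson traversal as the test; the file path is an assumption standing in for the HIVE_METRICS_JSON_FILE_LOCATION setting:

    import java.nio.file.Files;
    import java.nio.file.Paths;

    import com.fasterxml.jackson.databind.JsonNode;
    import com.fasterxml.jackson.databind.ObjectMapper;

    public class LockMetricsPollSketch {
      public static void main(String[] args) throws Exception {
        // Assumed dump location; must match the JSON file location configured in HiveConf.
        byte[] json = Files.readAllBytes(Paths.get("/tmp/hs2_metrics.json"));
        JsonNode root = new ObjectMapper().readTree(json);
        // Counters are keyed by the new MetricsConstant names.
        int shared = root.path("counters").path("zookeeper_hive_sharedlocks").path("count").asInt();
        int exclusive = root.path("counters").path("zookeeper_hive_exclusivelocks").path("count").asInt();
        System.out.println("shared=" + shared + " exclusive=" + exclusive);
      }
    }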
[19/22] hive git commit: HIVE-11928: ORC footer section can also exceed protobuf message limit (Prasanth Jayachandran reviewed by Sergey Shelukhin and Owen O'Malley)
Posted by se...@apache.org.
HIVE-11928: ORC footer section can also exceed protobuf message limit (Prasanth Jayachandran reviewed by Sergey Shelukhin and Owen O'Malley)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/467a117e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/467a117e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/467a117e
Branch: refs/heads/llap
Commit: 467a117edeb40074957d222386e1800194322a29
Parents: 947871a
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Thu Oct 1 17:04:00 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Thu Oct 1 17:04:00 2015 -0500
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/io/orc/InStream.java | 24 +++++++++++
.../hadoop/hive/ql/io/orc/MetadataReader.java | 2 +-
.../hadoop/hive/ql/io/orc/ReaderImpl.java | 43 ++------------------
3 files changed, 29 insertions(+), 40 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/467a117e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
index 381d97d..6fec8b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
@@ -30,10 +30,12 @@ import org.apache.hadoop.hive.common.DiskRange;
import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.BufferChunk;
import com.google.common.annotations.VisibleForTesting;
+import com.google.protobuf.CodedInputStream;
public abstract class InStream extends InputStream {
private static final Log LOG = LogFactory.getLog(InStream.class);
+ private static final int PROTOBUF_MESSAGE_MAX_LIMIT = 1024 << 20; // 1GB
protected final String name;
protected final long length;
@@ -447,4 +449,26 @@ public abstract class InStream extends InputStream {
return new CompressedStream(name, input, length, codec, bufferSize);
}
}
+
+ /**
+ * Creates coded input stream (used for protobuf message parsing) with higher message size limit.
+ *
+ * @param name the name of the stream
+ * @param input the list of ranges of bytes for the stream; from disk or cache
+ * @param length the length in bytes of the stream
+ * @param codec the compression codec
+ * @param bufferSize the compression buffer size
+ * @return coded input stream
+ * @throws IOException
+ */
+ public static CodedInputStream createCodedInputStream(String name,
+ List<DiskRange> input,
+ long length,
+ CompressionCodec codec,
+ int bufferSize) throws IOException {
+ InStream inStream = create(name, input, length, codec, bufferSize);
+ CodedInputStream codedInputStream = CodedInputStream.newInstance(inStream);
+ codedInputStream.setSizeLimit(PROTOBUF_MESSAGE_MAX_LIMIT);
+ return codedInputStream;
+ }
}
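The key protobuf detail here: a generated parseFrom(InputStream) wraps the stream in a CodedInputStream whose default message size limit is 64MB, so oversized ORC footers fail to parse. Creating the CodedInputStream explicitly lets the caller raise the cap up front, which is what createCodedInputStream above does. A minimal standalone sketch of the same technique, reusing OrcProto.Footer from this commit; the helper name is illustrative:

    import java.io.IOException;
    import java.io.InputStream;

    import org.apache.hadoop.hive.ql.io.orc.OrcProto;

    import com.google.protobuf.CodedInputStream;

    public class LargeFooterParseSketch {
      private static final int MAX_MESSAGE_SIZE = 1024 << 20; // 1GB, matching the limit above

      static OrcProto.Footer parseLargeFooter(InputStream in) throws IOException {
        CodedInputStream cis = CodedInputStream.newInstance(in);
        // Raise protobuf's default 64MB cap before parsing, instead of
        // retrying with a doubled limit after a failure.
        cis.setSizeLimit(MAX_MESSAGE_SIZE);
        return OrcProto.Footer.parseFrom(cis);
      }
    }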
http://git-wip-us.apache.org/repos/asf/hive/blob/467a117e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
index 43d2933..1910214 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReader.java
@@ -108,7 +108,7 @@ public class MetadataReader {
// read the footer
ByteBuffer tailBuf = ByteBuffer.allocate(tailLength);
file.readFully(offset, tailBuf.array(), tailBuf.arrayOffset(), tailLength);
- return OrcProto.StripeFooter.parseFrom(InStream.create("footer",
+ return OrcProto.StripeFooter.parseFrom(InStream.createCodedInputStream("footer",
Lists.<DiskRange>newArrayList(new BufferChunk(tailBuf, 0)),
tailLength, codec, bufferSize));
}
http://git-wip-us.apache.org/repos/asf/hive/blob/467a117e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index 36fb858..3bac48a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -19,7 +19,6 @@
package org.apache.hadoop.hive.ql.io.orc;
import java.io.IOException;
-import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
@@ -48,15 +47,12 @@ import org.apache.hadoop.io.Text;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.protobuf.CodedInputStream;
-import com.google.protobuf.InvalidProtocolBufferException;
public class ReaderImpl implements Reader {
private static final Log LOG = LogFactory.getLog(ReaderImpl.class);
private static final int DIRECTORY_SIZE_GUESS = 16 * 1024;
- private static final int DEFAULT_PROTOBUF_MESSAGE_LIMIT = 64 << 20; // 64MB
- private static final int PROTOBUF_MESSAGE_MAX_LIMIT = 1024 << 20; // 1GB
protected final FileSystem fileSystem;
protected final Path path;
@@ -387,47 +383,16 @@ public class ReaderImpl implements Reader {
int footerSize, CompressionCodec codec, int bufferSize) throws IOException {
bb.position(footerAbsPos);
bb.limit(footerAbsPos + footerSize);
- InputStream instream = InStream.create("footer", Lists.<DiskRange>newArrayList(
- new BufferChunk(bb, 0)), footerSize, codec, bufferSize);
- return OrcProto.Footer.parseFrom(instream);
+ return OrcProto.Footer.parseFrom(InStream.createCodedInputStream("footer",
+ Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), footerSize, codec, bufferSize));
}
private static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
int metadataSize, CompressionCodec codec, int bufferSize) throws IOException {
bb.position(metadataAbsPos);
bb.limit(metadataAbsPos + metadataSize);
- InputStream instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
- new BufferChunk(bb, 0)), metadataSize, codec, bufferSize);
- CodedInputStream in = CodedInputStream.newInstance(instream);
- int msgLimit = DEFAULT_PROTOBUF_MESSAGE_LIMIT;
- OrcProto.Metadata meta = null;
- do {
- try {
- in.setSizeLimit(msgLimit);
- meta = OrcProto.Metadata.parseFrom(in);
- } catch (InvalidProtocolBufferException e) {
- if (e.getMessage().contains("Protocol message was too large")) {
- LOG.warn("Metadata section is larger than " + msgLimit + " bytes. Increasing the max" +
- " size of the coded input stream." );
-
- msgLimit = msgLimit << 1;
- if (msgLimit > PROTOBUF_MESSAGE_MAX_LIMIT) {
- LOG.error("Metadata section exceeds max protobuf message size of " +
- PROTOBUF_MESSAGE_MAX_LIMIT + " bytes.");
- throw e;
- }
-
- // we must have failed in the middle of reading instream and instream doesn't support
- // resetting the stream
- instream = InStream.create("metadata", Lists.<DiskRange>newArrayList(
- new BufferChunk(bb, 0)), metadataSize, codec, bufferSize);
- in = CodedInputStream.newInstance(instream);
- } else {
- throw e;
- }
- }
- } while (meta == null);
- return meta;
+ return OrcProto.Metadata.parseFrom(InStream.createCodedInputStream("metadata",
+ Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), metadataSize, codec, bufferSize));
}
private static OrcProto.PostScript extractPostScript(ByteBuffer bb, Path path,
[20/22] hive git commit: HIVE-11995 : Remove repetitively setting permissions in insert/load overwrite partition (Chaoyu Tang via Szehon)
Posted by se...@apache.org.
HIVE-11995 : Remove repetitively setting permissions in insert/load overwrite partition (Chaoyu Tang via Szehon)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/50744231
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/50744231
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/50744231
Branch: refs/heads/llap
Commit: 507442319985198466b4f6c2ba18c6b068d8435e
Parents: 467a117
Author: Szehon Ho <sz...@cloudera.com>
Authored: Thu Oct 1 15:29:36 2015 -0700
Committer: Szehon Ho <sz...@cloudera.com>
Committed: Thu Oct 1 15:29:36 2015 -0700
----------------------------------------------------------------------
.../hive/ql/security/FolderPermissionBase.java | 53 +++++++--
.../apache/hadoop/hive/ql/metadata/Hive.java | 108 ++++---------------
2 files changed, 65 insertions(+), 96 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/50744231/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/FolderPermissionBase.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/FolderPermissionBase.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/FolderPermissionBase.java
index d98082f..d7149a7 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/FolderPermissionBase.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/FolderPermissionBase.java
@@ -261,6 +261,7 @@ public abstract class FolderPermissionBase {
//insert overwrite test
setPermission(warehouseDir + "/" + tableName, 1);
+ setPermission(warehouseDir + "/" + tableName + "/part1=1", 1);
ret = driver.run("insert overwrite table " + tableName + " partition(part1='1') select key,value from mysrc where part1='1' and part2='1'");
Assert.assertEquals(0, ret.getResponseCode());
@@ -297,6 +298,9 @@ public abstract class FolderPermissionBase {
//insert overwrite test
setPermission(warehouseDir + "/" + tableName, 1);
+ setPermission(warehouseDir + "/" + tableName + "/part1=1", 1);
+ setPermission(warehouseDir + "/" + tableName + "/part1=1/part2=1", 1);
+
ret = driver.run("insert overwrite table " + tableName + " partition(part1='1', part2='1') select key,value from mysrc where part1='1' and part2='1'");
Assert.assertEquals(0, ret.getResponseCode());
@@ -325,8 +329,9 @@ public abstract class FolderPermissionBase {
verifyDualPartitionTable(warehouseDir + "/" + tableName, 0);
- //Insert overwrite test, with permission set 1.
- setPermission(warehouseDir + "/" + tableName, 1);
+ //Insert overwrite test, with permission set 1. We need reset existing partitions to 1 since the permissions
+ //should be inherited from existing partition
+ setDualPartitionTable(warehouseDir + "/" + tableName, 1);
ret = driver.run("insert overwrite table " + tableName + " partition (part1,part2) select key,value,part1,part2 from mysrc");
Assert.assertEquals(0, ret.getResponseCode());
@@ -348,8 +353,9 @@ public abstract class FolderPermissionBase {
Assert.assertEquals(0,ret.getResponseCode());
verifySinglePartition(tableLoc, 0);
- //Insert overwrite test, with permission set 1.
- setPermission(tableLoc, 1);
+ //Insert overwrite test, with permission set 1. We need reset existing partitions to 1 since the permissions
+ //should be inherited from existing partition
+ setSinglePartition(tableLoc, 1);
ret = driver.run("insert overwrite table " + tableName + " partition (part1) select key,value,part1 from mysrc");
Assert.assertEquals(0,ret.getResponseCode());
verifySinglePartition(tableLoc, 1);
@@ -458,6 +464,9 @@ public abstract class FolderPermissionBase {
//case1B: load data local into overwrite non-partitioned-table
setPermission(warehouseDir + "/" + tableName, 1);
+ for (String child : listStatus(tableLoc)) {
+ setPermission(child, 1);
+ }
ret = driver.run("load data local inpath '" + dataFilePath + "' overwrite into table " + tableName);
Assert.assertEquals(0,ret.getResponseCode());
@@ -485,8 +494,13 @@ public abstract class FolderPermissionBase {
verifyPermission(child);
}
- //case 2B: insert data overwrite into non-partitioned table.
+ //case 2B: insert data overwrite into partitioned table. set testing table/partition folder hierarchy 1
+ //local load overwrite just overwrite the existing partition content but not the permission
setPermission(tableLoc, 1);
+ setPermission(partLoc, 1);
+ for (String child : listStatus(partLoc)) {
+ setPermission(child, 1);
+ }
ret = driver.run("LOAD DATA LOCAL INPATH '" + dataFilePath + "' OVERWRITE INTO TABLE " + tableName + " PARTITION (part1='1',part2='1')");
Assert.assertEquals(0,ret.getResponseCode());
@@ -521,6 +535,10 @@ public abstract class FolderPermissionBase {
//case1B: load data into overwrite non-partitioned-table
setPermission(warehouseDir + "/" + tableName, 1);
+ for (String child : listStatus(tableLoc)) {
+ setPermission(child, 1);
+ }
+
fs.copyFromLocalFile(dataFilePath, new Path(location));
ret = driver.run("load data inpath '" + location + "' overwrite into table " + tableName);
Assert.assertEquals(0,ret.getResponseCode());
@@ -550,8 +568,15 @@ public abstract class FolderPermissionBase {
verifyPermission(child);
}
- //case 2B: insert data overwrite into non-partitioned table.
+ //case 2B: insert data overwrite into partitioned table. set testing table/partition folder hierarchy 1
+ //load overwrite just overwrite the existing partition content but not the permission
setPermission(tableLoc, 1);
+ setPermission(partLoc, 1);
+ Assert.assertTrue(listStatus(partLoc).size() > 0);
+ for (String child : listStatus(partLoc)) {
+ setPermission(child, 1);
+ }
+
fs.copyFromLocalFile(dataFilePath, new Path(location));
ret = driver.run("LOAD DATA INPATH '" + location + "' OVERWRITE INTO TABLE " + tableName + " PARTITION (part1='1',part2='1')");
Assert.assertEquals(0,ret.getResponseCode());
@@ -693,7 +718,12 @@ public abstract class FolderPermissionBase {
assertExistence(partition);
verifyPermission(partition);
}
-
+
+ private void setSinglePartition(String tableLoc, int index) throws Exception {
+ setPermission(tableLoc + "/part1=1", index);
+ setPermission(tableLoc + "/part1=2", index);
+ }
+
private void verifySinglePartition(String tableLoc, int index) throws Exception {
verifyPermission(tableLoc + "/part1=1", index);
verifyPermission(tableLoc + "/part1=2", index);
@@ -709,6 +739,15 @@ public abstract class FolderPermissionBase {
}
}
+ private void setDualPartitionTable(String baseTablePath, int index) throws Exception {
+ setPermission(baseTablePath, index);
+ setPermission(baseTablePath + "/part1=1", index);
+ setPermission(baseTablePath + "/part1=1/part2=1", index);
+
+ setPermission(baseTablePath + "/part1=2", index);
+ setPermission(baseTablePath + "/part1=2/part2=2", index);
+ }
+
private void verifyDualPartitionTable(String baseTablePath, int index) throws Exception {
verifyPermission(baseTablePath, index);
verifyPermission(baseTablePath + "/part1=1", index);
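For readers following the permission-inheritance fix: the updated flow of the dual-partition insert-overwrite test now looks roughly like the sketch below. It reuses names from the hunks above (setDualPartitionTable, verifyDualPartitionTable, the test's driver whose run() returns a CommandProcessorResponse); the surrounding setup in FolderPermissionBase is elided, so treat this as an illustration, not the verbatim test body.

  // Reset the table dir and both partition trees to permission set 1, then
  // verify that insert overwrite leaves the hierarchy at set 1 -- i.e. the
  // rewritten partitions inherit from the existing directories instead of
  // reverting to default permissions.
  setDualPartitionTable(warehouseDir + "/" + tableName, 1);
  CommandProcessorResponse ret = driver.run(
      "insert overwrite table " + tableName
      + " partition (part1,part2) select key,value,part1,part2 from mysrc");
  Assert.assertEquals(0, ret.getResponseCode());
  verifyDualPartitionTable(warehouseDir + "/" + tableName, 1);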
http://git-wip-us.apache.org/repos/asf/hive/blob/50744231/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 10cafb6..8efbb05 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -2932,8 +2932,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
LOG.info("No sources specified to move: " + srcf);
return;
}
- List<List<Path[]>> result = checkPaths(conf, destFs, srcs, srcFs, destf,
- true);
+ List<List<Path[]>> result = checkPaths(conf, destFs, srcs, srcFs, destf, true);
if (oldPath != null) {
try {
@@ -2945,9 +2944,6 @@ private void constructOneLBLocationMap(FileStatus fSta,
if (FileUtils.isSubDir(oldPath, destf, fs2)) {
FileUtils.trashFilesUnderDir(fs2, oldPath, conf);
}
- if (inheritPerms) {
- inheritFromTable(tablePath, destf, conf, destFs);
- }
}
} catch (Exception e) {
//swallow the exception
@@ -2955,58 +2951,24 @@ private void constructOneLBLocationMap(FileStatus fSta,
}
}
- // rename src directory to destf
- if (srcs.length == 1 && srcs[0].isDir()) {
- // rename can fail if the parent doesn't exist
- Path destfp = destf.getParent();
- if (!destFs.exists(destfp)) {
- boolean success = destFs.mkdirs(destfp);
- if (!success) {
- LOG.warn("Error creating directory " + destf.toString());
- }
- if (inheritPerms && success) {
- inheritFromTable(tablePath, destfp, conf, destFs);
- }
- }
-
- // Copy/move each file under the source directory to avoid to delete the destination
- // directory if it is the root of an HDFS encryption zone.
- for (List<Path[]> sdpairs : result) {
- for (Path[] sdpair : sdpairs) {
- Path destParent = sdpair[1].getParent();
- FileSystem destParentFs = destParent.getFileSystem(conf);
- if (!destParentFs.isDirectory(destParent)) {
- boolean success = destFs.mkdirs(destParent);
- if (!success) {
- LOG.warn("Error creating directory " + destParent);
- }
- if (inheritPerms && success) {
- inheritFromTable(tablePath, destParent, conf, destFs);
- }
- }
- if (!moveFile(conf, sdpair[0], sdpair[1], true, isSrcLocal)) {
- throw new IOException("Unable to move file/directory from " + sdpair[0] +
- " to " + sdpair[1]);
- }
- }
- }
- } else { // srcf is a file or pattern containing wildcards
- if (!destFs.exists(destf)) {
- boolean success = destFs.mkdirs(destf);
- if (!success) {
- LOG.warn("Error creating directory " + destf.toString());
- }
- if (inheritPerms && success) {
- inheritFromTable(tablePath, destf, conf, destFs);
- }
- }
- // srcs must be a list of files -- ensured by LoadSemanticAnalyzer
- for (List<Path[]> sdpairs : result) {
- for (Path[] sdpair : sdpairs) {
- if (!moveFile(conf, sdpair[0], sdpair[1], true,
- isSrcLocal)) {
- throw new IOException("Error moving: " + sdpair[0] + " into: " + sdpair[1]);
- }
+ // first call FileUtils.mkdir to make sure that the destf directory exists; if it does not,
+ // it creates destf with inherited permissions
+ boolean destfExist = FileUtils.mkdir(destFs, destf, true, conf);
+ if (!destfExist) {
+ throw new IOException("Directory " + destf.toString()
+ + " does not exist and could not be created.");
+ }
+
+ // Two cases:
+ // 1. srcs has only a src directory; instead of renaming the src directory to destf, we
+ // copy/move each file under the source directory, to avoid deleting the destination
+ // directory if it is the root of an HDFS encryption zone.
+ // 2. srcs is a list of files -- ensured by LoadSemanticAnalyzer.
+ // In both cases, we move the files under destf.
+ for (List<Path[]> sdpairs : result) {
+ for (Path[] sdpair : sdpairs) {
+ if (!moveFile(conf, sdpair[0], sdpair[1], true, isSrcLocal)) {
+ throw new IOException("Error moving: " + sdpair[0] + " into: " + sdpair[1]);
}
}
}
@@ -3015,38 +2977,6 @@ private void constructOneLBLocationMap(FileStatus fSta,
}
}
- /**
- * This method sets all paths from tablePath to destf (including destf) to have same permission as tablePath.
- * @param tablePath path of table
- * @param destf path of table-subdir.
- * @param conf
- * @param fs
- */
- private static void inheritFromTable(Path tablePath, Path destf, HiveConf conf, FileSystem fs) {
- if (!FileUtils.isSubDir(destf, tablePath, fs)) {
- //partition may not be under the parent.
- return;
- }
- HadoopShims shims = ShimLoader.getHadoopShims();
- //Calculate all the paths from the table dir, to destf
- //At end of this loop, currPath is table dir, and pathsToSet contain list of all those paths.
- Path currPath = destf;
- List<Path> pathsToSet = new LinkedList<Path>();
- while (!currPath.equals(tablePath)) {
- pathsToSet.add(currPath);
- currPath = currPath.getParent();
- }
-
- try {
- HadoopShims.HdfsFileStatus fullFileStatus = shims.getFullFileStatus(conf, fs, currPath);
- for (Path pathToSet : pathsToSet) {
- shims.setFullFileStatus(conf, fullFileStatus, fs, pathToSet);
- }
- } catch (Exception e) {
- LOG.warn("Error setting permissions or group of " + destf, e);
- }
- }
-
public static boolean isHadoop1() {
return ShimLoader.getMajorVersion().startsWith("0.20");
}
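Why is it safe to delete inheritFromTable() here? Because FileUtils.mkdir(destFs, destf, true, conf) now handles the inheritance when it creates missing directories, as the new comment above states. A hedged paraphrase of that behavior follows (not the exact hive-common implementation; the inheritPerms config check and group/ACL propagation are omitted, and the class and method names are illustrative only):

  import java.io.IOException;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.fs.permission.FsPermission;

  class PermInheritSketch {
    // Sketch: create dir, then stamp it and any created parents with the
    // permissions of the nearest ancestor that already existed.
    static boolean mkdirInheriting(FileSystem fs, Path dir) throws IOException {
      Path ancestor = dir.getParent();
      while (ancestor != null && !fs.exists(ancestor)) {  // find first existing ancestor
        ancestor = ancestor.getParent();
      }
      if (fs.exists(dir)) {
        return true;                       // nothing to create, nothing to inherit
      }
      FsPermission perm = fs.getFileStatus(ancestor).getPermission();
      if (!fs.mkdirs(dir)) {
        return false;
      }
      for (Path p = dir; p != null && !p.equals(ancestor); p = p.getParent()) {
        fs.setPermission(p, perm);         // propagate the inherited permission down
      }
      return true;
    }
  }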
[18/22] hive git commit: HIVE-11960 : braces in join conditions are
not supported (Sergey Shelukhin, reviewed by Pengcheng Xiong)
Posted by se...@apache.org.
HIVE-11960 : braces in join conditions are not supported (Sergey Shelukhin, reviewed by Pengcheng Xiong)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/947871a3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/947871a3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/947871a3
Branch: refs/heads/llap
Commit: 947871a3b060adbc46cad8d9416117a81f50fd94
Parents: 116c3e3
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Oct 1 14:14:12 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Oct 1 14:14:12 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/parse/FromClauseParser.g | 30 +-
.../apache/hadoop/hive/ql/parse/HiveParser.g | 7 +-
ql/src/test/queries/clientpositive/join_parse.q | 20 +
.../clientnegative/cte_with_in_subquery.q.out | 2 +-
.../results/clientpositive/join_parse.q.out | 516 +++++++++++++++++++
5 files changed, 568 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/947871a3/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
index 038ed99..084c421 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
@@ -94,7 +94,7 @@ joinSource
;
uniqueJoinSource
-@init { gParent.pushMsg("join source", state); }
+@init { gParent.pushMsg("unique join source", state); }
@after { gParent.popMsg(state); }
: KW_PRESERVE? fromSource uniqueJoinExpr
;
@@ -147,6 +147,16 @@ fromSource
@init { gParent.pushMsg("from source", state); }
@after { gParent.popMsg(state); }
:
+ (LPAREN KW_VALUES) => fromSource0
+ | (LPAREN) => LPAREN joinSource RPAREN -> joinSource
+ | fromSource0
+ ;
+
+
+fromSource0
+@init { gParent.pushMsg("from source 0", state); }
+@after { gParent.popMsg(state); }
+ :
((Identifier LPAREN)=> partitionedTableFunction | tableSource | subQuerySource | virtualTableSource) (lateralView^)*
;
@@ -270,11 +280,15 @@ searchCondition
// INSERT INTO <table> (col1,col2,...) VALUES(...),(...),...
// INSERT INTO <table> (col1,col2,...) SELECT * FROM (VALUES(1,2,3),(4,5,6),...) as Foo(a,b,c)
valueRowConstructor
+@init { gParent.pushMsg("value row constructor", state); }
+@after { gParent.popMsg(state); }
:
LPAREN precedenceUnaryPrefixExpression (COMMA precedenceUnaryPrefixExpression)* RPAREN -> ^(TOK_VALUE_ROW precedenceUnaryPrefixExpression+)
;
valuesTableConstructor
+@init { gParent.pushMsg("values table constructor", state); }
+@after { gParent.popMsg(state); }
:
valueRowConstructor (COMMA valueRowConstructor)* -> ^(TOK_VALUES_TABLE valueRowConstructor+)
;
@@ -285,6 +299,8 @@ VALUES(1,2),(3,4) means 2 rows, 2 columns each.
VALUES(1,2,3) means 1 row, 3 columns
*/
valuesClause
+@init { gParent.pushMsg("values clause", state); }
+@after { gParent.popMsg(state); }
:
KW_VALUES valuesTableConstructor -> valuesTableConstructor
;
@@ -294,16 +310,20 @@ This represents a clause like this:
(VALUES(1,2),(2,3)) as VirtTable(col1,col2)
*/
virtualTableSource
- :
- LPAREN valuesClause RPAREN tableNameColList -> ^(TOK_VIRTUAL_TABLE tableNameColList valuesClause)
- ;
+@init { gParent.pushMsg("virtual table source", state); }
+@after { gParent.popMsg(state); }
+ :
+ LPAREN valuesClause RPAREN tableNameColList -> ^(TOK_VIRTUAL_TABLE tableNameColList valuesClause)
+ ;
/*
e.g. as VirtTable(col1,col2)
Note that we only want literals as column names
*/
tableNameColList
+@init { gParent.pushMsg("table name col list", state); }
+@after { gParent.popMsg(state); }
:
KW_AS? identifier LPAREN identifier (COMMA identifier)* RPAREN -> ^(TOK_VIRTUAL_TABREF ^(TOK_TABNAME identifier) ^(TOK_COL_NAME identifier+))
;
-//-----------------------------------------------------------------------------------
\ No newline at end of file
+//-----------------------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/947871a3/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 3df67e9..161e549 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -376,6 +376,8 @@ import java.util.Collection;
import java.util.HashMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
}
@@ -618,19 +620,22 @@ import org.apache.hadoop.hive.conf.HiveConf;
return msg;
}
+ public static final Log LOG = LogFactory.getLog("HiveParser");
public void pushMsg(String msg, RecognizerSharedState state) {
// ANTLR generated code does not wrap the @init code with this backtracking check,
// even if the matching @after has it. If we have parser rules that are doing
// some lookahead with syntactic predicates this can cause the push() and pop() calls
// to become unbalanced, so make sure both push/pop check the backtracking state.
if (state.backtracking == 0) {
+ // LOG.debug("Push " + msg);
msgs.push(msg);
}
}
public void popMsg(RecognizerSharedState state) {
if (state.backtracking == 0) {
- msgs.pop();
+ Object o = msgs.pop();
+ // LOG.debug("Pop " + o);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/947871a3/ql/src/test/queries/clientpositive/join_parse.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/join_parse.q b/ql/src/test/queries/clientpositive/join_parse.q
new file mode 100644
index 0000000..5955efd
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/join_parse.q
@@ -0,0 +1,20 @@
+explain
+select srcpart.key, src1.value from
+((srcpart inner join src on srcpart.key = src.key))
+inner join src src1 on src1.value =srcpart.value;
+
+explain
+select srcpart.key, src1.value from
+(srcpart inner join src on srcpart.key = src.key)
+inner join src src1 on src1.value =srcpart.value;
+
+explain
+select srcpart.key, src1.value from
+((srcpart inner join src on srcpart.key = src.key)
+inner join src src1 on src1.value =srcpart.value);
+
+explain
+select srcpart.key, src1.value from
+((srcpart inner join src on srcpart.key = src.key)
+inner join src src1 on src1.value =srcpart.value)
+inner join src src2 on src2.key = src1.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/947871a3/ql/src/test/results/clientnegative/cte_with_in_subquery.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/cte_with_in_subquery.q.out b/ql/src/test/results/clientnegative/cte_with_in_subquery.q.out
index fa22b48..1122ca5 100644
--- a/ql/src/test/results/clientnegative/cte_with_in_subquery.q.out
+++ b/ql/src/test/results/clientnegative/cte_with_in_subquery.q.out
@@ -1 +1 @@
-FAILED: ParseException line 1:64 Failed to recognize predicate 'select'. Failed rule: 'queryStatementExpression' in subquery source
+FAILED: ParseException line 1:20 cannot recognize input near 'with' 'q1' 'as' in from source
http://git-wip-us.apache.org/repos/asf/hive/blob/947871a3/ql/src/test/results/clientpositive/join_parse.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join_parse.q.out b/ql/src/test/results/clientpositive/join_parse.q.out
new file mode 100644
index 0000000..e1a23a0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/join_parse.q.out
@@ -0,0 +1,516 @@
+PREHOOK: query: explain
+select srcpart.key, src1.value from
+((srcpart inner join src on srcpart.key = src.key))
+inner join src src1 on src1.value =srcpart.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select srcpart.key, src1.value from
+((srcpart inner join src on srcpart.key = src.key))
+inner join src src1 on src1.value =srcpart.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (value is not null and key is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select srcpart.key, src1.value from
+(srcpart inner join src on srcpart.key = src.key)
+inner join src src1 on src1.value =srcpart.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select srcpart.key, src1.value from
+(srcpart inner join src on srcpart.key = src.key)
+inner join src src1 on src1.value =srcpart.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (value is not null and key is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select srcpart.key, src1.value from
+((srcpart inner join src on srcpart.key = src.key)
+inner join src src1 on src1.value =srcpart.value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select srcpart.key, src1.value from
+((srcpart inner join src on srcpart.key = src.key)
+inner join src src1 on src1.value =srcpart.value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (value is not null and key is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select srcpart.key, src1.value from
+((srcpart inner join src on srcpart.key = src.key)
+inner join src src1 on src1.value =srcpart.value)
+inner join src src2 on src2.key = src1.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select srcpart.key, src1.value from
+((srcpart inner join src on srcpart.key = src.key)
+inner join src src1 on src1.value =srcpart.value)
+inner join src src2 on src2.key = src1.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-4
+ Stage-4 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col4
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col4 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
[16/22] hive git commit: HIVE-11970 : COLUMNS_V2 table in metastore
should have a longer name field (Sergey Shelukhin, reviewed by Alan Gates)
Posted by se...@apache.org.
HIVE-11970 : COLUMNS_V2 table in metastore should have a longer name field (Sergey Shelukhin, reviewed by Alan Gates)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a953b436
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a953b436
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a953b436
Branch: refs/heads/llap
Commit: a953b43628bf0dbbb9aadc4ae936e8c964534f3f
Parents: 82bc0e1
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Oct 1 14:09:36 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Oct 1 14:09:36 2015 -0700
----------------------------------------------------------------------
.../upgrade/derby/021-HIVE-11970.derby.sql | 6 +++++
.../upgrade/derby/hive-schema-1.3.0.derby.sql | 12 +++++-----
.../upgrade/derby/hive-schema-2.0.0.derby.sql | 12 +++++-----
.../derby/upgrade-1.2.0-to-1.3.0.derby.sql | 1 +
.../derby/upgrade-1.2.0-to-2.0.0.derby.sql | 3 ++-
.../upgrade/mssql/007-HIVE-11970.mssql.sql | 6 +++++
.../upgrade/mssql/hive-schema-1.3.0.mssql.sql | 12 +++++-----
.../upgrade/mssql/hive-schema-2.0.0.mssql.sql | 12 +++++-----
.../mssql/upgrade-1.2.0-to-1.3.0.mssql.sql | 1 +
.../mssql/upgrade-1.2.0-to-2.0.0.mssql.sql | 7 +++---
.../upgrade/mysql/022-HIVE-11970.mysql.sql | 6 +++++
.../upgrade/mysql/hive-schema-1.3.0.mysql.sql | 12 +++++-----
.../upgrade/mysql/hive-schema-2.0.0.mysql.sql | 12 +++++-----
.../mysql/upgrade-1.2.0-to-1.3.0.mysql.sql | 1 +
.../mysql/upgrade-1.2.0-to-2.0.0.mysql.sql | 2 ++
.../upgrade/oracle/022-HIVE-11970.oracle.sql | 23 ++++++++++++++++++++
.../upgrade/oracle/hive-schema-1.3.0.oracle.sql | 12 +++++-----
.../upgrade/oracle/hive-schema-2.0.0.oracle.sql | 12 +++++-----
.../oracle/upgrade-1.2.0-to-1.3.0.oracle.sql | 2 ++
.../oracle/upgrade-1.2.0-to-2.0.0.oracle.sql | 2 ++
.../postgres/021-HIVE-11970.postgres.sql | 6 +++++
.../postgres/hive-schema-1.3.0.postgres.sql | 12 +++++-----
.../postgres/hive-schema-2.0.0.postgres.sql | 12 +++++-----
.../upgrade-1.2.0-to-1.3.0.postgres.sql | 1 +
.../upgrade-1.2.0-to-2.0.0.postgres.sql | 1 +
25 files changed, 124 insertions(+), 64 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/derby/021-HIVE-11970.derby.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/derby/021-HIVE-11970.derby.sql b/metastore/scripts/upgrade/derby/021-HIVE-11970.derby.sql
new file mode 100644
index 0000000..6a01a53
--- /dev/null
+++ b/metastore/scripts/upgrade/derby/021-HIVE-11970.derby.sql
@@ -0,0 +1,6 @@
+ALTER TABLE "COLUMNS_V2" ALTER "COLUMN_NAME" SET DATA TYPE VARCHAR(1000);
+ALTER TABLE "PART_COL_PRIVS" ALTER "COLUMN_NAME" SET DATA TYPE VARCHAR(1000);
+ALTER TABLE "TBL_COL_PRIVS" ALTER "COLUMN_NAME" SET DATA TYPE VARCHAR(1000);
+ALTER TABLE "SORT_COLS" ALTER "COLUMN_NAME" SET DATA TYPE VARCHAR(1000);
+ALTER TABLE "TAB_COL_STATS" ALTER "COLUMN_NAME" SET DATA TYPE VARCHAR(1000);
+ALTER TABLE "PART_COL_STATS" ALTER "COLUMN_NAME" SET DATA TYPE VARCHAR(1000);
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/derby/hive-schema-1.3.0.derby.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/derby/hive-schema-1.3.0.derby.sql b/metastore/scripts/upgrade/derby/hive-schema-1.3.0.derby.sql
index c50375f..20eb326 100644
--- a/metastore/scripts/upgrade/derby/hive-schema-1.3.0.derby.sql
+++ b/metastore/scripts/upgrade/derby/hive-schema-1.3.0.derby.sql
@@ -22,13 +22,13 @@ CREATE TABLE "APP"."TBL_PRIVS" ("TBL_GRANT_ID" BIGINT NOT NULL, "CREATE_TIME" IN
CREATE TABLE "APP"."DATABASE_PARAMS" ("DB_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(180) NOT NULL, "PARAM_VALUE" VARCHAR(4000));
-CREATE TABLE "APP"."TBL_COL_PRIVS" ("TBL_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(128), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "TBL_COL_PRIV" VARCHAR(128), "TBL_ID" BIGINT);
+CREATE TABLE "APP"."TBL_COL_PRIVS" ("TBL_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(1000), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "TBL_COL_PRIV" VARCHAR(128), "TBL_ID" BIGINT);
CREATE TABLE "APP"."SERDE_PARAMS" ("SERDE_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(256) NOT NULL, "PARAM_VALUE" VARCHAR(4000));
-CREATE TABLE "APP"."COLUMNS_V2" ("CD_ID" BIGINT NOT NULL, "COMMENT" VARCHAR(4000), "COLUMN_NAME" VARCHAR(128) NOT NULL, "TYPE_NAME" VARCHAR(4000), "INTEGER_IDX" INTEGER NOT NULL);
+CREATE TABLE "APP"."COLUMNS_V2" ("CD_ID" BIGINT NOT NULL, "COMMENT" VARCHAR(4000), "COLUMN_NAME" VARCHAR(1000) NOT NULL, "TYPE_NAME" VARCHAR(4000), "INTEGER_IDX" INTEGER NOT NULL);
-CREATE TABLE "APP"."SORT_COLS" ("SD_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(128), "ORDER" INTEGER NOT NULL, "INTEGER_IDX" INTEGER NOT NULL);
+CREATE TABLE "APP"."SORT_COLS" ("SD_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(1000), "ORDER" INTEGER NOT NULL, "INTEGER_IDX" INTEGER NOT NULL);
CREATE TABLE "APP"."CDS" ("CD_ID" BIGINT NOT NULL);
@@ -64,7 +64,7 @@ CREATE TABLE "APP"."TBLS" ("TBL_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT N
CREATE TABLE "APP"."PARTITION_KEYS" ("TBL_ID" BIGINT NOT NULL, "PKEY_COMMENT" VARCHAR(4000), "PKEY_NAME" VARCHAR(128) NOT NULL, "PKEY_TYPE" VARCHAR(767) NOT NULL, "INTEGER_IDX" INTEGER NOT NULL);
-CREATE TABLE "APP"."PART_COL_PRIVS" ("PART_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(128), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PART_ID" BIGINT, "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "PART_COL_PRIV" VARCHAR(128));
+CREATE TABLE "APP"."PART_COL_PRIVS" ("PART_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(1000), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PART_ID" BIGINT, "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "PART_COL_PRIV" VARCHAR(128));
CREATE TABLE "APP"."SDS" ("SD_ID" BIGINT NOT NULL, "INPUT_FORMAT" VARCHAR(4000), "IS_COMPRESSED" CHAR(1) NOT NULL, "LOCATION" VARCHAR(4000), "NUM_BUCKETS" INTEGER NOT NULL, "OUTPUT_FORMAT" VARCHAR(4000), "SERDE_ID" BIGINT, "CD_ID" BIGINT, "IS_STOREDASSUBDIRECTORIES" CHAR(1) NOT NULL);
@@ -94,9 +94,9 @@ CREATE TABLE "APP"."MASTER_KEYS" ("KEY_ID" INTEGER NOT NULL generated always as
CREATE TABLE "APP"."DELEGATION_TOKENS" ( "TOKEN_IDENT" VARCHAR(767) NOT NULL, "TOKEN" VARCHAR(767));
-CREATE TABLE "APP"."TAB_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(128) NOT NULL, "COLUMN_NAME" VARCHAR(128) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "TBL_ID" BIGINT NOT NULL);
+CREATE TABLE "APP"."TAB_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(128) NOT NULL, "COLUMN_NAME" VARCHAR(1000) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "TBL_ID" BIGINT NOT NULL);
-CREATE TABLE "APP"."PART_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(128) NOT NULL, "PARTITION_NAME" VARCHAR(767) NOT NULL, "COLUMN_NAME" VARCHAR(128) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "PART_ID" BIGINT NOT NULL);
+CREATE TABLE "APP"."PART_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(128) NOT NULL, "PARTITION_NAME" VARCHAR(767) NOT NULL, "COLUMN_NAME" VARCHAR(1000) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "PART_ID" BIGINT NOT NULL);
CREATE TABLE "APP"."VERSION" ("VER_ID" BIGINT NOT NULL, "SCHEMA_VERSION" VARCHAR(127) NOT NULL, "VERSION_COMMENT" VARCHAR(255));
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/derby/hive-schema-2.0.0.derby.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/derby/hive-schema-2.0.0.derby.sql b/metastore/scripts/upgrade/derby/hive-schema-2.0.0.derby.sql
index 1cc0a24..abc6bf4 100644
--- a/metastore/scripts/upgrade/derby/hive-schema-2.0.0.derby.sql
+++ b/metastore/scripts/upgrade/derby/hive-schema-2.0.0.derby.sql
@@ -22,13 +22,13 @@ CREATE TABLE "APP"."TBL_PRIVS" ("TBL_GRANT_ID" BIGINT NOT NULL, "CREATE_TIME" IN
CREATE TABLE "APP"."DATABASE_PARAMS" ("DB_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(180) NOT NULL, "PARAM_VALUE" VARCHAR(4000));
-CREATE TABLE "APP"."TBL_COL_PRIVS" ("TBL_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(128), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "TBL_COL_PRIV" VARCHAR(128), "TBL_ID" BIGINT);
+CREATE TABLE "APP"."TBL_COL_PRIVS" ("TBL_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(1000), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "TBL_COL_PRIV" VARCHAR(128), "TBL_ID" BIGINT);
CREATE TABLE "APP"."SERDE_PARAMS" ("SERDE_ID" BIGINT NOT NULL, "PARAM_KEY" VARCHAR(256) NOT NULL, "PARAM_VALUE" VARCHAR(4000));
-CREATE TABLE "APP"."COLUMNS_V2" ("CD_ID" BIGINT NOT NULL, "COMMENT" VARCHAR(4000), "COLUMN_NAME" VARCHAR(128) NOT NULL, "TYPE_NAME" VARCHAR(4000), "INTEGER_IDX" INTEGER NOT NULL);
+CREATE TABLE "APP"."COLUMNS_V2" ("CD_ID" BIGINT NOT NULL, "COMMENT" VARCHAR(4000), "COLUMN_NAME" VARCHAR(1000) NOT NULL, "TYPE_NAME" VARCHAR(4000), "INTEGER_IDX" INTEGER NOT NULL);
-CREATE TABLE "APP"."SORT_COLS" ("SD_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(128), "ORDER" INTEGER NOT NULL, "INTEGER_IDX" INTEGER NOT NULL);
+CREATE TABLE "APP"."SORT_COLS" ("SD_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(1000), "ORDER" INTEGER NOT NULL, "INTEGER_IDX" INTEGER NOT NULL);
CREATE TABLE "APP"."CDS" ("CD_ID" BIGINT NOT NULL);
@@ -64,7 +64,7 @@ CREATE TABLE "APP"."TBLS" ("TBL_ID" BIGINT NOT NULL, "CREATE_TIME" INTEGER NOT N
CREATE TABLE "APP"."PARTITION_KEYS" ("TBL_ID" BIGINT NOT NULL, "PKEY_COMMENT" VARCHAR(4000), "PKEY_NAME" VARCHAR(128) NOT NULL, "PKEY_TYPE" VARCHAR(767) NOT NULL, "INTEGER_IDX" INTEGER NOT NULL);
-CREATE TABLE "APP"."PART_COL_PRIVS" ("PART_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(128), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PART_ID" BIGINT, "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "PART_COL_PRIV" VARCHAR(128));
+CREATE TABLE "APP"."PART_COL_PRIVS" ("PART_COLUMN_GRANT_ID" BIGINT NOT NULL, "COLUMN_NAME" VARCHAR(1000), "CREATE_TIME" INTEGER NOT NULL, "GRANT_OPTION" SMALLINT NOT NULL, "GRANTOR" VARCHAR(128), "GRANTOR_TYPE" VARCHAR(128), "PART_ID" BIGINT, "PRINCIPAL_NAME" VARCHAR(128), "PRINCIPAL_TYPE" VARCHAR(128), "PART_COL_PRIV" VARCHAR(128));
CREATE TABLE "APP"."SDS" ("SD_ID" BIGINT NOT NULL, "INPUT_FORMAT" VARCHAR(4000), "IS_COMPRESSED" CHAR(1) NOT NULL, "LOCATION" VARCHAR(4000), "NUM_BUCKETS" INTEGER NOT NULL, "OUTPUT_FORMAT" VARCHAR(4000), "SERDE_ID" BIGINT, "CD_ID" BIGINT, "IS_STOREDASSUBDIRECTORIES" CHAR(1) NOT NULL);
@@ -94,9 +94,9 @@ CREATE TABLE "APP"."MASTER_KEYS" ("KEY_ID" INTEGER NOT NULL generated always as
CREATE TABLE "APP"."DELEGATION_TOKENS" ( "TOKEN_IDENT" VARCHAR(767) NOT NULL, "TOKEN" VARCHAR(767));
-CREATE TABLE "APP"."TAB_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(128) NOT NULL, "COLUMN_NAME" VARCHAR(128) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "TBL_ID" BIGINT NOT NULL);
+CREATE TABLE "APP"."TAB_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(128) NOT NULL, "COLUMN_NAME" VARCHAR(1000) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "TBL_ID" BIGINT NOT NULL);
-CREATE TABLE "APP"."PART_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(128) NOT NULL, "PARTITION_NAME" VARCHAR(767) NOT NULL, "COLUMN_NAME" VARCHAR(128) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "PART_ID" BIGINT NOT NULL);
+CREATE TABLE "APP"."PART_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(128) NOT NULL, "PARTITION_NAME" VARCHAR(767) NOT NULL, "COLUMN_NAME" VARCHAR(1000) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "PART_ID" BIGINT NOT NULL);
CREATE TABLE "APP"."VERSION" ("VER_ID" BIGINT NOT NULL, "SCHEMA_VERSION" VARCHAR(127) NOT NULL, "VERSION_COMMENT" VARCHAR(255));
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-1.3.0.derby.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-1.3.0.derby.sql b/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-1.3.0.derby.sql
index dee744b..6359de4 100644
--- a/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-1.3.0.derby.sql
+++ b/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-1.3.0.derby.sql
@@ -1,3 +1,4 @@
-- Upgrade MetaStore schema from 1.2.0 to 1.3.0
+RUN '021-HIVE-11970.derby.sql';
UPDATE "APP".VERSION SET SCHEMA_VERSION='1.3.0', VERSION_COMMENT='Hive release version 1.3.0' where VER_ID=1;
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-2.0.0.derby.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-2.0.0.derby.sql b/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-2.0.0.derby.sql
index 976ebd1..adf3cb0 100644
--- a/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-2.0.0.derby.sql
+++ b/metastore/scripts/upgrade/derby/upgrade-1.2.0-to-2.0.0.derby.sql
@@ -1,3 +1,4 @@
--- Upgrade MetaStore schema from 1.3.0 to 2.0.0
+-- Upgrade MetaStore schema from 1.2.0 to 2.0.0
+RUN '021-HIVE-11970.derby.sql';
UPDATE "APP".VERSION SET SCHEMA_VERSION='2.0.0', VERSION_COMMENT='Hive release version 2.0.0' where VER_ID=1;
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mssql/007-HIVE-11970.mssql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mssql/007-HIVE-11970.mssql.sql b/metastore/scripts/upgrade/mssql/007-HIVE-11970.mssql.sql
new file mode 100644
index 0000000..69b11a2
--- /dev/null
+++ b/metastore/scripts/upgrade/mssql/007-HIVE-11970.mssql.sql
@@ -0,0 +1,6 @@
+ALTER TABLE "COLUMNS_V2" ALTER COLUMN "COLUMN_NAME" VARCHAR(1000) NOT NULL;
+ALTER TABLE "PART_COL_PRIVS" ALTER COLUMN "COLUMN_NAME" VARCHAR(1000) NULL;
+ALTER TABLE "TBL_COL_PRIVS" ALTER COLUMN "COLUMN_NAME" VARCHAR(1000) NULL;
+ALTER TABLE "SORT_COLS" ALTER COLUMN "COLUMN_NAME" VARCHAR(1000) NULL;
+ALTER TABLE "TAB_COL_STATS" ALTER COLUMN "COLUMN_NAME" VARCHAR(1000) NOT NULL;
+ALTER TABLE "PART_COL_STATS" ALTER COLUMN "COLUMN_NAME" VARCHAR(1000) NOT NULL;
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mssql/hive-schema-1.3.0.mssql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mssql/hive-schema-1.3.0.mssql.sql b/metastore/scripts/upgrade/mssql/hive-schema-1.3.0.mssql.sql
index 7165edd..01c1376 100644
--- a/metastore/scripts/upgrade/mssql/hive-schema-1.3.0.mssql.sql
+++ b/metastore/scripts/upgrade/mssql/hive-schema-1.3.0.mssql.sql
@@ -75,7 +75,7 @@ CREATE TABLE PART_COL_STATS
(
CS_ID bigint NOT NULL,
AVG_COL_LEN float NULL,
- "COLUMN_NAME" nvarchar(128) NOT NULL,
+ "COLUMN_NAME" nvarchar(1000) NOT NULL,
COLUMN_TYPE nvarchar(128) NOT NULL,
DB_NAME nvarchar(128) NOT NULL,
BIG_DECIMAL_HIGH_VALUE nvarchar(255) NULL,
@@ -184,7 +184,7 @@ ALTER TABLE GLOBAL_PRIVS ADD CONSTRAINT GLOBAL_PRIVS_PK PRIMARY KEY (USER_GRANT_
CREATE TABLE PART_COL_PRIVS
(
PART_COLUMN_GRANT_ID bigint NOT NULL,
- "COLUMN_NAME" nvarchar(128) NULL,
+ "COLUMN_NAME" nvarchar(1000) NULL,
CREATE_TIME int NOT NULL,
GRANT_OPTION smallint NOT NULL CHECK (GRANT_OPTION IN (0,1)),
GRANTOR nvarchar(128) NULL,
@@ -218,7 +218,7 @@ CREATE TABLE TAB_COL_STATS
(
CS_ID bigint NOT NULL,
AVG_COL_LEN float NULL,
- "COLUMN_NAME" nvarchar(128) NOT NULL,
+ "COLUMN_NAME" nvarchar(1000) NOT NULL,
COLUMN_TYPE nvarchar(128) NOT NULL,
DB_NAME nvarchar(128) NOT NULL,
BIG_DECIMAL_HIGH_VALUE nvarchar(255) NULL,
@@ -283,7 +283,7 @@ ALTER TABLE DBS ADD CONSTRAINT DBS_PK PRIMARY KEY (DB_ID);
CREATE TABLE TBL_COL_PRIVS
(
TBL_COLUMN_GRANT_ID bigint NOT NULL,
- "COLUMN_NAME" nvarchar(128) NULL,
+ "COLUMN_NAME" nvarchar(1000) NULL,
CREATE_TIME int NOT NULL,
GRANT_OPTION smallint NOT NULL CHECK (GRANT_OPTION IN (0,1)),
GRANTOR nvarchar(128) NULL,
@@ -396,7 +396,7 @@ ALTER TABLE PARTITION_EVENTS ADD CONSTRAINT PARTITION_EVENTS_PK PRIMARY KEY (PAR
CREATE TABLE SORT_COLS
(
SD_ID bigint NOT NULL,
- "COLUMN_NAME" nvarchar(128) NULL,
+ "COLUMN_NAME" nvarchar(1000) NULL,
"ORDER" int NOT NULL,
INTEGER_IDX int NOT NULL
);
@@ -533,7 +533,7 @@ CREATE TABLE COLUMNS_V2
(
CD_ID bigint NOT NULL,
COMMENT nvarchar(256) NULL,
- "COLUMN_NAME" nvarchar(128) NOT NULL,
+ "COLUMN_NAME" nvarchar(1000) NOT NULL,
TYPE_NAME nvarchar(4000) NOT NULL,
INTEGER_IDX int NOT NULL
);
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mssql/hive-schema-2.0.0.mssql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mssql/hive-schema-2.0.0.mssql.sql b/metastore/scripts/upgrade/mssql/hive-schema-2.0.0.mssql.sql
index 36e7c1f..1ec8632 100644
--- a/metastore/scripts/upgrade/mssql/hive-schema-2.0.0.mssql.sql
+++ b/metastore/scripts/upgrade/mssql/hive-schema-2.0.0.mssql.sql
@@ -75,7 +75,7 @@ CREATE TABLE PART_COL_STATS
(
CS_ID bigint NOT NULL,
AVG_COL_LEN float NULL,
- "COLUMN_NAME" nvarchar(128) NOT NULL,
+ "COLUMN_NAME" nvarchar(1000) NOT NULL,
COLUMN_TYPE nvarchar(128) NOT NULL,
DB_NAME nvarchar(128) NOT NULL,
BIG_DECIMAL_HIGH_VALUE nvarchar(255) NULL,
@@ -184,7 +184,7 @@ ALTER TABLE GLOBAL_PRIVS ADD CONSTRAINT GLOBAL_PRIVS_PK PRIMARY KEY (USER_GRANT_
CREATE TABLE PART_COL_PRIVS
(
PART_COLUMN_GRANT_ID bigint NOT NULL,
- "COLUMN_NAME" nvarchar(128) NULL,
+ "COLUMN_NAME" nvarchar(1000) NULL,
CREATE_TIME int NOT NULL,
GRANT_OPTION smallint NOT NULL CHECK (GRANT_OPTION IN (0,1)),
GRANTOR nvarchar(128) NULL,
@@ -218,7 +218,7 @@ CREATE TABLE TAB_COL_STATS
(
CS_ID bigint NOT NULL,
AVG_COL_LEN float NULL,
- "COLUMN_NAME" nvarchar(128) NOT NULL,
+ "COLUMN_NAME" nvarchar(1000) NOT NULL,
COLUMN_TYPE nvarchar(128) NOT NULL,
DB_NAME nvarchar(128) NOT NULL,
BIG_DECIMAL_HIGH_VALUE nvarchar(255) NULL,
@@ -283,7 +283,7 @@ ALTER TABLE DBS ADD CONSTRAINT DBS_PK PRIMARY KEY (DB_ID);
CREATE TABLE TBL_COL_PRIVS
(
TBL_COLUMN_GRANT_ID bigint NOT NULL,
- "COLUMN_NAME" nvarchar(128) NULL,
+ "COLUMN_NAME" nvarchar(1000) NULL,
CREATE_TIME int NOT NULL,
GRANT_OPTION smallint NOT NULL CHECK (GRANT_OPTION IN (0,1)),
GRANTOR nvarchar(128) NULL,
@@ -396,7 +396,7 @@ ALTER TABLE PARTITION_EVENTS ADD CONSTRAINT PARTITION_EVENTS_PK PRIMARY KEY (PAR
CREATE TABLE SORT_COLS
(
SD_ID bigint NOT NULL,
- "COLUMN_NAME" nvarchar(128) NULL,
+ "COLUMN_NAME" nvarchar(1000) NULL,
"ORDER" int NOT NULL,
INTEGER_IDX int NOT NULL
);
@@ -533,7 +533,7 @@ CREATE TABLE COLUMNS_V2
(
CD_ID bigint NOT NULL,
COMMENT nvarchar(256) NULL,
- "COLUMN_NAME" nvarchar(128) NOT NULL,
+ "COLUMN_NAME" nvarchar(1000) NOT NULL,
TYPE_NAME nvarchar(4000) NOT NULL,
INTEGER_IDX int NOT NULL
);
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-1.3.0.mssql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-1.3.0.mssql.sql b/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-1.3.0.mssql.sql
index fd12a92..9cf9d25 100644
--- a/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-1.3.0.mssql.sql
+++ b/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-1.3.0.mssql.sql
@@ -1,5 +1,6 @@
SELECT 'Upgrading MetaStore schema from 1.2.0 to 1.3.0' AS MESSAGE;
+:r 007-HIVE-11970.mssql.sql;
UPDATE VERSION SET SCHEMA_VERSION='1.3.0', VERSION_COMMENT='Hive release version 1.3.0' where VER_ID=1;
SELECT 'Finished upgrading MetaStore schema from 1.2.0 to 1.3.0' AS MESSAGE;
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-2.0.0.mssql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-2.0.0.mssql.sql b/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-2.0.0.mssql.sql
index fd12a92..de3d29d 100644
--- a/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-2.0.0.mssql.sql
+++ b/metastore/scripts/upgrade/mssql/upgrade-1.2.0-to-2.0.0.mssql.sql
@@ -1,5 +1,6 @@
-SELECT 'Upgrading MetaStore schema from 1.2.0 to 1.3.0' AS MESSAGE;
+SELECT 'Upgrading MetaStore schema from 1.2.0 to 2.0.0' AS MESSAGE;
+:r 007-HIVE-11970.mssql.sql;
-UPDATE VERSION SET SCHEMA_VERSION='1.3.0', VERSION_COMMENT='Hive release version 1.3.0' where VER_ID=1;
-SELECT 'Finished upgrading MetaStore schema from 1.2.0 to 1.3.0' AS MESSAGE;
+UPDATE VERSION SET SCHEMA_VERSION='2.0.0', VERSION_COMMENT='Hive release version 2.0.0' where VER_ID=1;
+SELECT 'Finished upgrading MetaStore schema from 1.2.0 to 2.0.0' AS MESSAGE;
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mysql/022-HIVE-11970.mysql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mysql/022-HIVE-11970.mysql.sql b/metastore/scripts/upgrade/mysql/022-HIVE-11970.mysql.sql
new file mode 100644
index 0000000..4517e00
--- /dev/null
+++ b/metastore/scripts/upgrade/mysql/022-HIVE-11970.mysql.sql
@@ -0,0 +1,6 @@
+ALTER TABLE `COLUMNS_V2` MODIFY `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL;
+ALTER TABLE `PART_COL_PRIVS` MODIFY `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL;
+ALTER TABLE `TBL_COL_PRIVS` MODIFY `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL;
+ALTER TABLE `SORT_COLS` MODIFY `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL;
+ALTER TABLE `TAB_COL_STATS` MODIFY `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL;
+ALTER TABLE `PART_COL_STATS` MODIFY `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL;
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mysql/hive-schema-1.3.0.mysql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mysql/hive-schema-1.3.0.mysql.sql b/metastore/scripts/upgrade/mysql/hive-schema-1.3.0.mysql.sql
index 71de138..ce0ac54 100644
--- a/metastore/scripts/upgrade/mysql/hive-schema-1.3.0.mysql.sql
+++ b/metastore/scripts/upgrade/mysql/hive-schema-1.3.0.mysql.sql
@@ -52,7 +52,7 @@ CREATE TABLE IF NOT EXISTS `CDS` (
CREATE TABLE IF NOT EXISTS `COLUMNS_V2` (
`CD_ID` bigint(20) NOT NULL,
`COMMENT` varchar(256) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`TYPE_NAME` varchar(4000) DEFAULT NULL,
`INTEGER_IDX` int(11) NOT NULL,
PRIMARY KEY (`CD_ID`,`COLUMN_NAME`),
@@ -296,7 +296,7 @@ CREATE TABLE IF NOT EXISTS `PARTITION_PARAMS` (
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE IF NOT EXISTS `PART_COL_PRIVS` (
`PART_COLUMN_GRANT_ID` bigint(20) NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
`CREATE_TIME` int(11) NOT NULL,
`GRANT_OPTION` smallint(6) NOT NULL,
`GRANTOR` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
@@ -544,7 +544,7 @@ CREATE TABLE IF NOT EXISTS `SKEWED_VALUES` (
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE IF NOT EXISTS `SORT_COLS` (
`SD_ID` bigint(20) NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
`ORDER` int(11) NOT NULL,
`INTEGER_IDX` int(11) NOT NULL,
PRIMARY KEY (`SD_ID`,`INTEGER_IDX`),
@@ -604,7 +604,7 @@ CREATE TABLE IF NOT EXISTS `TBLS` (
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE IF NOT EXISTS `TBL_COL_PRIVS` (
`TBL_COLUMN_GRANT_ID` bigint(20) NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
`CREATE_TIME` int(11) NOT NULL,
`GRANT_OPTION` smallint(6) NOT NULL,
`GRANTOR` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
@@ -650,7 +650,7 @@ CREATE TABLE IF NOT EXISTS `TAB_COL_STATS` (
`CS_ID` bigint(20) NOT NULL,
`DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`COLUMN_TYPE` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`TBL_ID` bigint(20) NOT NULL,
`LONG_LOW_VALUE` bigint(20),
@@ -678,7 +678,7 @@ CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
`DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`COLUMN_TYPE` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`PART_ID` bigint(20) NOT NULL,
`LONG_LOW_VALUE` bigint(20),
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mysql/hive-schema-2.0.0.mysql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mysql/hive-schema-2.0.0.mysql.sql b/metastore/scripts/upgrade/mysql/hive-schema-2.0.0.mysql.sql
index 6547cf1..3a2c0e2 100644
--- a/metastore/scripts/upgrade/mysql/hive-schema-2.0.0.mysql.sql
+++ b/metastore/scripts/upgrade/mysql/hive-schema-2.0.0.mysql.sql
@@ -52,7 +52,7 @@ CREATE TABLE IF NOT EXISTS `CDS` (
CREATE TABLE IF NOT EXISTS `COLUMNS_V2` (
`CD_ID` bigint(20) NOT NULL,
`COMMENT` varchar(256) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`TYPE_NAME` varchar(4000) DEFAULT NULL,
`INTEGER_IDX` int(11) NOT NULL,
PRIMARY KEY (`CD_ID`,`COLUMN_NAME`),
@@ -296,7 +296,7 @@ CREATE TABLE IF NOT EXISTS `PARTITION_PARAMS` (
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE IF NOT EXISTS `PART_COL_PRIVS` (
`PART_COLUMN_GRANT_ID` bigint(20) NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
`CREATE_TIME` int(11) NOT NULL,
`GRANT_OPTION` smallint(6) NOT NULL,
`GRANTOR` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
@@ -544,7 +544,7 @@ CREATE TABLE IF NOT EXISTS `SKEWED_VALUES` (
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE IF NOT EXISTS `SORT_COLS` (
`SD_ID` bigint(20) NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
`ORDER` int(11) NOT NULL,
`INTEGER_IDX` int(11) NOT NULL,
PRIMARY KEY (`SD_ID`,`INTEGER_IDX`),
@@ -604,7 +604,7 @@ CREATE TABLE IF NOT EXISTS `TBLS` (
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE IF NOT EXISTS `TBL_COL_PRIVS` (
`TBL_COLUMN_GRANT_ID` bigint(20) NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
`CREATE_TIME` int(11) NOT NULL,
`GRANT_OPTION` smallint(6) NOT NULL,
`GRANTOR` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
@@ -650,7 +650,7 @@ CREATE TABLE IF NOT EXISTS `TAB_COL_STATS` (
`CS_ID` bigint(20) NOT NULL,
`DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`COLUMN_TYPE` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`TBL_ID` bigint(20) NOT NULL,
`LONG_LOW_VALUE` bigint(20),
@@ -678,7 +678,7 @@ CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
`DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
- `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
+ `COLUMN_NAME` varchar(1000) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`COLUMN_TYPE` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`PART_ID` bigint(20) NOT NULL,
`LONG_LOW_VALUE` bigint(20),
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-1.3.0.mysql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-1.3.0.mysql.sql b/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-1.3.0.mysql.sql
index 44a9946..1b32d93 100644
--- a/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-1.3.0.mysql.sql
+++ b/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-1.3.0.mysql.sql
@@ -1,4 +1,5 @@
SELECT 'Upgrading MetaStore schema from 1.2.0 to 1.3.0' AS ' ';
SOURCE 021-HIVE-7018.mysql.sql;
+SOURCE 022-HIVE-11970.mysql.sql;
UPDATE VERSION SET SCHEMA_VERSION='1.3.0', VERSION_COMMENT='Hive release version 1.3.0' where VER_ID=1;
SELECT 'Finished upgrading MetaStore schema from 1.2.0 to 1.3.0' AS ' ';
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-2.0.0.mysql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-2.0.0.mysql.sql b/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-2.0.0.mysql.sql
index 22d5242..1340f27 100644
--- a/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-2.0.0.mysql.sql
+++ b/metastore/scripts/upgrade/mysql/upgrade-1.2.0-to-2.0.0.mysql.sql
@@ -1,4 +1,6 @@
SELECT 'Upgrading MetaStore schema from 1.2.0 to 2.0.0' AS ' ';
+SOURCE 021-HIVE-7018.mysql.sql;
+SOURCE 022-HIVE-11970.mysql.sql;
UPDATE VERSION SET SCHEMA_VERSION='2.0.0', VERSION_COMMENT='Hive release version 2.0.0' where VER_ID=1;
SELECT 'Finished upgrading MetaStore schema from 1.2.0 to 2.0.0' AS ' ';
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/oracle/022-HIVE-11970.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/022-HIVE-11970.oracle.sql b/metastore/scripts/upgrade/oracle/022-HIVE-11970.oracle.sql
new file mode 100644
index 0000000..2f11157
--- /dev/null
+++ b/metastore/scripts/upgrade/oracle/022-HIVE-11970.oracle.sql
@@ -0,0 +1,23 @@
+ALTER TABLE COLUMNS_V2 MODIFY (
+ "COLUMN_NAME" VARCHAR2(1000)
+);
+
+ALTER TABLE PART_COL_PRIVS MODIFY (
+ "COLUMN_NAME" VARCHAR2(1000)
+);
+
+ALTER TABLE TBL_COL_PRIVS MODIFY (
+ "COLUMN_NAME" VARCHAR2(1000)
+);
+
+ALTER TABLE SORT_COLS MODIFY (
+ "COLUMN_NAME" VARCHAR2(1000)
+);
+
+ALTER TABLE TAB_COL_STATS MODIFY (
+ "COLUMN_NAME" VARCHAR2(1000)
+);
+
+ALTER TABLE PART_COL_STATS MODIFY (
+ "COLUMN_NAME" VARCHAR2(1000)
+);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/oracle/hive-schema-1.3.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/hive-schema-1.3.0.oracle.sql b/metastore/scripts/upgrade/oracle/hive-schema-1.3.0.oracle.sql
index 7605bc7..d2e7945 100644
--- a/metastore/scripts/upgrade/oracle/hive-schema-1.3.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/hive-schema-1.3.0.oracle.sql
@@ -29,7 +29,7 @@ ALTER TABLE NUCLEUS_TABLES ADD CONSTRAINT NUCLEUS_TABLES_PK PRIMARY KEY (CLASS_N
CREATE TABLE PART_COL_PRIVS
(
PART_COLUMN_GRANT_ID NUMBER NOT NULL,
- "COLUMN_NAME" VARCHAR2(128) NULL,
+ "COLUMN_NAME" VARCHAR2(1000) NULL,
CREATE_TIME NUMBER (10) NOT NULL,
GRANT_OPTION NUMBER (5) NOT NULL,
GRANTOR VARCHAR2(128) NULL,
@@ -55,7 +55,7 @@ CREATE TABLE COLUMNS_V2
(
CD_ID NUMBER NOT NULL,
"COMMENT" VARCHAR2(256) NULL,
- "COLUMN_NAME" VARCHAR2(128) NOT NULL,
+ "COLUMN_NAME" VARCHAR2(1000) NOT NULL,
TYPE_NAME VARCHAR2(4000) NOT NULL,
INTEGER_IDX NUMBER(10) NOT NULL
);
@@ -166,7 +166,7 @@ ALTER TABLE INDEX_PARAMS ADD CONSTRAINT INDEX_PARAMS_PK PRIMARY KEY (INDEX_ID,PA
CREATE TABLE TBL_COL_PRIVS
(
TBL_COLUMN_GRANT_ID NUMBER NOT NULL,
- "COLUMN_NAME" VARCHAR2(128) NULL,
+ "COLUMN_NAME" VARCHAR2(1000) NULL,
CREATE_TIME NUMBER (10) NOT NULL,
GRANT_OPTION NUMBER (5) NOT NULL,
GRANTOR VARCHAR2(128) NULL,
@@ -272,7 +272,7 @@ ALTER TABLE TABLE_PARAMS ADD CONSTRAINT TABLE_PARAMS_PK PRIMARY KEY (TBL_ID,PARA
CREATE TABLE SORT_COLS
(
SD_ID NUMBER NOT NULL,
- "COLUMN_NAME" VARCHAR2(128) NULL,
+ "COLUMN_NAME" VARCHAR2(1000) NULL,
"ORDER" NUMBER (10) NOT NULL,
INTEGER_IDX NUMBER(10) NOT NULL
);
@@ -467,7 +467,7 @@ CREATE TABLE TAB_COL_STATS (
CS_ID NUMBER NOT NULL,
DB_NAME VARCHAR2(128) NOT NULL,
TABLE_NAME VARCHAR2(128) NOT NULL,
- COLUMN_NAME VARCHAR2(128) NOT NULL,
+ COLUMN_NAME VARCHAR2(1000) NOT NULL,
COLUMN_TYPE VARCHAR2(128) NOT NULL,
TBL_ID NUMBER NOT NULL,
LONG_LOW_VALUE NUMBER,
@@ -503,7 +503,7 @@ CREATE TABLE PART_COL_STATS (
DB_NAME VARCHAR2(128) NOT NULL,
TABLE_NAME VARCHAR2(128) NOT NULL,
PARTITION_NAME VARCHAR2(767) NOT NULL,
- COLUMN_NAME VARCHAR2(128) NOT NULL,
+ COLUMN_NAME VARCHAR2(1000) NOT NULL,
COLUMN_TYPE VARCHAR2(128) NOT NULL,
PART_ID NUMBER NOT NULL,
LONG_LOW_VALUE NUMBER,
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/oracle/hive-schema-2.0.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/hive-schema-2.0.0.oracle.sql b/metastore/scripts/upgrade/oracle/hive-schema-2.0.0.oracle.sql
index 8d963ce..2dcdd77 100644
--- a/metastore/scripts/upgrade/oracle/hive-schema-2.0.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/hive-schema-2.0.0.oracle.sql
@@ -29,7 +29,7 @@ ALTER TABLE NUCLEUS_TABLES ADD CONSTRAINT NUCLEUS_TABLES_PK PRIMARY KEY (CLASS_N
CREATE TABLE PART_COL_PRIVS
(
PART_COLUMN_GRANT_ID NUMBER NOT NULL,
- "COLUMN_NAME" VARCHAR2(128) NULL,
+ "COLUMN_NAME" VARCHAR2(1000) NULL,
CREATE_TIME NUMBER (10) NOT NULL,
GRANT_OPTION NUMBER (5) NOT NULL,
GRANTOR VARCHAR2(128) NULL,
@@ -55,7 +55,7 @@ CREATE TABLE COLUMNS_V2
(
CD_ID NUMBER NOT NULL,
"COMMENT" VARCHAR2(256) NULL,
- "COLUMN_NAME" VARCHAR2(128) NOT NULL,
+ "COLUMN_NAME" VARCHAR2(1000) NOT NULL,
TYPE_NAME VARCHAR2(4000) NOT NULL,
INTEGER_IDX NUMBER(10) NOT NULL
);
@@ -166,7 +166,7 @@ ALTER TABLE INDEX_PARAMS ADD CONSTRAINT INDEX_PARAMS_PK PRIMARY KEY (INDEX_ID,PA
CREATE TABLE TBL_COL_PRIVS
(
TBL_COLUMN_GRANT_ID NUMBER NOT NULL,
- "COLUMN_NAME" VARCHAR2(128) NULL,
+ "COLUMN_NAME" VARCHAR2(1000) NULL,
CREATE_TIME NUMBER (10) NOT NULL,
GRANT_OPTION NUMBER (5) NOT NULL,
GRANTOR VARCHAR2(128) NULL,
@@ -272,7 +272,7 @@ ALTER TABLE TABLE_PARAMS ADD CONSTRAINT TABLE_PARAMS_PK PRIMARY KEY (TBL_ID,PARA
CREATE TABLE SORT_COLS
(
SD_ID NUMBER NOT NULL,
- "COLUMN_NAME" VARCHAR2(128) NULL,
+ "COLUMN_NAME" VARCHAR2(1000) NULL,
"ORDER" NUMBER (10) NOT NULL,
INTEGER_IDX NUMBER(10) NOT NULL
);
@@ -467,7 +467,7 @@ CREATE TABLE TAB_COL_STATS (
CS_ID NUMBER NOT NULL,
DB_NAME VARCHAR2(128) NOT NULL,
TABLE_NAME VARCHAR2(128) NOT NULL,
- COLUMN_NAME VARCHAR2(128) NOT NULL,
+ COLUMN_NAME VARCHAR2(1000) NOT NULL,
COLUMN_TYPE VARCHAR2(128) NOT NULL,
TBL_ID NUMBER NOT NULL,
LONG_LOW_VALUE NUMBER,
@@ -503,7 +503,7 @@ CREATE TABLE PART_COL_STATS (
DB_NAME VARCHAR2(128) NOT NULL,
TABLE_NAME VARCHAR2(128) NOT NULL,
PARTITION_NAME VARCHAR2(767) NOT NULL,
- COLUMN_NAME VARCHAR2(128) NOT NULL,
+ COLUMN_NAME VARCHAR2(1000) NOT NULL,
COLUMN_TYPE VARCHAR2(128) NOT NULL,
PART_ID NUMBER NOT NULL,
LONG_LOW_VALUE NUMBER,
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-1.3.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-1.3.0.oracle.sql b/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-1.3.0.oracle.sql
index f072a1c..bd283d4 100644
--- a/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-1.3.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-1.3.0.oracle.sql
@@ -1,4 +1,6 @@
SELECT 'Upgrading MetaStore schema from 1.2.0 to 1.3.0' AS Status from dual;
+@022-HIVE-11970.oracle.sql;
+
UPDATE VERSION SET SCHEMA_VERSION='1.3.0', VERSION_COMMENT='Hive release version 1.3.0' where VER_ID=1;
SELECT 'Finished upgrading MetaStore schema from 1.2.0 to 1.3.0' AS Status from dual;
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-2.0.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-2.0.0.oracle.sql b/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-2.0.0.oracle.sql
index efff2c9..ceb09be 100644
--- a/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-2.0.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/upgrade-1.2.0-to-2.0.0.oracle.sql
@@ -1,4 +1,6 @@
SELECT 'Upgrading MetaStore schema from 1.2.0 to 2.0.0' AS Status from dual;
+@022-HIVE-11970.oracle.sql;
+
UPDATE VERSION SET SCHEMA_VERSION='2.0.0', VERSION_COMMENT='Hive release version 2.0.0' where VER_ID=1;
SELECT 'Finished upgrading MetaStore schema from 1.2.0 to 2.0.0' AS Status from dual;
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/postgres/021-HIVE-11970.postgres.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/postgres/021-HIVE-11970.postgres.sql b/metastore/scripts/upgrade/postgres/021-HIVE-11970.postgres.sql
new file mode 100644
index 0000000..08cb4a5
--- /dev/null
+++ b/metastore/scripts/upgrade/postgres/021-HIVE-11970.postgres.sql
@@ -0,0 +1,6 @@
+ALTER TABLE "COLUMNS_V2" ALTER "COLUMN_NAME" TYPE character varying(1000);
+ALTER TABLE "PART_COL_PRIVS" ALTER "COLUMN_NAME" TYPE character varying(1000);
+ALTER TABLE "TBL_COL_PRIVS" ALTER "COLUMN_NAME" TYPE character varying(1000);
+ALTER TABLE "SORT_COLS" ALTER "COLUMN_NAME" TYPE character varying(1000);
+ALTER TABLE "TAB_COL_STATS" ALTER "COLUMN_NAME" TYPE character varying(1000);
+ALTER TABLE "PART_COL_STATS" ALTER "COLUMN_NAME" TYPE character varying(1000);
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/postgres/hive-schema-1.3.0.postgres.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/postgres/hive-schema-1.3.0.postgres.sql b/metastore/scripts/upgrade/postgres/hive-schema-1.3.0.postgres.sql
index 3ab5b3e..9bb5765 100644
--- a/metastore/scripts/upgrade/postgres/hive-schema-1.3.0.postgres.sql
+++ b/metastore/scripts/upgrade/postgres/hive-schema-1.3.0.postgres.sql
@@ -42,7 +42,7 @@ CREATE TABLE "CDS" (
CREATE TABLE "COLUMNS_V2" (
"CD_ID" bigint NOT NULL,
"COMMENT" character varying(4000),
- "COLUMN_NAME" character varying(128) NOT NULL,
+ "COLUMN_NAME" character varying(1000) NOT NULL,
"TYPE_NAME" character varying(4000),
"INTEGER_IDX" integer NOT NULL
);
@@ -217,7 +217,7 @@ CREATE TABLE "PARTITION_PARAMS" (
CREATE TABLE "PART_COL_PRIVS" (
"PART_COLUMN_GRANT_ID" bigint NOT NULL,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"CREATE_TIME" bigint NOT NULL,
"GRANT_OPTION" smallint NOT NULL,
"GRANTOR" character varying(128) DEFAULT NULL::character varying,
@@ -340,7 +340,7 @@ CREATE TABLE "SERDE_PARAMS" (
CREATE TABLE "SORT_COLS" (
"SD_ID" bigint NOT NULL,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"ORDER" bigint NOT NULL,
"INTEGER_IDX" bigint NOT NULL
);
@@ -382,7 +382,7 @@ CREATE TABLE "TBLS" (
CREATE TABLE "TBL_COL_PRIVS" (
"TBL_COLUMN_GRANT_ID" bigint NOT NULL,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"CREATE_TIME" bigint NOT NULL,
"GRANT_OPTION" smallint NOT NULL,
"GRANTOR" character varying(128) DEFAULT NULL::character varying,
@@ -486,7 +486,7 @@ CREATE TABLE "TAB_COL_STATS" (
"CS_ID" bigint NOT NULL,
"DB_NAME" character varying(128) DEFAULT NULL::character varying,
"TABLE_NAME" character varying(128) DEFAULT NULL::character varying,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"COLUMN_TYPE" character varying(128) DEFAULT NULL::character varying,
"TBL_ID" bigint NOT NULL,
"LONG_LOW_VALUE" bigint,
@@ -522,7 +522,7 @@ CREATE TABLE "PART_COL_STATS" (
"DB_NAME" character varying(128) DEFAULT NULL::character varying,
"TABLE_NAME" character varying(128) DEFAULT NULL::character varying,
"PARTITION_NAME" character varying(767) DEFAULT NULL::character varying,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"COLUMN_TYPE" character varying(128) DEFAULT NULL::character varying,
"PART_ID" bigint NOT NULL,
"LONG_LOW_VALUE" bigint,
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/postgres/hive-schema-2.0.0.postgres.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/postgres/hive-schema-2.0.0.postgres.sql b/metastore/scripts/upgrade/postgres/hive-schema-2.0.0.postgres.sql
index 6442eb1..c749a29 100644
--- a/metastore/scripts/upgrade/postgres/hive-schema-2.0.0.postgres.sql
+++ b/metastore/scripts/upgrade/postgres/hive-schema-2.0.0.postgres.sql
@@ -42,7 +42,7 @@ CREATE TABLE "CDS" (
CREATE TABLE "COLUMNS_V2" (
"CD_ID" bigint NOT NULL,
"COMMENT" character varying(4000),
- "COLUMN_NAME" character varying(128) NOT NULL,
+ "COLUMN_NAME" character varying(1000) NOT NULL,
"TYPE_NAME" character varying(4000),
"INTEGER_IDX" integer NOT NULL
);
@@ -217,7 +217,7 @@ CREATE TABLE "PARTITION_PARAMS" (
CREATE TABLE "PART_COL_PRIVS" (
"PART_COLUMN_GRANT_ID" bigint NOT NULL,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"CREATE_TIME" bigint NOT NULL,
"GRANT_OPTION" smallint NOT NULL,
"GRANTOR" character varying(128) DEFAULT NULL::character varying,
@@ -340,7 +340,7 @@ CREATE TABLE "SERDE_PARAMS" (
CREATE TABLE "SORT_COLS" (
"SD_ID" bigint NOT NULL,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"ORDER" bigint NOT NULL,
"INTEGER_IDX" bigint NOT NULL
);
@@ -382,7 +382,7 @@ CREATE TABLE "TBLS" (
CREATE TABLE "TBL_COL_PRIVS" (
"TBL_COLUMN_GRANT_ID" bigint NOT NULL,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"CREATE_TIME" bigint NOT NULL,
"GRANT_OPTION" smallint NOT NULL,
"GRANTOR" character varying(128) DEFAULT NULL::character varying,
@@ -486,7 +486,7 @@ CREATE TABLE "TAB_COL_STATS" (
"CS_ID" bigint NOT NULL,
"DB_NAME" character varying(128) DEFAULT NULL::character varying,
"TABLE_NAME" character varying(128) DEFAULT NULL::character varying,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"COLUMN_TYPE" character varying(128) DEFAULT NULL::character varying,
"TBL_ID" bigint NOT NULL,
"LONG_LOW_VALUE" bigint,
@@ -522,7 +522,7 @@ CREATE TABLE "PART_COL_STATS" (
"DB_NAME" character varying(128) DEFAULT NULL::character varying,
"TABLE_NAME" character varying(128) DEFAULT NULL::character varying,
"PARTITION_NAME" character varying(767) DEFAULT NULL::character varying,
- "COLUMN_NAME" character varying(128) DEFAULT NULL::character varying,
+ "COLUMN_NAME" character varying(1000) DEFAULT NULL::character varying,
"COLUMN_TYPE" character varying(128) DEFAULT NULL::character varying,
"PART_ID" bigint NOT NULL,
"LONG_LOW_VALUE" bigint,
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-1.3.0.postgres.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-1.3.0.postgres.sql b/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-1.3.0.postgres.sql
index cdd3792..b1ec241 100644
--- a/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-1.3.0.postgres.sql
+++ b/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-1.3.0.postgres.sql
@@ -1,5 +1,6 @@
SELECT 'Upgrading MetaStore schema from 1.2.0 to 1.3.0';
+\i 021-HIVE-11970.postgres.sql;
UPDATE "VERSION" SET "SCHEMA_VERSION"='1.3.0', "VERSION_COMMENT"='Hive release version 1.3.0' where "VER_ID"=1;
SELECT 'Finished upgrading MetaStore schema from 1.2.0 to 1.3.0';
http://git-wip-us.apache.org/repos/asf/hive/blob/a953b436/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-2.0.0.postgres.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-2.0.0.postgres.sql b/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-2.0.0.postgres.sql
index b6b0c35..628444c 100644
--- a/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-2.0.0.postgres.sql
+++ b/metastore/scripts/upgrade/postgres/upgrade-1.2.0-to-2.0.0.postgres.sql
@@ -1,5 +1,6 @@
SELECT 'Upgrading MetaStore schema from 1.2.0 to 2.0.0';
+\i 021-HIVE-11970.postgres.sql;
UPDATE "VERSION" SET "SCHEMA_VERSION"='2.0.0', "VERSION_COMMENT"='Hive release version 2.0.0' where "VER_ID"=1;
SELECT 'Finished upgrading MetaStore schema from 1.2.0 to 2.0.0';
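After running the matching upgrade script for a given dialect, the widened column can be checked from JDBC driver metadata. This is a minimal verification sketch, not part of the patch; the JDBC URL, user, and password are placeholders for the actual metastore backing database, and COLUMN_SIZE should report 1000 for COLUMN_NAME once HIVE-11970 has been applied.

import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.ResultSet;

public class CheckColumnWidth {
  public static void main(String[] args) throws Exception {
    // Placeholder connection details -- point these at the metastore backing database.
    try (Connection conn = DriverManager.getConnection(
        "jdbc:mysql://localhost:3306/metastore", "hiveuser", "hivepass")) {
      DatabaseMetaData md = conn.getMetaData();
      // Read the declared width of COLUMNS_V2.COLUMN_NAME from driver metadata.
      try (ResultSet rs = md.getColumns(null, null, "COLUMNS_V2", "COLUMN_NAME")) {
        while (rs.next()) {
          System.out.println(rs.getString("TABLE_NAME") + "." + rs.getString("COLUMN_NAME")
              + " size=" + rs.getInt("COLUMN_SIZE"));
        }
      }
    }
  }
}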
[13/22] hive git commit: HIVE-11982 : Some test cases for union all
fail with recent changes (Yongzhi Chen via Szehon)
Posted by se...@apache.org.
HIVE-11982 : Some test cases for union all fail with recent changes (Yongzhi Chen via Szehon)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/522bb600
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/522bb600
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/522bb600
Branch: refs/heads/llap
Commit: 522bb600b54cf7667de7bfa75cb286e680018842
Parents: 7b1ed3d
Author: Szehon Ho <sz...@cloudera.com>
Authored: Thu Oct 1 11:33:39 2015 -0700
Committer: Szehon Ho <sz...@cloudera.com>
Committed: Thu Oct 1 11:33:39 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/522bb600/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
index 8bcb464..2207cfb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
@@ -270,7 +270,7 @@ public class ColumnPrunerProcCtx implements NodeProcessorCtx {
for (Operator<? extends OperatorDesc> child : curOp.getChildOperators()) {
if (child instanceof UnionOperator) {
- prunList = prunedColLists.get(child);
+ prunList = genColLists(curOp, child);
if (prunList == null || prunList.size() == 0 || parentPrunList.size() == prunList.size()) {
continue;
}
[05/22] hive git commit: HIVE-11934 Transaction lock retry logic
results in infinite loop (Eugene Koifman, reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
HIVE-11934 Transaction lock retry logic results in infinite loop (Eugene Koifman, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0d43e876
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0d43e876
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0d43e876
Branch: refs/heads/llap
Commit: 0d43e876be9c36156a28bd2c2b9493f986841dd7
Parents: edd6300
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Wed Sep 30 16:05:34 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Wed Sep 30 16:05:34 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/metastore/txn/TxnHandler.java | 117 +++++++++----------
1 file changed, 57 insertions(+), 60 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/0d43e876/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index 0b19368..cc7e2c6 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -91,8 +91,8 @@ public class TxnHandler {
/**
* Number of consecutive deadlocks we have seen
*/
- protected int deadlockCnt;
- private long deadlockRetryInterval;
+ private int deadlockCnt;
+ private final long deadlockRetryInterval;
protected HiveConf conf;
protected DatabaseProduct dbProduct;
@@ -115,10 +115,8 @@ public class TxnHandler {
//
// All public methods that write to the database have to check for deadlocks when a SQLException
// comes back and handle it if they see one. This has to be done with the connection pooling
- // in mind. To do this they should call detectDeadlock AFTER rolling back the db transaction,
- // and then in an outer loop they should catch DeadlockException. In the catch for this they
- // should increment the deadlock counter and recall themselves. See commitTxn for an example.
- // the connection has been closed and returned to the pool.
+ // in mind. To do this they should call checkRetryable() AFTER rolling back the db transaction,
+ // and then they should catch RetryException and call themselves recursively. See commitTxn for an example.
public TxnHandler(HiveConf conf) {
this.conf = conf;
@@ -135,7 +133,6 @@ public class TxnHandler {
}
timeout = HiveConf.getTimeVar(conf, HiveConf.ConfVars.HIVE_TXN_TIMEOUT, TimeUnit.MILLISECONDS);
- deadlockCnt = 0;
buildJumpTable();
retryInterval = HiveConf.getTimeVar(conf, HiveConf.ConfVars.HMSHANDLERINTERVAL,
TimeUnit.MILLISECONDS);
@@ -280,7 +277,6 @@ public class TxnHandler {
}
public OpenTxnsResponse openTxns(OpenTxnRequest rqst) throws MetaException {
- deadlockCnt = 0; // Reset deadlock count since this is a new transaction
int numTxns = rqst.getNum_txns();
try {
Connection dbConn = null;
@@ -420,7 +416,6 @@ public class TxnHandler {
public LockResponse lock(LockRequest rqst)
throws NoSuchTxnException, TxnAbortedException, MetaException {
- deadlockCnt = 0;
try {
Connection dbConn = null;
try {
@@ -636,8 +631,6 @@ public class TxnHandler {
}
} catch (RetryException e) {
heartbeat(ids);
- } finally {
- deadlockCnt = 0;
}
}
@@ -903,14 +896,14 @@ public class TxnHandler {
void rollbackDBConn(Connection dbConn) {
try {
- if (dbConn != null) dbConn.rollback();
+ if (dbConn != null && !dbConn.isClosed()) dbConn.rollback();
} catch (SQLException e) {
LOG.warn("Failed to rollback db connection " + getMessage(e));
}
}
protected void closeDbConn(Connection dbConn) {
try {
- if (dbConn != null) dbConn.close();
+ if (dbConn != null && !dbConn.isClosed()) dbConn.close();
} catch (SQLException e) {
LOG.warn("Failed to close db connection " + getMessage(e));
}
@@ -922,7 +915,7 @@ public class TxnHandler {
*/
protected void closeStmt(Statement stmt) {
try {
- if (stmt != null) stmt.close();
+ if (stmt != null && !stmt.isClosed()) stmt.close();
} catch (SQLException e) {
LOG.warn("Failed to close statement " + getMessage(e));
}
@@ -952,15 +945,14 @@ public class TxnHandler {
closeDbConn(dbConn);
}
/**
- * Determine if an exception was such that it makse sense to retry. Unfortunately there is no standard way to do
+ * Determine if an exception was such that it makes sense to retry. Unfortunately there is no standard way to do
* this, so we have to inspect the error messages and catch the telltale signs for each
- * different database.
+ * different database. This method will throw {@code RetryException}
+ * if the error is retry-able.
* @param conn database connection
* @param e exception that was thrown.
- * @param caller name of the method calling this
- * @throws org.apache.hadoop.hive.metastore.txn.TxnHandler.RetryException when deadlock
- * detected and retry count has not been exceeded.
- * TODO: make "caller" more elaborate like include lockId for example
+ * @param caller name of the method calling this (and other info useful to log)
+ * @throws org.apache.hadoop.hive.metastore.txn.TxnHandler.RetryException when the operation should be retried
*/
protected void checkRetryable(Connection conn,
SQLException e,
@@ -973,53 +965,57 @@ public class TxnHandler {
// so I've tried to capture the different error messages (there appear to be fewer different
// error messages than SQL states).
// Derby and newer MySQL driver use the new SQLTransactionRollbackException
- if (dbProduct == null && conn != null) {
- determineDatabaseProduct(conn);
- }
- if (e instanceof SQLTransactionRollbackException ||
- ((dbProduct == DatabaseProduct.MYSQL || dbProduct == DatabaseProduct.POSTGRES ||
- dbProduct == DatabaseProduct.SQLSERVER) && e.getSQLState().equals("40001")) ||
- (dbProduct == DatabaseProduct.POSTGRES && e.getSQLState().equals("40P01")) ||
- (dbProduct == DatabaseProduct.ORACLE && (e.getMessage().contains("deadlock detected")
- || e.getMessage().contains("can't serialize access for this transaction")))) {
- if (deadlockCnt++ < ALLOWED_REPEATED_DEADLOCKS) {
- long waitInterval = deadlockRetryInterval * deadlockCnt;
- LOG.warn("Deadlock detected in " + caller + ". Will wait " + waitInterval +
- "ms try again up to " + (ALLOWED_REPEATED_DEADLOCKS - deadlockCnt + 1) + " times.");
- // Pause for a just a bit for retrying to avoid immediately jumping back into the deadlock.
- try {
- Thread.sleep(waitInterval);
- } catch (InterruptedException ie) {
- // NOP
- }
- throw new RetryException();
- } else {
- LOG.error("Too many repeated deadlocks in " + caller + ", giving up.");
- deadlockCnt = 0;
+ boolean sendRetrySignal = false;
+ try {
+ if (dbProduct == null && conn != null) {
+ determineDatabaseProduct(conn);
}
- }
- else if(isRetryable(e)) {
- //in MSSQL this means Communication Link Failure
- if(retryNum++ < retryLimit) {
- LOG.warn("Retryable error detected in " + caller + ". Will wait " + retryInterval +
- "ms and retry up to " + (retryLimit - retryNum + 1) + " times. Error: " + getMessage(e));
- try {
- Thread.sleep(retryInterval);
+ if (e instanceof SQLTransactionRollbackException ||
+ ((dbProduct == DatabaseProduct.MYSQL || dbProduct == DatabaseProduct.POSTGRES ||
+ dbProduct == DatabaseProduct.SQLSERVER) && e.getSQLState().equals("40001")) ||
+ (dbProduct == DatabaseProduct.POSTGRES && e.getSQLState().equals("40P01")) ||
+ (dbProduct == DatabaseProduct.ORACLE && (e.getMessage().contains("deadlock detected")
+ || e.getMessage().contains("can't serialize access for this transaction")))) {
+ if (deadlockCnt++ < ALLOWED_REPEATED_DEADLOCKS) {
+ long waitInterval = deadlockRetryInterval * deadlockCnt;
+ LOG.warn("Deadlock detected in " + caller + ". Will wait " + waitInterval +
+ "ms try again up to " + (ALLOWED_REPEATED_DEADLOCKS - deadlockCnt + 1) + " times.");
+ // Pause for a just a bit for retrying to avoid immediately jumping back into the deadlock.
+ try {
+ Thread.sleep(waitInterval);
+ } catch (InterruptedException ie) {
+ // NOP
+ }
+ sendRetrySignal = true;
+ } else {
+ LOG.error("Too many repeated deadlocks in " + caller + ", giving up.");
}
- catch(InterruptedException ex) {
- //
+ } else if (isRetryable(e)) {
+ //in MSSQL this means Communication Link Failure
+ if (retryNum++ < retryLimit) {
+ LOG.warn("Retryable error detected in " + caller + ". Will wait " + retryInterval +
+ "ms and retry up to " + (retryLimit - retryNum + 1) + " times. Error: " + getMessage(e));
+ try {
+ Thread.sleep(retryInterval);
+ } catch (InterruptedException ex) {
+ //
+ }
+ sendRetrySignal = true;
+ } else {
+ LOG.error("Fatal error. Retry limit (" + retryLimit + ") reached. Last error: " + getMessage(e));
}
- throw new RetryException();
}
- else {
- LOG.error("Fatal error. Retry limit (" + retryLimit + ") reached. Last error: " + getMessage(e));
+ }
+ finally {
+ /*if this method ends with anything except a retry signal, the caller should fail the operation
+ and propagate the error up to the its caller (Metastore client); thus must reset retry counters*/
+ if(!sendRetrySignal) {
+ deadlockCnt = 0;
retryNum = 0;
}
}
- else {
- //if here, we got something that will propagate the error (rather than retry), so reset counters
- deadlockCnt = 0;
- retryNum = 0;
+ if(sendRetrySignal) {
+ throw new RetryException();
}
}
@@ -2100,6 +2096,7 @@ public class TxnHandler {
//in MSSQL this means Communication Link Failure
return true;
}
+ //see https://issues.apache.org/jira/browse/HIVE-9938
}
return false;
}
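The class comment revised above describes the retry protocol every writing method is expected to follow: roll back the connection, call checkRetryable() to decide whether a retry is warranted, and catch RetryException in an outer try to re-invoke the method. A minimal sketch of that shape follows, assuming it lives inside TxnHandler with the usual java.sql imports; the method name and its body are hypothetical, while getDbConn, rollbackDBConn, checkRetryable, closeDbConn, getMessage, and RetryException are the members shown in the diff.

// Sketch only -- the work between getDbConn() and commit() stands in for the real SQL.
public void exampleWrite(String caller) throws MetaException {
  try {
    Connection dbConn = null;
    try {
      dbConn = getDbConn(Connection.TRANSACTION_SERIALIZABLE);
      // ... issue the SQL updates for this operation ...
      dbConn.commit();
    } catch (SQLException e) {
      // Roll back FIRST, then let checkRetryable() decide whether to signal a retry.
      rollbackDBConn(dbConn);
      checkRetryable(dbConn, e, "exampleWrite");
      throw new MetaException("Unable to update transaction database: " + getMessage(e));
    } finally {
      closeDbConn(dbConn);
    }
  } catch (RetryException e) {
    // checkRetryable() threw: the retry counters allow another attempt, so recurse.
    exampleWrite(caller);
  }
}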
[04/22] hive git commit: HIVE-11915 : BoneCP returns closed
connections from the pool (Sergey Shelukhin, reviewed by Thejas M Nair)
Posted by se...@apache.org.
HIVE-11915 : BoneCP returns closed connections from the pool (Sergey Shelukhin, reviewed by Thejas M Nair)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/edd63004
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/edd63004
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/edd63004
Branch: refs/heads/llap
Commit: edd63004375602bf7550513380ec25cf34ca5cf5
Parents: 50b6d0c
Author: Sergey Shelukhin <se...@apache.org>
Authored: Wed Sep 30 15:23:25 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Wed Sep 30 15:24:48 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/metastore/txn/TxnHandler.java | 25 +++++++++++---------
1 file changed, 14 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/edd63004/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index 8597d9f..0b19368 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -84,6 +84,7 @@ public class TxnHandler {
static final private Log LOG = LogFactory.getLog(TxnHandler.class.getName());
static private DataSource connPool;
+ static private boolean doRetryOnConnPool = false;
private final static Object lockLock = new Object(); // Random object to lock on for the lock
// method
@@ -885,18 +886,19 @@ public class TxnHandler {
}
- /**
- * Get a connection to the database
- * @param isolationLevel desired isolation level. If you are doing _any_ data modifications
- * you should request serializable, else read committed should be fine.
- * @return db connection
- * @throws MetaException if the connection cannot be obtained
- */
protected Connection getDbConn(int isolationLevel) throws SQLException {
- Connection dbConn = connPool.getConnection();
- dbConn.setAutoCommit(false);
- dbConn.setTransactionIsolation(isolationLevel);
- return dbConn;
+ int rc = doRetryOnConnPool ? 10 : 1;
+ while (true) {
+ try {
+ Connection dbConn = connPool.getConnection();
+ dbConn.setAutoCommit(false);
+ dbConn.setTransactionIsolation(isolationLevel);
+ return dbConn;
+ } catch (SQLException e){
+ if ((--rc) <= 0) throw e;
+ LOG.error("There is a problem with a connection from the pool, retrying", e);
+ }
+ }
}
void rollbackDBConn(Connection dbConn) {
@@ -1964,6 +1966,7 @@ public class TxnHandler {
config.setUser(user);
config.setPassword(passwd);
connPool = new BoneCPDataSource(config);
+ doRetryOnConnPool = true; // Enable retries to work around BONECP bug.
} else if ("dbcp".equals(connectionPooler)) {
ObjectPool objectPool = new GenericObjectPool();
ConnectionFactory connFactory = new DriverManagerConnectionFactory(driverUrl, user, passwd);
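The fix amounts to a bounded retry around the pool's getConnection(), enabled only for BoneCP. Stripped of the TxnHandler specifics, the pattern looks like the following sketch; the class and helper names and the attempt count are illustrative, and any javax.sql.DataSource works.

import java.sql.Connection;
import java.sql.SQLException;
import javax.sql.DataSource;

public final class PoolRetry {
  // Ask the pool up to 'attempts' times; a stale or already-closed pooled
  // connection surfaces as SQLException, and the next attempt fetches a fresh one.
  static Connection getConnWithRetry(DataSource pool, int attempts) throws SQLException {
    while (true) {
      try {
        return pool.getConnection();
      } catch (SQLException e) {
        if (--attempts <= 0) {
          throw e; // attempts exhausted: propagate the last failure
        }
      }
    }
  }
}

In the patch itself the attempt count is 10 when doRetryOnConnPool is set (BoneCP) and 1 otherwise, so DBCP keeps its original no-retry behavior.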
[09/22] hive git commit: HIVE-4243. Fix column names in ORC metadata.
Posted by se...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
index 2dad1e7..f44c80e 100644
--- a/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
@@ -268,7 +268,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -288,7 +288,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -603,7 +603,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -623,7 +623,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -670,7 +670,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -690,7 +690,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -947,7 +947,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -967,7 +967,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -1219,7 +1219,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1239,7 +1239,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -1552,7 +1552,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1572,7 +1572,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -1895,7 +1895,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1915,7 +1915,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2193,7 +2193,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2213,7 +2213,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2260,7 +2260,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2280,7 +2280,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2517,7 +2517,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2537,7 +2537,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2581,7 +2581,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2601,7 +2601,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2863,7 +2863,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2883,7 +2883,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -3185,7 +3185,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -3205,7 +3205,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -3509,7 +3509,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -3529,7 +3529,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -3843,7 +3843,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -3863,7 +3863,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -4257,7 +4257,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -4277,7 +4277,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -4680,7 +4680,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -4700,7 +4700,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -4747,7 +4747,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -4767,7 +4767,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5065,7 +5065,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -5085,7 +5085,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5357,7 +5357,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -5377,7 +5377,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5836,7 +5836,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -5856,7 +5856,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -6454,7 +6454,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -6474,7 +6474,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -6906,7 +6906,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -6926,7 +6926,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -7339,7 +7339,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -7359,7 +7359,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -7762,7 +7762,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -7782,7 +7782,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -8255,7 +8255,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -8275,7 +8275,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -8692,7 +8692,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -8712,7 +8712,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/union_fast_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_fast_stats.q.out b/ql/src/test/results/clientpositive/union_fast_stats.q.out
index 71a0486..1affbe1 100644
--- a/ql/src/test/results/clientpositive/union_fast_stats.q.out
+++ b/ql/src/test/results/clientpositive/union_fast_stats.q.out
@@ -120,7 +120,7 @@ Table Parameters:
numFiles 4
numRows 15
rawDataSize 3483
- totalSize 3915
+ totalSize 4211
#### A masked pattern was here ####
# Storage Information
@@ -172,8 +172,8 @@ Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 4
numRows 15
- rawDataSize 3483
- totalSize 3915
+ rawDataSize 3651
+ totalSize 4211
#### A masked pattern was here ####
# Storage Information
@@ -237,8 +237,8 @@ Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 5
numRows 20
- rawDataSize 4552
- totalSize 5225
+ rawDataSize 4720
+ totalSize 5568
#### A masked pattern was here ####
# Storage Information
@@ -393,7 +393,7 @@ Table Parameters:
numFiles 1
numRows 15
rawDataSize 3483
- totalSize 3176
+ totalSize 3223
#### A masked pattern was here ####
# Storage Information
@@ -446,7 +446,7 @@ Table Parameters:
numFiles 1
numRows 15
rawDataSize 3320
- totalSize 3176
+ totalSize 3223
#### A masked pattern was here ####
# Storage Information
@@ -511,7 +511,7 @@ Table Parameters:
numFiles 2
numRows 20
rawDataSize 4389
- totalSize 4486
+ totalSize 4580
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/vectorized_ptf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/vectorized_ptf.q.out
index e65a880..5e6a72e 100644
--- a/ql/src/test/results/clientpositive/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_ptf.q.out
@@ -263,7 +263,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -283,7 +283,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -654,7 +654,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -674,7 +674,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -1017,7 +1017,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1037,7 +1037,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -1283,7 +1283,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1303,7 +1303,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -1657,7 +1657,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1677,7 +1677,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2041,7 +2041,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2061,7 +2061,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2380,7 +2380,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2400,7 +2400,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2520,7 +2520,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2540,7 +2540,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2734,7 +2734,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2754,7 +2754,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2874,7 +2874,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2894,7 +2894,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -3116,7 +3116,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -3136,7 +3136,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -3479,7 +3479,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -3499,7 +3499,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -3844,7 +3844,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -3864,7 +3864,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -4219,7 +4219,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -4239,7 +4239,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -4721,7 +4721,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -4741,7 +4741,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5185,7 +5185,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -5205,7 +5205,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5325,7 +5325,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -5345,7 +5345,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5650,7 +5650,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -5670,7 +5670,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5982,7 +5982,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -6002,7 +6002,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -6501,7 +6501,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -6521,7 +6521,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -7251,7 +7251,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -7271,7 +7271,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -7791,7 +7791,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -7811,7 +7811,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -8359,7 +8359,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -8379,7 +8379,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -8870,7 +8870,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -8890,7 +8890,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -9498,7 +9498,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -9518,7 +9518,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -10023,7 +10023,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -10043,7 +10043,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
[07/22] hive git commit: HIVE-11883 'transactional' table property
for ACID should be case insensitive (Eugene Koifman,
reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
HIVE-11883 'transactional' table property for ACID should be case insensitive (Eugene Koifman, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2c445cc8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2c445cc8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2c445cc8
Branch: refs/heads/llap
Commit: 2c445cc8dc0bedf2297725ab2404c9d866b5906e
Parents: a6ab68e
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Wed Sep 30 16:11:27 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Wed Sep 30 16:11:27 2015 -0700
----------------------------------------------------------------------
.../java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java | 3 +++
ql/src/test/queries/clientpositive/update_all_types.q | 2 +-
ql/src/test/results/clientpositive/tez/update_all_types.q.out | 4 ++--
ql/src/test/results/clientpositive/update_all_types.q.out | 4 ++--
4 files changed, 8 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2c445cc8/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index dda28b0..dbc6d8f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -12183,6 +12183,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
if (!SessionState.get().getTxnMgr().supportsAcid()) return false;
String tableIsTransactional =
tab.getProperty(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL);
+ if(tableIsTransactional == null) {
+ tableIsTransactional = tab.getProperty(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL.toUpperCase());
+ }
return tableIsTransactional != null && tableIsTransactional.equalsIgnoreCase("true");
}
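The patch above only retries the lookup with the upper-cased constant, which covers 'transactional' vs 'TRANSACTIONAL'. For comparison, a fully case-insensitive lookup would have to scan the parameter map; a minimal sketch (hypothetical helper, not part of the commit):

    import java.util.Map;

    // Hypothetical helper: find a table property regardless of the key's case.
    // Unlike the two-probe approach in the patch, this also matches mixed-case
    // keys such as 'Transactional'.
    static String getPropertyIgnoreCase(Map<String, String> params, String key) {
      for (Map.Entry<String, String> e : params.entrySet()) {
        if (e.getKey() != null && e.getKey().equalsIgnoreCase(key)) {
          return e.getValue();
        }
      }
      return null;
    }

The two-probe approach in the patch is cheaper per lookup, at the cost of not handling mixed-case spellings of the key.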
http://git-wip-us.apache.org/repos/asf/hive/blob/2c445cc8/ql/src/test/queries/clientpositive/update_all_types.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/update_all_types.q b/ql/src/test/queries/clientpositive/update_all_types.q
index 262a304..0229845 100644
--- a/ql/src/test/queries/clientpositive/update_all_types.q
+++ b/ql/src/test/queries/clientpositive/update_all_types.q
@@ -17,7 +17,7 @@ create table acid_uat(ti tinyint,
s string,
vc varchar(128),
ch char(36),
- b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+ b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE');
insert into table acid_uat
select ctinyint,
http://git-wip-us.apache.org/repos/asf/hive/blob/2c445cc8/ql/src/test/results/clientpositive/tez/update_all_types.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/update_all_types.q.out b/ql/src/test/results/clientpositive/tez/update_all_types.q.out
index ca098fb..1cfa088 100644
--- a/ql/src/test/results/clientpositive/tez/update_all_types.q.out
+++ b/ql/src/test/results/clientpositive/tez/update_all_types.q.out
@@ -13,7 +13,7 @@ create table acid_uat(ti tinyint,
s string,
vc varchar(128),
ch char(36),
- b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+ b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE')
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@acid_uat
@@ -32,7 +32,7 @@ create table acid_uat(ti tinyint,
s string,
vc varchar(128),
ch char(36),
- b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+ b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE')
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@acid_uat
http://git-wip-us.apache.org/repos/asf/hive/blob/2c445cc8/ql/src/test/results/clientpositive/update_all_types.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/update_all_types.q.out b/ql/src/test/results/clientpositive/update_all_types.q.out
index ca098fb..1cfa088 100644
--- a/ql/src/test/results/clientpositive/update_all_types.q.out
+++ b/ql/src/test/results/clientpositive/update_all_types.q.out
@@ -13,7 +13,7 @@ create table acid_uat(ti tinyint,
s string,
vc varchar(128),
ch char(36),
- b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+ b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE')
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@acid_uat
@@ -32,7 +32,7 @@ create table acid_uat(ti tinyint,
s string,
vc varchar(128),
ch char(36),
- b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+ b boolean) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('TRANSACTIONAL'='TRUE')
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@acid_uat
[17/22] hive git commit: HIVE-11898 : support default partition in
metastoredirectsql (Sergey Shelukhin, reviewed by Sushanth Sowmyan)
Posted by se...@apache.org.
HIVE-11898 : support default partition in metastoredirectsql (Sergey Shelukhin, reviewed by Sushanth Sowmyan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/116c3e3b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/116c3e3b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/116c3e3b
Branch: refs/heads/llap
Commit: 116c3e3b2d2b2b22fef9abed8f092b04155784cd
Parents: a953b43
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Oct 1 14:11:58 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Oct 1 14:11:58 2015 -0700
----------------------------------------------------------------------
.../hive/metastore/MetaStoreDirectSql.java | 34 +++++++++++++-------
1 file changed, 22 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/116c3e3b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index 1f89b7c..95b1ccc 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -109,6 +109,7 @@ class MetaStoreDirectSql {
private final DB dbType;
private final int batchSize;
private final boolean convertMapNullsToEmptyStrings;
+ private final String defaultPartName;
/**
* Whether direct SQL can be used with the current datastore backing {@link #pm}.
@@ -116,6 +117,7 @@ class MetaStoreDirectSql {
private final boolean isCompatibleDatastore;
private final boolean isAggregateStatsCacheEnabled;
private AggregateStatsCache aggrStatsCache;
+
public MetaStoreDirectSql(PersistenceManager pm, Configuration conf) {
this.pm = pm;
this.dbType = determineDbType();
@@ -127,6 +129,7 @@ class MetaStoreDirectSql {
convertMapNullsToEmptyStrings =
HiveConf.getBoolVar(conf, ConfVars.METASTORE_ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS);
+ defaultPartName = HiveConf.getVar(conf, ConfVars.DEFAULTPARTITIONNAME);
String jdoIdFactory = HiveConf.getVar(conf, ConfVars.METASTORE_IDENTIFIER_FACTORY);
if (! ("datanucleus1".equalsIgnoreCase(jdoIdFactory))){
@@ -390,7 +393,7 @@ class MetaStoreDirectSql {
// Derby and Oracle do not interpret filters ANSI-properly in some cases and need a workaround.
boolean dbHasJoinCastBug = (dbType == DB.DERBY || dbType == DB.ORACLE);
String sqlFilter = PartitionFilterGenerator.generateSqlFilter(
- table, tree, params, joins, dbHasJoinCastBug);
+ table, tree, params, joins, dbHasJoinCastBug, defaultPartName);
if (sqlFilter == null) {
return null; // Cannot make SQL filter to push down.
}
@@ -490,8 +493,8 @@ class MetaStoreDirectSql {
}
List<Object> sqlResult = executeWithArray(query, params, queryText);
long queryTime = doTrace ? System.nanoTime() : 0;
+ timingTrace(doTrace, queryText, start, queryTime);
if (sqlResult.isEmpty()) {
- timingTrace(doTrace, queryText, start, queryTime);
return new ArrayList<Partition>(); // no partitions, bail early.
}
@@ -508,7 +511,6 @@ class MetaStoreDirectSql {
result = getPartitionsFromPartitionIds(dbName, tblName, isView, sqlResult);
}
- timingTrace(doTrace, queryText, start, queryTime);
query.closeAll();
return result;
}
@@ -921,14 +923,16 @@ class MetaStoreDirectSql {
private final List<Object> params;
private final List<String> joins;
private final boolean dbHasJoinCastBug;
+ private final String defaultPartName;
- private PartitionFilterGenerator(
- Table table, List<Object> params, List<String> joins, boolean dbHasJoinCastBug) {
+ private PartitionFilterGenerator(Table table, List<Object> params, List<String> joins,
+ boolean dbHasJoinCastBug, String defaultPartName) {
this.table = table;
this.params = params;
this.joins = joins;
this.dbHasJoinCastBug = dbHasJoinCastBug;
this.filterBuffer = new FilterBuilder(false);
+ this.defaultPartName = defaultPartName;
}
/**
@@ -939,13 +943,14 @@ class MetaStoreDirectSql {
* @return the string representation of the expression tree
*/
private static String generateSqlFilter(Table table, ExpressionTree tree,
- List<Object> params, List<String> joins, boolean dbHasJoinCastBug) throws MetaException {
+ List<Object> params, List<String> joins, boolean dbHasJoinCastBug, String defaultPartName)
+ throws MetaException {
assert table != null;
if (tree.getRoot() == null) {
return "";
}
PartitionFilterGenerator visitor = new PartitionFilterGenerator(
- table, params, joins, dbHasJoinCastBug);
+ table, params, joins, dbHasJoinCastBug, defaultPartName);
tree.accept(visitor);
if (visitor.filterBuffer.hasError()) {
LOG.info("Unable to push down SQL filter: " + visitor.filterBuffer.getErrorMessage());
@@ -1071,28 +1076,33 @@ class MetaStoreDirectSql {
// Build the filter and add parameters linearly; we are traversing leaf nodes LTR.
String tableValue = "\"FILTER" + partColIndex + "\".\"PART_KEY_VAL\"";
+
if (node.isReverseOrder) {
params.add(nodeValue);
}
+ String tableColumn = tableValue;
if (colType != FilterType.String) {
// The underlying database field is varchar, we need to compare numbers.
- // Note that this won't work with __HIVE_DEFAULT_PARTITION__. It will fail and fall
- // back to JDO. That is by design; we could add an ugly workaround here but didn't.
if (colType == FilterType.Integral) {
tableValue = "cast(" + tableValue + " as decimal(21,0))";
} else if (colType == FilterType.Date) {
tableValue = "cast(" + tableValue + " as date)";
}
+ // Workaround for HIVE_DEFAULT_PARTITION - ignore it like JDO does, for now.
+ String tableValue0 = tableValue;
+ tableValue = "(case when " + tableColumn + " <> ?";
+ params.add(defaultPartName);
+
if (dbHasJoinCastBug) {
// This is a workaround for DERBY-6358 and Oracle bug; it is pretty horrible.
- tableValue = "(case when \"TBLS\".\"TBL_NAME\" = ? and \"DBS\".\"NAME\" = ? and "
+ tableValue += (" and \"TBLS\".\"TBL_NAME\" = ? and \"DBS\".\"NAME\" = ? and "
+ "\"FILTER" + partColIndex + "\".\"PART_ID\" = \"PARTITIONS\".\"PART_ID\" and "
- + "\"FILTER" + partColIndex + "\".\"INTEGER_IDX\" = " + partColIndex + " then "
- + tableValue + " else null end)";
+ + "\"FILTER" + partColIndex + "\".\"INTEGER_IDX\" = " + partColIndex);
params.add(table.getTableName().toLowerCase());
params.add(table.getDbName().toLowerCase());
}
+ tableValue += " then " + tableValue0 + " else null end)";
}
if (!node.isReverseOrder) {
params.add(nodeValue);
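The CASE-WHEN guard above keeps the numeric cast from ever seeing the default-partition sentinel (the value of hive.exec.default.partition.name, typically __HIVE_DEFAULT_PARTITION__), mapping it to NULL the way JDO filtering does; the removed comment documented that such values previously failed the cast and fell back to JDO. Roughly, the predicate built for an integral partition column has this shape (a sketch of the generated string with the Derby/Oracle join-cast workaround omitted, not the exact output):

    // Illustrative only: shape of the pushed-down filter for an integral column.
    // The first '?' binds the default partition name, the second the literal
    // from the filter expression.
    String col = "\"FILTER0\".\"PART_KEY_VAL\"";
    String guarded = "(case when " + col + " <> ?"
        + " then cast(" + col + " as decimal(21,0))"
        + " else null end)";
    String predicate = guarded + " = ?";   // e.g. for a filter like part_col = 20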
[11/22] hive git commit: HIVE-4243. Fix column names in ORC metadata.
Posted by se...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index f451fce..69cb6ff 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -1106,6 +1106,8 @@ public class TestInputOutputFormat {
@SuppressWarnings("unchecked,deprecation")
public void testInOutFormat() throws Exception {
Properties properties = new Properties();
+ properties.setProperty("columns", "x,y");
+ properties.setProperty("columns.types", "int:int");
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
@@ -1122,8 +1124,6 @@ public class TestInputOutputFormat {
writer.write(serde.serialize(new MyRow(3,2), inspector));
writer.close(true);
serde = new OrcSerde();
- properties.setProperty("columns", "x,y");
- properties.setProperty("columns.types", "int:int");
SerDeUtils.initializeSerDe(serde, conf, properties, null);
assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
inspector = (StructObjectInspector) serde.getObjectInspector();
@@ -1295,13 +1295,13 @@ public class TestInputOutputFormat {
@SuppressWarnings("deprecation")
public void testEmptyFile() throws Exception {
Properties properties = new Properties();
+ properties.setProperty("columns", "x,y");
+ properties.setProperty("columns.types", "int:int");
HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer =
outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
properties, Reporter.NULL);
writer.close(true);
- properties.setProperty("columns", "x,y");
- properties.setProperty("columns.types", "int:int");
SerDe serde = new OrcSerde();
SerDeUtils.initializeSerDe(serde, conf, properties, null);
InputFormat<?,?> in = new OrcInputFormat();
@@ -1352,6 +1352,8 @@ public class TestInputOutputFormat {
@SuppressWarnings("unchecked,deprecation")
public void testDefaultTypes() throws Exception {
Properties properties = new Properties();
+ properties.setProperty("columns", "str,str2");
+ properties.setProperty("columns.types", "string:string");
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
@@ -1371,7 +1373,6 @@ public class TestInputOutputFormat {
writer.write(serde.serialize(new StringRow("miles"), inspector));
writer.close(true);
serde = new OrcSerde();
- properties.setProperty("columns", "str,str2");
SerDeUtils.initializeSerDe(serde, conf, properties, null);
inspector = (StructObjectInspector) serde.getObjectInspector();
assertEquals("struct<str:string,str2:string>", inspector.getTypeName());
@@ -1892,6 +1893,8 @@ public class TestInputOutputFormat {
@SuppressWarnings("unchecked,deprecation")
public void testSplitElimination() throws Exception {
Properties properties = new Properties();
+ properties.setProperty("columns", "z,r");
+ properties.setProperty("columns.types", "int:struct<x:int,y:int>");
StructObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = (StructObjectInspector)
@@ -1920,8 +1923,6 @@ public class TestInputOutputFormat {
.build();
conf.set("sarg.pushdown", toKryo(sarg));
conf.set("hive.io.file.readcolumn.names", "z,r");
- properties.setProperty("columns", "z,r");
- properties.setProperty("columns.types", "int:struct<x:int,y:int>");
SerDeUtils.initializeSerDe(serde, conf, properties, null);
inspector = (StructObjectInspector) serde.getObjectInspector();
InputFormat<?,?> in = new OrcInputFormat();
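Every hunk in this file makes the same mechanical move: the 'columns' and 'columns.types' properties are set before the ORC writer is created instead of just before the SerDe is initialized for reading. With this change the writer records the real column names in the file metadata, so they have to be available at write time. A condensed sketch of the required ordering, reusing the names from testEmptyFile above (not a complete test):

    // Column metadata must be in place before getHiveRecordWriter, since the
    // writer now puts the actual names into the ORC footer.
    Properties properties = new Properties();
    properties.setProperty("columns", "x,y");
    properties.setProperty("columns.types", "int:int");
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer =
        outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
            properties, Reporter.NULL);
    // ... write rows and close, then initialize the SerDe for reading ...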
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
index 0bb8401..06e3362 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
@@ -519,9 +519,9 @@ public class TestOrcFile {
Object row = rows.next(null);
assertEquals(tslist.get(idx++).getNanos(), ((TimestampWritable) row).getNanos());
}
- assertEquals(1, OrcUtils.getFlattenedColumnsCount(inspector));
+ assertEquals(0, writer.getSchema().getMaximumId());
boolean[] expected = new boolean[] {false};
- boolean[] included = OrcUtils.includeColumns("", "ts", inspector);
+ boolean[] included = OrcUtils.includeColumns("", writer.getSchema());
assertEquals(true, Arrays.equals(expected, included));
}
@@ -546,17 +546,18 @@ public class TestOrcFile {
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
- assertEquals(3, OrcUtils.getFlattenedColumnsCount(inspector));
+ TypeDescription schema = writer.getSchema();
+ assertEquals(2, schema.getMaximumId());
boolean[] expected = new boolean[] {false, false, true};
- boolean[] included = OrcUtils.includeColumns("string1", "bytes1,string1", inspector);
+ boolean[] included = OrcUtils.includeColumns("string1", schema);
assertEquals(true, Arrays.equals(expected, included));
expected = new boolean[] {false, false, false};
- included = OrcUtils.includeColumns("", "bytes1,string1", inspector);
+ included = OrcUtils.includeColumns("", schema);
assertEquals(true, Arrays.equals(expected, included));
expected = new boolean[] {false, false, false};
- included = OrcUtils.includeColumns(null, "bytes1,string1", inspector);
+ included = OrcUtils.includeColumns(null, schema);
assertEquals(true, Arrays.equals(expected, included));
// check the stats
@@ -656,9 +657,10 @@ public class TestOrcFile {
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
- assertEquals(3, OrcUtils.getFlattenedColumnsCount(inspector));
+ TypeDescription schema = writer.getSchema();
+ assertEquals(2, schema.getMaximumId());
boolean[] expected = new boolean[] {false, true, false};
- boolean[] included = OrcUtils.includeColumns("int1", "int1,string1", inspector);
+ boolean[] included = OrcUtils.includeColumns("int1", schema);
assertEquals(true, Arrays.equals(expected, included));
Metadata metadata = reader.getMetadata();
@@ -742,14 +744,14 @@ public class TestOrcFile {
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
- assertEquals(24, OrcUtils.getFlattenedColumnsCount(inspector));
+ TypeDescription schema = writer.getSchema();
+ assertEquals(23, schema.getMaximumId());
boolean[] expected = new boolean[] {false, false, false, false, false,
false, false, false, false, false,
false, false, false, false, false,
false, false, false, false, false,
false, false, false, false};
- boolean[] included = OrcUtils.includeColumns("",
- "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector);
+ boolean[] included = OrcUtils.includeColumns("", schema);
assertEquals(true, Arrays.equals(expected, included));
expected = new boolean[] {false, true, false, false, false,
@@ -757,8 +759,7 @@ public class TestOrcFile {
true, true, true, true, true,
false, false, false, false, true,
true, true, true, true};
- included = OrcUtils.includeColumns("boolean1,string1,middle,map",
- "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector);
+ included = OrcUtils.includeColumns("boolean1,string1,middle,map", schema);
assertEquals(true, Arrays.equals(expected, included));
expected = new boolean[] {false, true, false, false, false,
@@ -766,8 +767,7 @@ public class TestOrcFile {
true, true, true, true, true,
false, false, false, false, true,
true, true, true, true};
- included = OrcUtils.includeColumns("boolean1,string1,middle,map",
- "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector);
+ included = OrcUtils.includeColumns("boolean1,string1,middle,map", schema);
assertEquals(true, Arrays.equals(expected, included));
expected = new boolean[] {false, true, true, true, true,
@@ -777,7 +777,7 @@ public class TestOrcFile {
true, true, true, true};
included = OrcUtils.includeColumns(
"boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map",
- "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector);
+ schema);
assertEquals(true, Arrays.equals(expected, included));
Metadata metadata = reader.getMetadata();
@@ -1312,17 +1312,18 @@ public class TestOrcFile {
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
- assertEquals(6, OrcUtils.getFlattenedColumnsCount(inspector));
+ TypeDescription schema = writer.getSchema();
+ assertEquals(5, schema.getMaximumId());
boolean[] expected = new boolean[] {false, false, false, false, false, false};
- boolean[] included = OrcUtils.includeColumns("", "time,union,decimal", inspector);
+ boolean[] included = OrcUtils.includeColumns("", schema);
assertEquals(true, Arrays.equals(expected, included));
expected = new boolean[] {false, true, false, false, false, true};
- included = OrcUtils.includeColumns("time,decimal", "time,union,decimal", inspector);
+ included = OrcUtils.includeColumns("time,decimal", schema);
assertEquals(true, Arrays.equals(expected, included));
expected = new boolean[] {false, false, true, true, true, false};
- included = OrcUtils.includeColumns("union", "time,union,decimal", inspector);
+ included = OrcUtils.includeColumns("union", schema);
assertEquals(true, Arrays.equals(expected, included));
assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
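OrcUtils.includeColumns now derives everything from the file's TypeDescription rather than a caller-supplied list of all column names: the result has one entry per column id, where id 0 is the root struct (always false) and ids 1 through getMaximumId() are the flattened columns. A condensed usage sketch based on the assertions above:

    // Condensed from the assertions above: the writer's schema drives pruning.
    TypeDescription schema = writer.getSchema();
    boolean[] included = OrcUtils.includeColumns("int1", schema);
    // included.length == schema.getMaximumId() + 1, and included[0] is false
    // because id 0 is the enclosing root struct.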
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
index 39f71f1..60af40a 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
@@ -896,7 +896,7 @@ public class TestOrcRawRecordMerger {
*/
@Test
public void testRecordReaderNewBaseAndDelta() throws Exception {
- final int BUCKET = 10;
+ final int BUCKET = 11;
Configuration conf = new Configuration();
OrcOutputFormat of = new OrcOutputFormat();
FileSystem fs = FileSystem.getLocal(conf);
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java
index a3d3ec5..f838cbc 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcWideTable.java
@@ -38,241 +38,45 @@ import org.junit.rules.TestName;
public class TestOrcWideTable {
- private static final int MEMORY_FOR_ORC = 512 * 1024 * 1024;
- Path workDir = new Path(System.getProperty("test.tmp.dir", "target" + File.separator + "test"
- + File.separator + "tmp"));
-
- Configuration conf;
- FileSystem fs;
- Path testFilePath;
- float memoryPercent;
-
- @Rule
- public TestName testCaseName = new TestName();
-
- @Before
- public void openFileSystem() throws Exception {
- conf = new Configuration();
- fs = FileSystem.getLocal(conf);
- testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc");
- fs.delete(testFilePath, false);
- // make sure constant memory is available for ORC always
- memoryPercent = (float) MEMORY_FOR_ORC / (float) ManagementFactory.getMemoryMXBean().
- getHeapMemoryUsage().getMax();
- conf.setFloat(HiveConf.ConfVars.HIVE_ORC_FILE_MEMORY_POOL.varname, memoryPercent);
- }
-
@Test
public void testBufferSizeFor1Col() throws IOException {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- int bufferSize = 128 * 1024;
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
- .compress(CompressionKind.NONE).bufferSize(bufferSize));
- final int newBufferSize;
- if (writer instanceof WriterImpl) {
- WriterImpl orcWriter = (WriterImpl) writer;
- newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
- assertEquals(bufferSize, newBufferSize);
- }
+ assertEquals(128 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+ false, 1, 128*1024));
}
@Test
public void testBufferSizeFor1000Col() throws IOException {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- int bufferSize = 128 * 1024;
- String columns = getRandomColumnNames(1000);
- // just for testing. manually write the column names
- conf.set(IOConstants.COLUMNS, columns);
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
- .compress(CompressionKind.NONE).bufferSize(bufferSize));
- final int newBufferSize;
- if (writer instanceof WriterImpl) {
- WriterImpl orcWriter = (WriterImpl) writer;
- newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
- assertEquals(bufferSize, newBufferSize);
- }
+ assertEquals(128 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+ false, 1000, 128*1024));
}
@Test
public void testBufferSizeFor2000Col() throws IOException {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- int bufferSize = 256 * 1024;
- String columns = getRandomColumnNames(2000);
- // just for testing. manually write the column names
- conf.set(IOConstants.COLUMNS, columns);
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
- .compress(CompressionKind.ZLIB).bufferSize(bufferSize));
- final int newBufferSize;
- if (writer instanceof WriterImpl) {
- WriterImpl orcWriter = (WriterImpl) writer;
- newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
- assertEquals(32 * 1024, newBufferSize);
- }
+ assertEquals(32 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+ true, 2000, 256*1024));
}
@Test
public void testBufferSizeFor2000ColNoCompression() throws IOException {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- int bufferSize = 256 * 1024;
- String columns = getRandomColumnNames(2000);
- // just for testing. manually write the column names
- conf.set(IOConstants.COLUMNS, columns);
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
- .compress(CompressionKind.NONE).bufferSize(bufferSize));
- final int newBufferSize;
- if (writer instanceof WriterImpl) {
- WriterImpl orcWriter = (WriterImpl) writer;
- newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
- assertEquals(64 * 1024, newBufferSize);
- }
+ assertEquals(64 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+ false, 2000, 256*1024));
}
@Test
public void testBufferSizeFor4000Col() throws IOException {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- int bufferSize = 256 * 1024;
- String columns = getRandomColumnNames(4000);
- // just for testing. manually write the column names
- conf.set(IOConstants.COLUMNS, columns);
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
- .compress(CompressionKind.ZLIB).bufferSize(bufferSize));
- final int newBufferSize;
- if (writer instanceof WriterImpl) {
- WriterImpl orcWriter = (WriterImpl) writer;
- newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
- assertEquals(16 * 1024, newBufferSize);
- }
+ assertEquals(16 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+ true, 4000, 256*1024));
}
@Test
public void testBufferSizeFor4000ColNoCompression() throws IOException {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- int bufferSize = 256 * 1024;
- String columns = getRandomColumnNames(4000);
- // just for testing. manually write the column names
- conf.set(IOConstants.COLUMNS, columns);
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
- .compress(CompressionKind.NONE).bufferSize(bufferSize));
- final int newBufferSize;
- if (writer instanceof WriterImpl) {
- WriterImpl orcWriter = (WriterImpl) writer;
- newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
- assertEquals(32 * 1024, newBufferSize);
- }
+ assertEquals(32 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+ false, 4000, 256*1024));
}
@Test
public void testBufferSizeFor25000Col() throws IOException {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- int bufferSize = 256 * 1024;
- String columns = getRandomColumnNames(25000);
- // just for testing. manually write the column names
- conf.set(IOConstants.COLUMNS, columns);
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
- .compress(CompressionKind.NONE).bufferSize(bufferSize));
- final int newBufferSize;
- if (writer instanceof WriterImpl) {
- WriterImpl orcWriter = (WriterImpl) writer;
- newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
- // 4K is the minimum buffer size
- assertEquals(4 * 1024, newBufferSize);
- }
- }
-
- @Test
- public void testBufferSizeManualOverride1() throws IOException {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- int bufferSize = 1024;
- String columns = getRandomColumnNames(2000);
- // just for testing. manually write the column names
- conf.set(IOConstants.COLUMNS, columns);
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
- .compress(CompressionKind.NONE).bufferSize(bufferSize));
- final int newBufferSize;
- if (writer instanceof WriterImpl) {
- WriterImpl orcWriter = (WriterImpl) writer;
- newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
- assertEquals(bufferSize, newBufferSize);
- }
- }
-
- @Test
- public void testBufferSizeManualOverride2() throws IOException {
- ObjectInspector inspector;
- synchronized (TestOrcFile.class) {
- inspector = ObjectInspectorFactory.getReflectionObjectInspector(Long.class,
- ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- }
- int bufferSize = 2 * 1024;
- String columns = getRandomColumnNames(4000);
- // just for testing. manually write the column names
- conf.set(IOConstants.COLUMNS, columns);
- Writer writer = OrcFile.createWriter(
- testFilePath,
- OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000)
- .compress(CompressionKind.NONE).bufferSize(bufferSize));
- final int newBufferSize;
- if (writer instanceof WriterImpl) {
- WriterImpl orcWriter = (WriterImpl) writer;
- newBufferSize = orcWriter.getEstimatedBufferSize(bufferSize);
- assertEquals(bufferSize, newBufferSize);
- }
- }
-
- private String getRandomColumnNames(int n) {
- StringBuilder sb = new StringBuilder();
- for (int i = 0; i < n - 1; i++) {
- sb.append("col").append(i).append(",");
- }
- sb.append("col").append(n - 1);
- return sb.toString();
+ assertEquals(4 * 1024, WriterImpl.getEstimatedBufferSize(512 * 1024 * 1024,
+ false, 25000, 256*1024));
}
}
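The rewrite above drops all the writer and ObjectInspector plumbing and tests the now-static WriterImpl.getEstimatedBufferSize(availableMemory, isCompressed, columnCount, requestedBufferSize) directly. A hedged re-derivation of the heuristic that reproduces every expected value in these tests; the per-column stream counts (4 uncompressed, 8 compressed), the power-of-two rounding, and the 4 KB floor are assumptions inferred from the assertions, not the committed implementation:

    public class BufferSizeSketch {
      // assumed floor; the 25000-column case bottoms out at 4 KB
      private static final int MIN_BUFFER = 4 * 1024;

      static int estimateBufferSize(long availableMemory, boolean isCompressed,
                                    int columnCount, int requestedSize) {
        // assumption: 4 buffered streams per column, doubled when
        // compression buffers are needed as well
        int streamsPerColumn = isCompressed ? 8 : 4;
        long perStream = availableMemory / ((long) columnCount * streamsPerColumn);
        // round down to a power of two so buffer sizes stay aligned
        int size = Integer.highestOneBit((int) Math.min(perStream, Integer.MAX_VALUE));
        size = Math.max(size, MIN_BUFFER);
        // never exceed what the caller asked for
        return Math.min(size, requestedSize);
      }

      public static void main(String[] args) {
        long mem = 512L * 1024 * 1024;
        System.out.println(estimateBufferSize(mem, false, 1, 128 * 1024));     // 131072
        System.out.println(estimateBufferSize(mem, false, 2000, 256 * 1024));  // 65536
        System.out.println(estimateBufferSize(mem, true, 4000, 256 * 1024));   // 16384
        System.out.println(estimateBufferSize(mem, false, 25000, 256 * 1024)); // 4096
      }
    }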
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java
new file mode 100644
index 0000000..c3095f7
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestTypeDescription.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+public class TestTypeDescription {
+
+ @Test
+ public void testJson() {
+ TypeDescription bin = TypeDescription.createBinary();
+ assertEquals("{\"category\": \"binary\", \"id\": 0, \"max\": 0}",
+ bin.toJson());
+ assertEquals("binary", bin.toString());
+ TypeDescription struct = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createInt())
+ .addField("f2", TypeDescription.createString())
+ .addField("f3", TypeDescription.createDecimal());
+ assertEquals("struct<f1:int,f2:string,f3:decimal(38,10)>",
+ struct.toString());
+ assertEquals("{\"category\": \"struct\", \"id\": 0, \"max\": 3, \"fields\": [\n"
+ + " \"f1\": {\"category\": \"int\", \"id\": 1, \"max\": 1},\n"
+ + " \"f2\": {\"category\": \"string\", \"id\": 2, \"max\": 2},\n"
+ + " \"f3\": {\"category\": \"decimal\", \"id\": 3, \"max\": 3, \"precision\": 38, \"scale\": 10}]}",
+ struct.toJson());
+ struct = TypeDescription.createStruct()
+ .addField("f1", TypeDescription.createUnion()
+ .addUnionChild(TypeDescription.createByte())
+ .addUnionChild(TypeDescription.createDecimal()
+ .withPrecision(20).withScale(10)))
+ .addField("f2", TypeDescription.createStruct()
+ .addField("f3", TypeDescription.createDate())
+ .addField("f4", TypeDescription.createDouble())
+ .addField("f5", TypeDescription.createBoolean()))
+ .addField("f6", TypeDescription.createChar().withMaxLength(100));
+ assertEquals("struct<f1:union<tinyint,decimal(20,10)>,f2:struct<f3:date,f4:double,f5:boolean>,f6:char(100)>",
+ struct.toString());
+ assertEquals(
+ "{\"category\": \"struct\", \"id\": 0, \"max\": 8, \"fields\": [\n" +
+ " \"f1\": {\"category\": \"union\", \"id\": 1, \"max\": 3, \"children\": [\n" +
+ " {\"category\": \"tinyint\", \"id\": 2, \"max\": 2},\n" +
+ " {\"category\": \"decimal\", \"id\": 3, \"max\": 3, \"precision\": 20, \"scale\": 10}]},\n" +
+ " \"f2\": {\"category\": \"struct\", \"id\": 4, \"max\": 7, \"fields\": [\n" +
+ " \"f3\": {\"category\": \"date\", \"id\": 5, \"max\": 5},\n" +
+ " \"f4\": {\"category\": \"double\", \"id\": 6, \"max\": 6},\n" +
+ " \"f5\": {\"category\": \"boolean\", \"id\": 7, \"max\": 7}]},\n" +
+ " \"f6\": {\"category\": \"char\", \"id\": 8, \"max\": 8, \"length\": 100}]}",
+ struct.toJson());
+ }
+}
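The expected JSON in this new test makes the numbering scheme visible: every node gets an "id" assigned in pre-order and "max" is the largest id in its subtree, which is why the include arrays in TestOrcFile are sized getMaximumId() + 1. A small sketch confirming the pre-order ids for the nested schema asserted above:

    import org.apache.hadoop.hive.ql.io.orc.TypeDescription;

    public class PreOrderIdSketch {
      public static void main(String[] args) {
        TypeDescription schema = TypeDescription.createStruct()
            .addField("f1", TypeDescription.createUnion()
                .addUnionChild(TypeDescription.createByte())
                .addUnionChild(TypeDescription.createDecimal()
                    .withPrecision(20).withScale(10)))
            .addField("f2", TypeDescription.createStruct()
                .addField("f3", TypeDescription.createDate())
                .addField("f4", TypeDescription.createDouble())
                .addField("f5", TypeDescription.createBoolean()))
            .addField("f6", TypeDescription.createChar().withMaxLength(100));
        // pre-order walk: root 0, union 1, tinyint 2, decimal 3,
        // inner struct 4, date 5, double 6, boolean 7, char 8
        System.out.println(schema.getMaximumId()); // 8
      }
    }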
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/resources/orc-file-dump-bloomfilter.out
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump-bloomfilter.out b/ql/src/test/resources/orc-file-dump-bloomfilter.out
index add163c..19a2f65 100644
--- a/ql/src/test/resources/orc-file-dump-bloomfilter.out
+++ b/ql/src/test/resources/orc-file-dump-bloomfilter.out
@@ -1,5 +1,5 @@
Structure for TestFileDump.testDump.orc
-File Version: 0.12 with HIVE_8732
+File Version: 0.12 with HIVE_4243
Rows: 21000
Compression: ZLIB
Compression size: 10000
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/resources/orc-file-dump-bloomfilter2.out
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump-bloomfilter2.out b/ql/src/test/resources/orc-file-dump-bloomfilter2.out
index 06b65ce..a37408c 100644
--- a/ql/src/test/resources/orc-file-dump-bloomfilter2.out
+++ b/ql/src/test/resources/orc-file-dump-bloomfilter2.out
@@ -1,5 +1,5 @@
Structure for TestFileDump.testDump.orc
-File Version: 0.12 with HIVE_8732
+File Version: 0.12 with HIVE_4243
Rows: 21000
Compression: ZLIB
Compression size: 10000
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump-dictionary-threshold.out b/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
index 13e316e..73f9f05 100644
--- a/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
+++ b/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
@@ -1,5 +1,5 @@
Structure for TestFileDump.testDump.orc
-File Version: 0.12 with HIVE_8732
+File Version: 0.12 with HIVE_4243
Rows: 21000
Compression: ZLIB
Compression size: 10000
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/resources/orc-file-dump.json
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump.json b/ql/src/test/resources/orc-file-dump.json
index fe31d5e..14cf962 100644
--- a/ql/src/test/resources/orc-file-dump.json
+++ b/ql/src/test/resources/orc-file-dump.json
@@ -1,7 +1,7 @@
{
"fileName": "TestFileDump.testDump.orc",
"fileVersion": "0.12",
- "writerVersion": "HIVE_8732",
+ "writerVersion": "HIVE_4243",
"numberOfRows": 21000,
"compression": "ZLIB",
"compressionBufferSize": 10000,
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/resources/orc-file-dump.out
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-dump.out b/ql/src/test/resources/orc-file-dump.out
index 2f5962b..28935ba 100644
--- a/ql/src/test/resources/orc-file-dump.out
+++ b/ql/src/test/resources/orc-file-dump.out
@@ -1,5 +1,5 @@
Structure for TestFileDump.testDump.orc
-File Version: 0.12 with HIVE_8732
+File Version: 0.12 with HIVE_4243
Rows: 21000
Compression: ZLIB
Compression size: 10000
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/resources/orc-file-has-null.out
----------------------------------------------------------------------
diff --git a/ql/src/test/resources/orc-file-has-null.out b/ql/src/test/resources/orc-file-has-null.out
index fdc3862..9c4e83c 100644
--- a/ql/src/test/resources/orc-file-has-null.out
+++ b/ql/src/test/resources/orc-file-has-null.out
@@ -1,5 +1,5 @@
Structure for TestOrcFile.testHasNull.orc
-File Version: 0.12 with HIVE_8732
+File Version: 0.12 with HIVE_4243
Rows: 20000
Compression: ZLIB
Compression size: 10000
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/annotate_stats_part.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_part.q.out b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
index 2bec917..cf523cb 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_part.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
@@ -98,7 +98,7 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 6 Data size: 767 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 6 Data size: 780 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
@@ -133,11 +133,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 3 Data size: 342 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 3 Data size: 342 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 348 Basic stats: COMPLETE Column stats: NONE
ListSink
PREHOOK: query: -- basicStatState: PARTIAL colStatState: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/annotate_stats_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_table.q.out b/ql/src/test/results/clientpositive/annotate_stats_table.q.out
index 87e2fa6..ebc6c5b 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_table.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_table.q.out
@@ -89,11 +89,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: emp_orc
- Statistics: Num rows: 3 Data size: 384 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 394 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: lastname (type: string), deptid (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 384 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 394 Basic stats: COMPLETE Column stats: NONE
ListSink
PREHOOK: query: -- table level analyze statistics
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
index 6f79d69..2f12b8d 100644
--- a/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
@@ -894,7 +894,7 @@ Partition Parameters:
numFiles 2
numRows 32
rawDataSize 640
- totalSize 1400
+ totalSize 1392
#### A masked pattern was here ####
# Storage Information
@@ -936,7 +936,7 @@ Partition Parameters:
numFiles 2
numRows 6
rawDataSize 120
- totalSize 1102
+ totalSize 1096
#### A masked pattern was here ####
# Storage Information
@@ -978,7 +978,7 @@ Partition Parameters:
numFiles 2
numRows 14
rawDataSize 280
- totalSize 1216
+ totalSize 1210
#### A masked pattern was here ####
# Storage Information
@@ -1020,7 +1020,7 @@ Partition Parameters:
numFiles 2
numRows 6
rawDataSize 120
- totalSize 1102
+ totalSize 1096
#### A masked pattern was here ####
# Storage Information
@@ -1061,7 +1061,7 @@ Partition Parameters:
numFiles 8
numRows 32
rawDataSize 640
- totalSize 4548
+ totalSize 4524
#### A masked pattern was here ####
# Storage Information
@@ -1102,7 +1102,7 @@ Partition Parameters:
numFiles 8
numRows 6
rawDataSize 120
- totalSize 2212
+ totalSize 2400
#### A masked pattern was here ####
# Storage Information
@@ -1143,7 +1143,7 @@ Partition Parameters:
numFiles 8
numRows 32
rawDataSize 640
- totalSize 4534
+ totalSize 4510
#### A masked pattern was here ####
# Storage Information
@@ -1184,7 +1184,7 @@ Partition Parameters:
numFiles 8
numRows 6
rawDataSize 120
- totalSize 2212
+ totalSize 2400
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out
index cb0eb58..24ac550 100644
--- a/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out
+++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out
@@ -1157,7 +1157,7 @@ Partition Parameters:
numFiles 1
numRows 11
rawDataSize 88
- totalSize 433
+ totalSize 454
#### A masked pattern was here ####
# Storage Information
@@ -1217,7 +1217,7 @@ Partition Parameters:
numFiles 1
numRows 13
rawDataSize 104
- totalSize 456
+ totalSize 477
#### A masked pattern was here ####
# Storage Information
@@ -1374,7 +1374,7 @@ Partition Parameters:
numFiles 1
numRows 11
rawDataSize 88
- totalSize 433
+ totalSize 454
#### A masked pattern was here ####
# Storage Information
@@ -1434,7 +1434,7 @@ Partition Parameters:
numFiles 1
numRows 13
rawDataSize 104
- totalSize 456
+ totalSize 477
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out
index 0f6b15d..f87a539 100644
--- a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out
+++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out
@@ -136,7 +136,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 362
+ totalSize 369
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -179,7 +179,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 382
+ totalSize 389
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -269,7 +269,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 362
+ totalSize 369
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -312,7 +312,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 382
+ totalSize 389
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -469,7 +469,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 277
+ totalSize 281
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -513,7 +513,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 279
+ totalSize 283
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -557,7 +557,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 261
+ totalSize 265
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -601,7 +601,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 261
+ totalSize 265
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -688,7 +688,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 277
+ totalSize 281
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -732,7 +732,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 279
+ totalSize 283
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -776,7 +776,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 261
+ totalSize 265
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -820,7 +820,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 261
+ totalSize 265
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
index 1fdeb90..5903cd1 100644
--- a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
+++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
@@ -153,7 +153,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 362
+ totalSize 369
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -196,7 +196,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 383
+ totalSize 390
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -239,7 +239,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 404
+ totalSize 410
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -282,7 +282,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 412
+ totalSize 419
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -372,7 +372,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 362
+ totalSize 369
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -415,7 +415,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 383
+ totalSize 390
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -458,7 +458,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 404
+ totalSize 410
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -501,7 +501,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 412
+ totalSize 419
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -604,7 +604,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 362
+ totalSize 369
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -647,7 +647,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 383
+ totalSize 390
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -690,7 +690,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 404
+ totalSize 410
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -733,7 +733,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 412
+ totalSize 419
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -819,7 +819,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 362
+ totalSize 369
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -862,7 +862,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 383
+ totalSize 390
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -905,7 +905,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 404
+ totalSize 410
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -948,7 +948,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, i32 locid, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 412
+ totalSize 419
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1106,7 +1106,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 281
+ totalSize 286
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1150,7 +1150,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 291
+ totalSize 295
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1194,7 +1194,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 298
+ totalSize 302
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1238,7 +1238,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 277
+ totalSize 281
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1282,7 +1282,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 272
+ totalSize 276
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1326,7 +1326,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 283
+ totalSize 288
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1370,7 +1370,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 292
+ totalSize 297
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1414,7 +1414,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 261
+ totalSize 265
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1458,7 +1458,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 261
+ totalSize 265
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1502,7 +1502,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 294
+ totalSize 298
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1546,7 +1546,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 291
+ totalSize 295
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1633,7 +1633,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 281
+ totalSize 286
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1677,7 +1677,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 291
+ totalSize 295
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1721,7 +1721,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 298
+ totalSize 302
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1765,7 +1765,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 277
+ totalSize 281
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1809,7 +1809,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 272
+ totalSize 276
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1853,7 +1853,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 283
+ totalSize 288
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1897,7 +1897,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 292
+ totalSize 297
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1941,7 +1941,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 261
+ totalSize 265
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1985,7 +1985,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 261
+ totalSize 265
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2029,7 +2029,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 294
+ totalSize 298
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2073,7 +2073,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 291
+ totalSize 295
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/extrapolate_part_stats_partial_ndv.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial_ndv.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial_ndv.q.out
index 81ac963..2ea1e6e 100644
--- a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial_ndv.q.out
+++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial_ndv.q.out
@@ -230,7 +230,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 521
+ totalSize 531
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -273,7 +273,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 553
+ totalSize 562
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -316,7 +316,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 571
+ totalSize 580
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -359,7 +359,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 593
+ totalSize 602
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -543,7 +543,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 521
+ totalSize 531
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -586,7 +586,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 553
+ totalSize 562
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -629,7 +629,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 571
+ totalSize 580
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -672,7 +672,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_1d { string state, double locid, decimal(10,0) cnt, i32 zip}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 593
+ totalSize 602
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -910,7 +910,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 379
+ totalSize 386
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -954,7 +954,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 402
+ totalSize 409
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -998,7 +998,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 417
+ totalSize 423
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1042,7 +1042,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 376
+ totalSize 383
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1086,7 +1086,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 386
+ totalSize 394
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1130,7 +1130,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 380
+ totalSize 387
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1174,7 +1174,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 403
+ totalSize 409
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1218,7 +1218,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 359
+ totalSize 366
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1262,7 +1262,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 354
+ totalSize 361
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1306,7 +1306,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 405
+ totalSize 412
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1350,7 +1350,7 @@ STAGE PLANS:
serialization.ddl struct loc_orc_2d { string state, i32 locid, decimal(10,0) cnt}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 405
+ totalSize 412
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/orc_analyze.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_analyze.q.out b/ql/src/test/results/clientpositive/orc_analyze.q.out
index 6eb9a93..bc46852 100644
--- a/ql/src/test/results/clientpositive/orc_analyze.q.out
+++ b/ql/src/test/results/clientpositive/orc_analyze.q.out
@@ -106,7 +106,7 @@ Table Parameters:
numFiles 1
numRows 100
rawDataSize 52600
- totalSize 3174
+ totalSize 3202
#### A masked pattern was here ####
# Storage Information
@@ -154,7 +154,7 @@ Table Parameters:
numFiles 1
numRows 100
rawDataSize 52600
- totalSize 3174
+ totalSize 3202
#### A masked pattern was here ####
# Storage Information
@@ -202,7 +202,7 @@ Table Parameters:
numFiles 1
numRows 100
rawDataSize 52600
- totalSize 3174
+ totalSize 3202
#### A masked pattern was here ####
# Storage Information
@@ -291,7 +291,7 @@ Table Parameters:
numFiles 1
numRows 100
rawDataSize 52600
- totalSize 3174
+ totalSize 3202
#### A masked pattern was here ####
# Storage Information
@@ -405,7 +405,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -448,7 +448,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -503,7 +503,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -546,7 +546,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -601,7 +601,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -644,7 +644,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -744,7 +744,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -787,7 +787,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -907,7 +907,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -950,7 +950,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -1005,7 +1005,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1048,7 +1048,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -1103,7 +1103,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1146,7 +1146,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -1252,7 +1252,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1295,7 +1295,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -1460,7 +1460,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1560,7 +1560,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1660,7 +1660,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/orc_file_dump.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_file_dump.q.out b/ql/src/test/results/clientpositive/orc_file_dump.q.out
index 67aa189..c494d47 100644
--- a/ql/src/test/results/clientpositive/orc_file_dump.q.out
+++ b/ql/src/test/results/clientpositive/orc_file_dump.q.out
@@ -93,11 +93,11 @@ PREHOOK: Input: default@orc_ppd
#### A masked pattern was here ####
-- BEGIN ORC FILE DUMP --
#### A masked pattern was here ####
-File Version: 0.12 with HIVE_8732
+File Version: 0.12 with HIVE_4243
Rows: 1049
Compression: ZLIB
Compression size: 262144
-Type: struct<_col0:tinyint,_col1:smallint,_col2:int,_col3:bigint,_col4:float,_col5:double,_col6:boolean,_col7:string,_col8:timestamp,_col9:decimal(4,2),_col10:binary>
+Type: struct<t:tinyint,si:smallint,i:int,b:bigint,f:float,d:double,bo:boolean,s:string,ts:timestamp,dec:decimal(4,2),bin:binary>
Stripe Statistics:
Stripe 1:
@@ -192,7 +192,7 @@ Stripes:
Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 0.0268 expectedFpp: 5.147697E-7
Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5
-File length: 33456 bytes
+File length: 33458 bytes
Padding length: 0 bytes
Padding ratio: 0%
-- END ORC FILE DUMP --
@@ -211,11 +211,11 @@ PREHOOK: Input: default@orc_ppd
#### A masked pattern was here ####
-- BEGIN ORC FILE DUMP --
#### A masked pattern was here ####
-File Version: 0.12 with HIVE_8732
+File Version: 0.12 with HIVE_4243
Rows: 1049
Compression: ZLIB
Compression size: 262144
-Type: struct<_col0:tinyint,_col1:smallint,_col2:int,_col3:bigint,_col4:float,_col5:double,_col6:boolean,_col7:string,_col8:timestamp,_col9:decimal(4,2),_col10:binary>
+Type: struct<t:tinyint,si:smallint,i:int,b:bigint,f:float,d:double,bo:boolean,s:string,ts:timestamp,dec:decimal(4,2),bin:binary>
Stripe Statistics:
Stripe 1:
@@ -310,7 +310,7 @@ Stripes:
Entry 1: numHashFunctions: 7 bitCount: 9600 popCount: 285 loadFactor: 0.0297 expectedFpp: 2.0324289E-11
Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 849 loadFactor: 0.0884 expectedFpp: 4.231118E-8
-File length: 38610 bytes
+File length: 38613 bytes
Padding length: 0 bytes
Padding ratio: 0%
-- END ORC FILE DUMP --
@@ -341,11 +341,11 @@ PREHOOK: Input: default@orc_ppd_part@ds=2015/hr=10
#### A masked pattern was here ####
-- BEGIN ORC FILE DUMP --
#### A masked pattern was here ####
-File Version: 0.12 with HIVE_8732
+File Version: 0.12 with HIVE_4243
Rows: 1049
Compression: ZLIB
Compression size: 262144
-Type: struct<_col0:tinyint,_col1:smallint,_col2:int,_col3:bigint,_col4:float,_col5:double,_col6:boolean,_col7:string,_col8:timestamp,_col9:decimal(4,2),_col10:binary>
+Type: struct<t:tinyint,si:smallint,i:int,b:bigint,f:float,d:double,bo:boolean,s:string,ts:timestamp,dec:decimal(4,2),bin:binary>
Stripe Statistics:
Stripe 1:
@@ -440,7 +440,7 @@ Stripes:
Entry 1: numHashFunctions: 4 bitCount: 6272 popCount: 168 loadFactor: 0.0268 expectedFpp: 5.147697E-7
Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 492 loadFactor: 0.0784 expectedFpp: 3.7864847E-5
-File length: 33456 bytes
+File length: 33458 bytes
Padding length: 0 bytes
Padding ratio: 0%
-- END ORC FILE DUMP --
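The Type line is the user-visible payoff of HIVE-4243: the dump now reports the table's real column names (t, si, i, b, ...) where older files recorded the positional _col0.._col10 placeholders. The names appear to come straight from the schema handed to the writer; a quick sketch of the same rendering for a subset of the dump's columns, built only with constructors exercised in TestTypeDescription (the schema below is a hypothetical example, not from the patch):

    import org.apache.hadoop.hive.ql.io.orc.TypeDescription;

    public class DumpTypeSketch {
      public static void main(String[] args) {
        TypeDescription schema = TypeDescription.createStruct()
            .addField("t", TypeDescription.createByte())
            .addField("i", TypeDescription.createInt())
            .addField("s", TypeDescription.createString())
            .addField("dec", TypeDescription.createDecimal()
                .withPrecision(4).withScale(2))
            .addField("bin", TypeDescription.createBinary());
        // toString produces the same struct<name:type,...> form the
        // dump's "Type:" line shows for these columns
        System.out.println(schema); // struct<t:tinyint,i:int,s:string,dec:decimal(4,2),bin:binary>
      }
    }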
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/orc_int_type_promotion.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_int_type_promotion.q.out b/ql/src/test/results/clientpositive/orc_int_type_promotion.q.out
index d26dff2..03e2f7f 100644
--- a/ql/src/test/results/clientpositive/orc_int_type_promotion.q.out
+++ b/ql/src/test/results/clientpositive/orc_int_type_promotion.q.out
@@ -220,14 +220,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypes_orc
- Statistics: Num rows: 88 Data size: 1772 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 88 Data size: 1766 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 88 Data size: 1772 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 88 Data size: 1766 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 88 Data size: 1772 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 88 Data size: 1766 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
[10/22] hive git commit: HIVE-4243. Fix column names in ORC metadata.
Posted by se...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
index 32514ca..100a3d9 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
@@ -268,7 +268,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -288,7 +288,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -603,7 +603,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -623,7 +623,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -670,7 +670,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -690,7 +690,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -946,7 +946,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -966,7 +966,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -1218,7 +1218,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1238,7 +1238,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -1551,7 +1551,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1571,7 +1571,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -1894,7 +1894,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1914,7 +1914,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2192,7 +2192,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2212,7 +2212,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2259,7 +2259,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2279,7 +2279,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2515,7 +2515,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2535,7 +2535,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2579,7 +2579,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2599,7 +2599,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -2860,7 +2860,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -2880,7 +2880,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -3182,7 +3182,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -3202,7 +3202,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -3506,7 +3506,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -3526,7 +3526,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -3840,7 +3840,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -3860,7 +3860,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -4254,7 +4254,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -4274,7 +4274,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -4677,7 +4677,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -4697,7 +4697,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -4744,7 +4744,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -4764,7 +4764,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5061,7 +5061,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -5081,7 +5081,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5353,7 +5353,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -5373,7 +5373,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -5832,7 +5832,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -5852,7 +5852,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -6464,7 +6464,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -6484,7 +6484,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -6916,7 +6916,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -6936,7 +6936,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -7349,7 +7349,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -7369,7 +7369,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -7772,7 +7772,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -7792,7 +7792,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -8265,7 +8265,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -8285,7 +8285,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
@@ -8702,7 +8702,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -8722,7 +8722,7 @@ STAGE PLANS:
serialization.ddl struct part_orc { i32 p_partkey, string p_name, string p_mfgr, string p_brand, string p_type, i32 p_size, string p_container, double p_retailprice, string p_comment}
serialization.format 1
serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
- totalSize 2639
+ totalSize 2689
#### A masked pattern was here ####
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.part_orc
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
index 6e33a86..984ef69 100644
--- a/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
@@ -946,7 +946,7 @@ Partition Parameters:
numFiles 2
numRows 32
rawDataSize 640
- totalSize 1400
+ totalSize 1392
#### A masked pattern was here ####
# Storage Information
@@ -988,7 +988,7 @@ Partition Parameters:
numFiles 2
numRows 6
rawDataSize 120
- totalSize 1102
+ totalSize 1096
#### A masked pattern was here ####
# Storage Information
@@ -1030,7 +1030,7 @@ Partition Parameters:
numFiles 2
numRows 14
rawDataSize 280
- totalSize 1216
+ totalSize 1210
#### A masked pattern was here ####
# Storage Information
@@ -1072,7 +1072,7 @@ Partition Parameters:
numFiles 2
numRows 6
rawDataSize 120
- totalSize 1102
+ totalSize 1096
#### A masked pattern was here ####
# Storage Information
@@ -1113,7 +1113,7 @@ Partition Parameters:
numFiles 8
numRows 32
rawDataSize 640
- totalSize 4548
+ totalSize 4524
#### A masked pattern was here ####
# Storage Information
@@ -1154,7 +1154,7 @@ Partition Parameters:
numFiles 8
numRows 6
rawDataSize 120
- totalSize 2212
+ totalSize 2400
#### A masked pattern was here ####
# Storage Information
@@ -1195,7 +1195,7 @@ Partition Parameters:
numFiles 8
numRows 32
rawDataSize 640
- totalSize 4534
+ totalSize 4510
#### A masked pattern was here ####
# Storage Information
@@ -1236,7 +1236,7 @@ Partition Parameters:
numFiles 8
numRows 6
rawDataSize 120
- totalSize 2212
+ totalSize 2400
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out b/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out
index 5bba0cb..ef09bea 100644
--- a/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out
+++ b/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out
@@ -1208,7 +1208,7 @@ Partition Parameters:
numFiles 1
numRows 11
rawDataSize 88
- totalSize 433
+ totalSize 454
#### A masked pattern was here ####
# Storage Information
@@ -1268,7 +1268,7 @@ Partition Parameters:
numFiles 1
numRows 13
rawDataSize 104
- totalSize 456
+ totalSize 477
#### A masked pattern was here ####
# Storage Information
@@ -1436,7 +1436,7 @@ Partition Parameters:
numFiles 1
numRows 11
rawDataSize 88
- totalSize 433
+ totalSize 454
#### A masked pattern was here ####
# Storage Information
@@ -1496,7 +1496,7 @@ Partition Parameters:
numFiles 1
numRows 13
rawDataSize 104
- totalSize 456
+ totalSize 477
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/tez/orc_analyze.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/orc_analyze.q.out b/ql/src/test/results/clientpositive/tez/orc_analyze.q.out
index 6eb9a93..bc46852 100644
--- a/ql/src/test/results/clientpositive/tez/orc_analyze.q.out
+++ b/ql/src/test/results/clientpositive/tez/orc_analyze.q.out
@@ -106,7 +106,7 @@ Table Parameters:
numFiles 1
numRows 100
rawDataSize 52600
- totalSize 3174
+ totalSize 3202
#### A masked pattern was here ####
# Storage Information
@@ -154,7 +154,7 @@ Table Parameters:
numFiles 1
numRows 100
rawDataSize 52600
- totalSize 3174
+ totalSize 3202
#### A masked pattern was here ####
# Storage Information
@@ -202,7 +202,7 @@ Table Parameters:
numFiles 1
numRows 100
rawDataSize 52600
- totalSize 3174
+ totalSize 3202
#### A masked pattern was here ####
# Storage Information
@@ -291,7 +291,7 @@ Table Parameters:
numFiles 1
numRows 100
rawDataSize 52600
- totalSize 3174
+ totalSize 3202
#### A masked pattern was here ####
# Storage Information
@@ -405,7 +405,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -448,7 +448,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -503,7 +503,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -546,7 +546,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -601,7 +601,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -644,7 +644,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -744,7 +744,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -787,7 +787,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -907,7 +907,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -950,7 +950,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -1005,7 +1005,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1048,7 +1048,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -1103,7 +1103,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1146,7 +1146,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -1252,7 +1252,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1295,7 +1295,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 22050
- totalSize 2088
+ totalSize 2118
#### A masked pattern was here ####
# Storage Information
@@ -1460,7 +1460,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1560,7 +1560,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
@@ -1660,7 +1660,7 @@ Partition Parameters:
numFiles 1
numRows 50
rawDataSize 21950
- totalSize 2073
+ totalSize 2102
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out b/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out
index d0d82a0..9cb7a84 100644
--- a/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out
+++ b/ql/src/test/results/clientpositive/tez/union_fast_stats.q.out
@@ -120,7 +120,7 @@ Table Parameters:
numFiles 4
numRows 0
rawDataSize 0
- totalSize 3915
+ totalSize 4211
#### A masked pattern was here ####
# Storage Information
@@ -172,8 +172,8 @@ Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 4
numRows 15
- rawDataSize 3483
- totalSize 3915
+ rawDataSize 3651
+ totalSize 4211
#### A masked pattern was here ####
# Storage Information
@@ -237,8 +237,8 @@ Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 5
numRows 20
- rawDataSize 4552
- totalSize 5225
+ rawDataSize 4720
+ totalSize 5568
#### A masked pattern was here ####
# Storage Information
@@ -393,7 +393,7 @@ Table Parameters:
numFiles 1
numRows 5
rawDataSize 1069
- totalSize 3177
+ totalSize 3224
#### A masked pattern was here ####
# Storage Information
@@ -446,7 +446,7 @@ Table Parameters:
numFiles 1
numRows 15
rawDataSize 3320
- totalSize 3177
+ totalSize 3224
#### A masked pattern was here ####
# Storage Information
@@ -511,7 +511,7 @@ Table Parameters:
numFiles 2
numRows 20
rawDataSize 4389
- totalSize 4487
+ totalSize 4581
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/tez/vector_outer_join1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_outer_join1.q.out b/ql/src/test/results/clientpositive/tez/vector_outer_join1.q.out
index 664f74a..92e7163 100644
--- a/ql/src/test/results/clientpositive/tez/vector_outer_join1.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_outer_join1.q.out
@@ -183,11 +183,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -197,11 +197,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
input vertices:
1 Map 2
- Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 4016 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
File Output Operator
compressed: false
- Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 4016 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -211,16 +211,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: int)
sort order: +
Map-reduce partition columns: _col2 (type: int)
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
Execution mode: vectorized
@@ -294,11 +294,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint)
outputColumnNames: _col0
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -308,11 +308,11 @@ STAGE PLANS:
outputColumnNames: _col0
input vertices:
1 Map 2
- Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 4016 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
File Output Operator
compressed: false
- Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 4016 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -322,16 +322,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint)
outputColumnNames: _col0
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Stage: Stage-0
@@ -497,11 +497,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cint (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -511,7 +511,7 @@ STAGE PLANS:
outputColumnNames: _col0
input vertices:
1 Map 3
- Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 4016 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
Map Join Operator
condition map:
@@ -522,7 +522,7 @@ STAGE PLANS:
outputColumnNames: _col0
input vertices:
1 Map 4
- Statistics: Num rows: 17 Data size: 4214 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 17 Data size: 4417 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
Group By Operator
aggregations: count(), sum(_col0)
@@ -538,31 +538,31 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cint (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 4
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint)
outputColumnNames: _col0
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Reducer 2
Reduce Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out b/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out
index 7568abf..aa201ad 100644
--- a/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out
@@ -213,11 +213,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -227,11 +227,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
input vertices:
1 Map 2
- Statistics: Num rows: 33 Data size: 7521 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 33 Data size: 7706 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
File Output Operator
compressed: false
- Statistics: Num rows: 33 Data size: 7521 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 33 Data size: 7706 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -241,16 +241,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: int)
sort order: +
Map-reduce partition columns: _col2 (type: int)
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
Execution mode: vectorized
@@ -359,11 +359,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint)
outputColumnNames: _col0
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -373,11 +373,11 @@ STAGE PLANS:
outputColumnNames: _col0
input vertices:
1 Map 2
- Statistics: Num rows: 33 Data size: 7521 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 33 Data size: 7706 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
File Output Operator
compressed: false
- Statistics: Num rows: 33 Data size: 7521 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 33 Data size: 7706 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -387,16 +387,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint)
outputColumnNames: _col0
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Stage: Stage-0
@@ -867,11 +867,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cint (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -881,7 +881,7 @@ STAGE PLANS:
outputColumnNames: _col0
input vertices:
1 Map 3
- Statistics: Num rows: 33 Data size: 7521 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 33 Data size: 7706 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
Map Join Operator
condition map:
@@ -891,7 +891,7 @@ STAGE PLANS:
1 _col0 (type: tinyint)
input vertices:
1 Map 4
- Statistics: Num rows: 36 Data size: 8273 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 36 Data size: 8476 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
Group By Operator
aggregations: count()
@@ -907,31 +907,31 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cint (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map 4
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint)
outputColumnNames: _col0
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Reducer 2
Reduce Operator Tree:
[15/22] hive git commit: HIVE-11925 : Hive file format checking
breaks load from named pipes (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
HIVE-11925 : Hive file format checking breaks load from named pipes (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
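The patch below detects FIFOs before the format checkers try to open them, since reading a named pipe during validation would consume its contents or hang the load. A self-contained sketch of the same idea, assuming a Unix filesystem that exposes the "unix:mode" attribute; the path is hypothetical:

    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;

    public class PipeProbe {
      // FIFO bit of st_mode; see include/uapi/linux/stat.h.
      private static final int S_IFIFO = 0010000;

      static boolean isPipe(Path p) {
        try {
          int mode = (Integer) Files.getAttribute(p, "unix:mode");
          return (mode & S_IFIFO) != 0;
        } catch (Exception e) {
          // Non-POSIX filesystem or unsupported attribute view:
          // treat the path as a regular file.
          return false;
        }
      }

      public static void main(String[] args) {
        // e.g. run `mkfifo /tmp/p` first; this then prints true.
        System.out.println(isPipe(Paths.get("/tmp/p")));
      }
    }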
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/82bc0e1c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/82bc0e1c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/82bc0e1c
Branch: refs/heads/llap
Commit: 82bc0e1c79ca656ec34a43efe4a8807f0f655e30
Parents: 24988f7
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Oct 1 12:42:28 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Oct 1 12:42:28 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/io/HiveFileFormatUtils.java | 95 ++++++++++++++------
.../hadoop/hive/ql/io/InputFormatChecker.java | 5 +-
.../hadoop/hive/ql/io/RCFileInputFormat.java | 3 +-
.../ql/io/SequenceFileInputFormatChecker.java | 3 +-
.../hive/ql/io/VectorizedRCFileInputFormat.java | 3 +-
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 4 +-
.../ql/io/orc/VectorizedOrcInputFormat.java | 2 +-
.../hive/ql/exec/TestFileSinkOperator.java | 2 +-
.../hive/ql/txn/compactor/CompactorTest.java | 2 +-
9 files changed, 80 insertions(+), 39 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/82bc0e1c/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
index 50ba740..06d3df7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
@@ -19,8 +19,13 @@
package org.apache.hadoop.hive.ql.io;
import java.io.IOException;
+import java.nio.file.FileSystemNotFoundException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -28,10 +33,13 @@ import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
@@ -69,6 +77,7 @@ import org.apache.hive.common.util.ReflectionUtil;
*
*/
public final class HiveFileFormatUtils {
+ private static final Log LOG = LogFactory.getLog(HiveFileFormatUtils.class);
static {
outputFormatSubstituteMap =
@@ -177,44 +186,51 @@ public final class HiveFileFormatUtils {
*/
@SuppressWarnings("unchecked")
public static boolean checkInputFormat(FileSystem fs, HiveConf conf,
- Class<? extends InputFormat> inputFormatCls, ArrayList<FileStatus> files)
+ Class<? extends InputFormat> inputFormatCls, List<FileStatus> files)
throws HiveException {
- if (files.size() > 0) {
- Class<? extends InputFormatChecker> checkerCls = getInputFormatChecker(inputFormatCls);
- if (checkerCls == null
- && inputFormatCls.isAssignableFrom(TextInputFormat.class)) {
- // we get a text input format here, we can not determine a file is text
- // according to its content, so we can do is to test if other file
- // format can accept it. If one other file format can accept this file,
- // we treat this file as text file, although it maybe not.
- return checkTextInputFormat(fs, conf, files);
- }
+ if (files.isEmpty()) return false;
+ Class<? extends InputFormatChecker> checkerCls = getInputFormatChecker(inputFormatCls);
+ if (checkerCls == null
+ && inputFormatCls.isAssignableFrom(TextInputFormat.class)) {
+ // We have a text input format here, and we cannot tell from content
+ // alone whether a file really is text. All we can do is test whether
+ // any other registered file format accepts it; if none does, we treat
+ // the file as text, although it may not be.
+ return checkTextInputFormat(fs, conf, files);
+ }
- if (checkerCls != null) {
- InputFormatChecker checkerInstance = inputFormatCheckerInstanceCache
- .get(checkerCls);
- try {
- if (checkerInstance == null) {
- checkerInstance = checkerCls.newInstance();
- inputFormatCheckerInstanceCache.put(checkerCls, checkerInstance);
- }
- return checkerInstance.validateInput(fs, conf, files);
- } catch (Exception e) {
- throw new HiveException(e);
+ if (checkerCls != null) {
+ InputFormatChecker checkerInstance = inputFormatCheckerInstanceCache.get(checkerCls);
+ try {
+ if (checkerInstance == null) {
+ checkerInstance = checkerCls.newInstance();
+ inputFormatCheckerInstanceCache.put(checkerCls, checkerInstance);
}
+ return checkerInstance.validateInput(fs, conf, files);
+ } catch (Exception e) {
+ throw new HiveException(e);
}
- return true;
}
- return false;
+ return true;
}
@SuppressWarnings("unchecked")
private static boolean checkTextInputFormat(FileSystem fs, HiveConf conf,
- ArrayList<FileStatus> files) throws HiveException {
- Set<Class<? extends InputFormat>> inputFormatter = inputFormatCheckerMap
- .keySet();
+ List<FileStatus> files) throws HiveException {
+ List<FileStatus> files2 = new LinkedList<>(files);
+ Iterator<FileStatus> iter = files2.iterator();
+ while (iter.hasNext()) {
+ FileStatus file = iter.next();
+ if (file == null) continue;
+ if (isPipe(fs, file)) {
+ LOG.info("Skipping format check for " + file.getPath() + " as it is a pipe");
+ iter.remove();
+ }
+ }
+ if (files2.isEmpty()) return true;
+ Set<Class<? extends InputFormat>> inputFormatter = inputFormatCheckerMap.keySet();
for (Class<? extends InputFormat> reg : inputFormatter) {
- boolean result = checkInputFormat(fs, conf, reg, files);
+ boolean result = checkInputFormat(fs, conf, reg, files2);
if (result) {
return false;
}
@@ -222,6 +238,29 @@ public final class HiveFileFormatUtils {
return true;
}
+ // See include/uapi/linux/stat.h
+ private static final int S_IFIFO = 0010000;
+ private static boolean isPipe(FileSystem fs, FileStatus file) {
+ if (fs instanceof DistributedFileSystem) {
+ return false; // Shortcut for HDFS.
+ }
+ int mode = 0;
+ Object pathToLog = file.getPath();
+ try {
+ java.nio.file.Path realPath = Paths.get(file.getPath().toUri());
+ pathToLog = realPath;
+ mode = (Integer)Files.getAttribute(realPath, "unix:mode");
+ } catch (FileSystemNotFoundException t) {
+ return false; // Probably not a local filesystem; no need to check.
+ } catch (UnsupportedOperationException | IOException
+ | SecurityException | IllegalArgumentException t) {
+ LOG.info("Failed to check mode for " + pathToLog + ": "
+ + t.getMessage() + " (" + t.getClass() + ")");
+ return false;
+ }
+ return (mode & S_IFIFO) != 0;
+ }
+
public static RecordWriter getHiveRecordWriter(JobConf jc,
TableDesc tableInfo, Class<? extends Writable> outputClass,
FileSinkDesc conf, Path outPath, Reporter reporter) throws HiveException {
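Restating the new checkTextInputFormat control flow as a hedged, self-contained sketch (toy types stand in for FileStatus and the registered checker map; not a drop-in replacement for the patch above):

    import java.util.Iterator;
    import java.util.LinkedList;
    import java.util.List;
    import java.util.function.Predicate;

    public class TextCheckSketch {
      interface Checker { boolean accepts(List<String> files); }

      // Drop pipes first, accept if nothing is left, otherwise let every
      // registered structured-format checker try to claim the remaining files.
      static boolean looksLikeText(List<String> files, List<Checker> checkers,
                                   Predicate<String> isPipe) {
        List<String> remaining = new LinkedList<>(files);
        for (Iterator<String> it = remaining.iterator(); it.hasNext(); ) {
          if (isPipe.test(it.next())) {
            it.remove(); // a pipe cannot be re-read, so skip its format check
          }
        }
        if (remaining.isEmpty()) {
          return true; // only pipes: nothing disproves "text", accept the load
        }
        for (Checker c : checkers) {
          if (c.accepts(remaining)) {
            return false; // a structured format claims these files: not text
          }
        }
        return true;
      }
    }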
http://git-wip-us.apache.org/repos/asf/hive/blob/82bc0e1c/ql/src/java/org/apache/hadoop/hive/ql/io/InputFormatChecker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/InputFormatChecker.java b/ql/src/java/org/apache/hadoop/hive/ql/io/InputFormatChecker.java
index 3945411..129b834 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/InputFormatChecker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/InputFormatChecker.java
@@ -19,7 +19,7 @@
package org.apache.hadoop.hive.ql.io;
import java.io.IOException;
-import java.util.ArrayList;
+import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -34,7 +34,6 @@ public interface InputFormatChecker {
* This method is used to validate the input files.
*
*/
- boolean validateInput(FileSystem fs, HiveConf conf,
- ArrayList<FileStatus> files) throws IOException;
+ boolean validateInput(FileSystem fs, HiveConf conf, List<FileStatus> files) throws IOException;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/82bc0e1c/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileInputFormat.java
index 88198ed..6004db8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileInputFormat.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.io;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -59,7 +60,7 @@ public class RCFileInputFormat<K extends LongWritable, V extends BytesRefArrayWr
@Override
public boolean validateInput(FileSystem fs, HiveConf conf,
- ArrayList<FileStatus> files) throws IOException {
+ List<FileStatus> files) throws IOException {
if (files.size() <= 0) {
return false;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/82bc0e1c/ql/src/java/org/apache/hadoop/hive/ql/io/SequenceFileInputFormatChecker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/SequenceFileInputFormatChecker.java b/ql/src/java/org/apache/hadoop/hive/ql/io/SequenceFileInputFormatChecker.java
index e2666d7..6cb46c9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/SequenceFileInputFormatChecker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/SequenceFileInputFormatChecker.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.io;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -35,7 +36,7 @@ public class SequenceFileInputFormatChecker implements InputFormatChecker {
@Override
public boolean validateInput(FileSystem fs, HiveConf conf,
- ArrayList<FileStatus> files) throws IOException {
+ List<FileStatus> files) throws IOException {
if (files.size() <= 0) {
return false;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/82bc0e1c/ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileInputFormat.java
index faad5f2..e9e1d5a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileInputFormat.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.io;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -56,7 +57,7 @@ public class VectorizedRCFileInputFormat extends FileInputFormat<NullWritable, V
@Override
public boolean validateInput(FileSystem fs, HiveConf conf,
- ArrayList<FileStatus> files) throws IOException {
+ List<FileStatus> files) throws IOException {
if (files.size() <= 0) {
return false;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/82bc0e1c/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index c45b6e6..57bde3e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -106,7 +106,7 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder;
* that added this event. Insert and update events include the entire row, while
* delete events have null for row.
*/
-public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
+public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
InputFormatChecker, VectorizedInputFormatInterface,
AcidInputFormat<NullWritable, OrcStruct>, CombineHiveInputFormat.AvoidSplitCombination {
@@ -395,7 +395,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
}
@Override
public boolean validateInput(FileSystem fs, HiveConf conf,
- ArrayList<FileStatus> files
+ List<FileStatus> files
) throws IOException {
if (Utilities.isVectorMode(conf)) {
http://git-wip-us.apache.org/repos/asf/hive/blob/82bc0e1c/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
index 3992d8c..bf09001 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
@@ -158,7 +158,7 @@ public class VectorizedOrcInputFormat extends FileInputFormat<NullWritable, Vect
@Override
public boolean validateInput(FileSystem fs, HiveConf conf,
- ArrayList<FileStatus> files
+ List<FileStatus> files
) throws IOException {
if (files.size() <= 0) {
return false;
http://git-wip-us.apache.org/repos/asf/hive/blob/82bc0e1c/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
index 9e89376..4594836 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
@@ -704,7 +704,7 @@ public class TestFileSinkOperator {
}
@Override
- public boolean validateInput(FileSystem fs, HiveConf conf, ArrayList<FileStatus> files) throws
+ public boolean validateInput(FileSystem fs, HiveConf conf, List<FileStatus> files) throws
IOException {
return false;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/82bc0e1c/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
index 21adc9d..5a8c932 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
@@ -361,7 +361,7 @@ public abstract class CompactorTest {
}
@Override
- public boolean validateInput(FileSystem fs, HiveConf conf, ArrayList<FileStatus> files) throws
+ public boolean validateInput(FileSystem fs, HiveConf conf, List<FileStatus> files) throws
IOException {
return false;
}
[06/22] hive git commit: HIVE-11916 TxnHandler.getOpenTxnsInfo() and
getOpenTxns() may produce inconsistent result (Eugene Koifman,
reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
HIVE-11916 TxnHandler.getOpenTxnsInfo() and getOpenTxns() may produce inconsistent result (Eugene Koifman, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a6ab68e8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a6ab68e8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a6ab68e8
Branch: refs/heads/llap
Commit: a6ab68e8ff889933a6d7d164d8c91ed8d3fa8609
Parents: 0d43e87
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Wed Sep 30 16:07:20 2015 -0700
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Wed Sep 30 16:07:20 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/metastore/txn/TxnHandler.java | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/a6ab68e8/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index cc7e2c6..6218a03 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -47,8 +47,9 @@ import java.util.concurrent.TimeUnit;
* A handler to answer transaction related calls that come into the metastore
* server.
*
- * Note on log messages: Please include txnid:X and lockid info
- * {@link org.apache.hadoop.hive.common.JavaUtils#lockIdToString(long)} in all messages.
+ * Note on log messages: Please include txnid:X and lockid info using
+ * {@link org.apache.hadoop.hive.common.JavaUtils#txnIdToString(long)}
+ * and {@link org.apache.hadoop.hive.common.JavaUtils#lockIdToString(long)} in all messages.
* The txnid:X and lockid:Y matches how Thrift object toString() methods are generated,
* so keeping the format consistent makes grep'ing the logs much easier.
*/
@@ -166,7 +167,8 @@ public class TxnHandler {
}
List<TxnInfo> txnInfo = new ArrayList<TxnInfo>();
- s = "select txn_id, txn_state, txn_user, txn_host from TXNS";
+ //need the WHERE clause below to ensure consistent results with READ_COMMITTED
+ s = "select txn_id, txn_state, txn_user, txn_host from TXNS where txn_id <= " + hwm;
LOG.debug("Going to execute query<" + s + ">");
rs = stmt.executeQuery(s);
while (rs.next()) {
@@ -230,7 +232,8 @@ public class TxnHandler {
}
Set<Long> openList = new HashSet<Long>();
- s = "select txn_id from TXNS";
+ //need the WHERE clause below to ensure consistent results with READ_COMMITTED
+ s = "select txn_id from TXNS where txn_id <= " + hwm;
LOG.debug("Going to execute query<" + s + ">");
rs = stmt.executeQuery(s);
while (rs.next()) {
@@ -1459,7 +1462,7 @@ public class TxnHandler {
LockResponse response = new LockResponse();
response.setLockid(extLockId);
- LOG.debug("checkLock(): Setting savepoint. extLockId=" + extLockId);
+ LOG.debug("checkLock(): Setting savepoint. extLockId=" + JavaUtils.lockIdToString(extLockId));
Savepoint save = dbConn.setSavepoint();
StringBuilder query = new StringBuilder("select hl_lock_ext_id, " +
"hl_lock_int_id, hl_db, hl_table, hl_partition, hl_lock_state, " +
@@ -1685,7 +1688,7 @@ public class TxnHandler {
if (rc < 1) {
LOG.debug("Going to rollback");
dbConn.rollback();
- throw new NoSuchLockException("No such lock: (" + extLockId + "," +
+ throw new NoSuchLockException("No such lock: (" + JavaUtils.lockIdToString(extLockId) + "," +
+ intLockId + ")");
}
// We update the database, but we don't commit because there may be other
@@ -1710,7 +1713,7 @@ public class TxnHandler {
if (rc < 1) {
LOG.debug("Going to rollback");
dbConn.rollback();
- throw new NoSuchLockException("No such lock: " + extLockId);
+ throw new NoSuchLockException("No such lock: " + JavaUtils.lockIdToString(extLockId));
}
LOG.debug("Going to commit");
dbConn.commit();
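
Under READ_COMMITTED, transactions that commit between the high-water-mark
read and the TXNS scan would otherwise show up in one result but not the
other; bounding both scans by the same high-water mark keeps them consistent.
A hedged JDBC sketch of the pattern (the TXNS query is from the diff; the
NEXT_TXN_ID read is assumed from surrounding TxnHandler code not shown here):

    import java.sql.Connection;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;
    import java.util.HashSet;
    import java.util.Set;

    public class HwmScanDemo {
      static Set<Long> openTxns(Connection dbConn) throws SQLException {
        Set<Long> open = new HashSet<Long>();
        try (Statement stmt = dbConn.createStatement()) {
          // Read the high-water mark first.
          long hwm;
          try (ResultSet rs = stmt.executeQuery(
              "select ntxn_next - 1 from NEXT_TXN_ID")) {
            rs.next();
            hwm = rs.getLong(1);
          }
          // Then bound the TXNS scan by it, as the patch does, so rows
          // committed between the two reads cannot skew the result.
          try (ResultSet rs = stmt.executeQuery(
              "select txn_id from TXNS where txn_id <= " + hwm)) {
            while (rs.next()) {
              open.add(rs.getLong(1));
            }
          }
        }
        return open;
      }
    }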
[08/22] hive git commit: HIVE-11445 : CBO: Calcite Operator To Hive
Operator (Calcite Return Path) : groupby distinct does not work (Jesus
Camacho Rodriguez, reviewed by Pengcheng Xiong)
Posted by se...@apache.org.
HIVE-11445 : CBO: Calcite Operator To Hive Operator (Calcite Return Path) : groupby distinct does not work (Jesus Camacho Rodriguez, reviewed by Pengcheng Xiong)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/99fa337b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/99fa337b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/99fa337b
Branch: refs/heads/llap
Commit: 99fa337b0b146be984fc49d52ecb1a3494164082
Parents: 2c445cc
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Wed Sep 30 09:32:27 2015 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Oct 1 10:10:05 2015 +0100
----------------------------------------------------------------------
.../calcite/translator/HiveGBOpConvUtil.java | 43 ++--
.../cbo_rp_gby2_map_multi_distinct.q | 38 +++
.../cbo_rp_gby2_map_multi_distinct.q.out | 236 +++++++++++++++++++
3 files changed, 299 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/99fa337b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
index a0e2e67..a129cf3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
@@ -40,15 +40,14 @@ import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter.OpAttr;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
@@ -70,12 +69,17 @@ import com.google.common.collect.ImmutableList;
* external names if possible.<br>
* 3. In ExprNode & in ColumnInfo the tableAlias/VirtualColumn is specified
* differently for different GB/RS in pipeline. Remove the different treatments.
- * 3. VirtualColMap needs to be maintained
+ * 4. VirtualColMap needs to be maintained
*
*/
public class HiveGBOpConvUtil {
+
private static enum HIVEGBPHYSICALMODE {
- MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB, MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB, MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT, MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT, NO_MAP_SIDE_GB_NO_SKEW, NO_MAP_SIDE_GB_SKEW
+ MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB,
+ MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB,
+ MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT,
+ MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT,
+ NO_MAP_SIDE_GB_NO_SKEW, NO_MAP_SIDE_GB_SKEW
};
private static class UDAFAttrs {
@@ -94,8 +98,8 @@ public class HiveGBOpConvUtil {
private final List<ExprNodeDesc> gbKeys = new ArrayList<ExprNodeDesc>();
private final List<Integer> grpSets = new ArrayList<Integer>();
- private boolean grpSetRqrAdditionalMRJob;
- private boolean grpIdFunctionNeeded;
+ private boolean grpSetRqrAdditionalMRJob;
+ private boolean grpIdFunctionNeeded;
private final List<String> distExprNames = new ArrayList<String>();
private final List<TypeInfo> distExprTypes = new ArrayList<TypeInfo>();
@@ -105,12 +109,12 @@ public class HiveGBOpConvUtil {
private final List<ExprNodeDesc> deDupedNonDistIrefs = new ArrayList<ExprNodeDesc>();
private final List<UDAFAttrs> udafAttrs = new ArrayList<UDAFAttrs>();
- private boolean containsDistinctAggr = false;
+ private boolean containsDistinctAggr = false;
- float groupByMemoryUsage;
- float memoryThreshold;
+ float groupByMemoryUsage;
+ float memoryThreshold;
- private HIVEGBPHYSICALMODE gbPhysicalPipelineMode;
+ private HIVEGBPHYSICALMODE gbPhysicalPipelineMode;
};
private static HIVEGBPHYSICALMODE getAggOPMode(HiveConf hc, GBInfo gbInfo) {
@@ -203,11 +207,14 @@ public class HiveGBOpConvUtil {
for (int i = 0; i < argLst.size(); i++) {
if (!distinctRefs.contains(argLst.get(i))) {
distinctRefs.add(argLst.get(i));
- distParamInRefsToOutputPos.put(argLst.get(i), gbInfo.distExprNodes.size());
distinctExpr = HiveCalciteUtil.getExprNode(argLst.get(i), aggInputRel, exprConv);
- gbInfo.distExprNodes.add(distinctExpr);
- gbInfo.distExprNames.add(argNames.get(i));
- gbInfo.distExprTypes.add(distinctExpr.getTypeInfo());
+ // Only distinct nodes that are NOT part of the key should be added to distExprNodes
+ if (ExprNodeDescUtils.indexOf(distinctExpr, gbInfo.gbKeys) < 0) {
+ distParamInRefsToOutputPos.put(argLst.get(i), gbInfo.distExprNodes.size());
+ gbInfo.distExprNodes.add(distinctExpr);
+ gbInfo.distExprNames.add(argNames.get(i));
+ gbInfo.distExprTypes.add(distinctExpr.getTypeInfo());
+ }
}
}
}
@@ -254,10 +261,10 @@ public class HiveGBOpConvUtil {
}
// special handling for count, similar to PlanModifierForASTConv::replaceEmptyGroupAggr()
- udafAttrs.udafEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator(udafAttrs.udafName,
- new ArrayList<ExprNodeDesc>(udafAttrs.udafParams), new ASTNode(),
- udafAttrs.isDistinctUDAF, udafAttrs.udafParams.size() == 0 &&
- "count".equalsIgnoreCase(udafAttrs.udafName) ? true : false);
+ udafAttrs.udafEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator(udafAttrs.udafName,
+ new ArrayList<ExprNodeDesc>(udafAttrs.udafParams), new ASTNode(),
+ udafAttrs.isDistinctUDAF, udafAttrs.udafParams.size() == 0 &&
+ "count".equalsIgnoreCase(udafAttrs.udafName) ? true : false);
gbInfo.udafAttrs.add(udafAttrs);
}
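
The heart of this patch is the new indexOf guard: a DISTINCT argument that
already appears among the group-by keys must not be registered again as a
separate distinct expression, otherwise the reduce-side key layout goes
wrong. A simplified sketch of the rule, with plain strings standing in for
ExprNodeDesc (illustrative only):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class DistinctDedupDemo {
      public static void main(String[] args) {
        // Group-by key and DISTINCT arguments from the new test query.
        List<String> gbKeys = Arrays.asList("substr(key, 1, 1)");
        List<String> distExprNodes = new ArrayList<String>();
        for (String arg : Arrays.asList("substr(key, 1, 1)",
                                        "substr(value, 5)")) {
          // Mirror of the ExprNodeDescUtils.indexOf(...) < 0 guard: a
          // DISTINCT argument that is already a key is not added again.
          if (gbKeys.indexOf(arg) < 0 && !distExprNodes.contains(arg)) {
            distExprNodes.add(arg);
          }
        }
        System.out.println(distExprNodes); // prints [substr(value, 5)]
      }
    }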
http://git-wip-us.apache.org/repos/asf/hive/blob/99fa337b/ql/src/test/queries/clientpositive/cbo_rp_gby2_map_multi_distinct.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cbo_rp_gby2_map_multi_distinct.q b/ql/src/test/queries/clientpositive/cbo_rp_gby2_map_multi_distinct.q
new file mode 100644
index 0000000..28f1f81
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_rp_gby2_map_multi_distinct.q
@@ -0,0 +1,38 @@
+set hive.cbo.enable=true;
+set hive.cbo.returnpath.hiveop=true;
+
+set hive.map.aggr=true;
+set hive.groupby.skewindata=false;
+set mapred.reduce.tasks=31;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE;
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1);
+
+FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1);
+
+SELECT dest1.* FROM dest1;
+
+-- HIVE-5560 when group by key is used in distinct function, invalid results are returned
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1);
+
+FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1);
+
+SELECT dest1.* FROM dest1;
http://git-wip-us.apache.org/repos/asf/hive/blob/99fa337b/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out
new file mode 100644
index 0000000..8592d6c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out
@@ -0,0 +1,236 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string)
+ outputColumnNames: $f0, $f1, $f2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT $f1), sum($f1), sum(DISTINCT $f1), count($f2)
+ keys: $f0 (type: string), $f1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: double), _col5 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col3)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: $f0, $f1, $f2, $f3, $f4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: $f0 (type: string), UDFToInteger($f1) (type: int), concat($f0, $f2) (type: string), UDFToInteger($f3) (type: int), UDFToInteger($f4) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0 1 00.0 0 3
+1 71 116414.0 10044 115
+2 69 225571.0 15780 111
+3 62 332004.0 20119 99
+4 74 452763.0 30965 124
+5 6 5397.0 278 10
+6 5 6398.0 331 6
+7 6 7735.0 447 10
+8 8 8762.0 595 10
+9 7 91047.0 577 12
+PREHOOK: query: -- HIVE-5560 when group by key is used in distinct function, invalid results are returned
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- HIVE-5560 when group by key is used in distinct function, invalid results are returned
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string)
+ outputColumnNames: $f0, $f1, $f2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT $f0), sum($f1), sum(DISTINCT $f1), count($f2)
+ keys: $f0 (type: string), $f1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: double), _col5 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col3)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: $f0, $f1, $f2, $f3, $f4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: $f0 (type: string), UDFToInteger($f1) (type: int), concat($f0, $f2) (type: string), UDFToInteger($f3) (type: int), UDFToInteger($f4) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
+GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0 1 00.0 0 3
+1 1 116414.0 10044 115
+2 1 225571.0 15780 111
+3 1 332004.0 20119 99
+4 1 452763.0 30965 124
+5 1 5397.0 278 10
+6 1 6398.0 331 6
+7 1 7735.0 447 10
+8 1 8762.0 595 10
+9 1 91047.0 577 12
[21/22] hive git commit: HIVE-12013 : LLAP: disable most llap tests
before merge (Sergey Shelukhin)
Posted by se...@apache.org.
HIVE-12013 : LLAP: disable most llap tests before merge (Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a1bc2ef4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a1bc2ef4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a1bc2ef4
Branch: refs/heads/llap
Commit: a1bc2ef4cda1fb9a42a58b9433bf60737519d32c
Parents: f272ccb
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Oct 1 19:29:32 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Oct 1 19:29:32 2015 -0700
----------------------------------------------------------------------
itests/qtest/pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/a1bc2ef4/itests/qtest/pom.xml
----------------------------------------------------------------------
diff --git a/itests/qtest/pom.xml b/itests/qtest/pom.xml
index 8c41b5a..bb5b1a1 100644
--- a/itests/qtest/pom.xml
+++ b/itests/qtest/pom.xml
@@ -538,7 +538,7 @@
templatePath="${basedir}/${hive.path.to.root}/ql/src/test/templates/" template="TestCliDriver.vm"
queryDirectory="${basedir}/${hive.path.to.root}/ql/src/test/queries/clientpositive/"
queryFile="${qfile}"
- includeQueryFile="${minitez.query.files},${minitez.query.files.shared}"
+ includeQueryFile="${minitez.query.files}"
queryFileRegex="${qfile_regex}"
clusterMode="llap"
runDisabled="${run_disabled}"
[12/22] hive git commit: HIVE-4243. Fix column names in ORC metadata.
Posted by se...@apache.org.
HIVE-4243. Fix column names in ORC metadata.
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7b1ed3d3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7b1ed3d3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7b1ed3d3
Branch: refs/heads/llap
Commit: 7b1ed3d3037860e2b7fc24b760a993f5e928b816
Parents: 99fa337
Author: Owen O'Malley <om...@apache.org>
Authored: Fri Sep 4 16:11:13 2015 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Thu Oct 1 13:07:03 2015 +0200
----------------------------------------------------------------------
.../hive/ql/io/orc/ColumnStatisticsImpl.java | 55 +-
.../apache/hadoop/hive/ql/io/orc/OrcFile.java | 33 +-
.../hadoop/hive/ql/io/orc/OrcOutputFormat.java | 145 ++++-
.../apache/hadoop/hive/ql/io/orc/OrcUtils.java | 177 +-----
.../hadoop/hive/ql/io/orc/ReaderImpl.java | 2 +-
.../hadoop/hive/ql/io/orc/TypeDescription.java | 466 ++++++++++++++++
.../apache/hadoop/hive/ql/io/orc/Writer.java | 9 +
.../hadoop/hive/ql/io/orc/WriterImpl.java | 550 +++++++++----------
.../hadoop/hive/ql/io/orc/orc_proto.proto | 1 +
.../hive/ql/io/orc/TestColumnStatistics.java | 43 +-
.../hive/ql/io/orc/TestInputOutputFormat.java | 15 +-
.../hadoop/hive/ql/io/orc/TestOrcFile.java | 41 +-
.../hive/ql/io/orc/TestOrcRawRecordMerger.java | 2 +-
.../hadoop/hive/ql/io/orc/TestOrcWideTable.java | 224 +-------
.../hive/ql/io/orc/TestTypeDescription.java | 67 +++
.../resources/orc-file-dump-bloomfilter.out | 2 +-
.../resources/orc-file-dump-bloomfilter2.out | 2 +-
.../orc-file-dump-dictionary-threshold.out | 2 +-
ql/src/test/resources/orc-file-dump.json | 2 +-
ql/src/test/resources/orc-file-dump.out | 2 +-
ql/src/test/resources/orc-file-has-null.out | 2 +-
.../clientpositive/annotate_stats_part.q.out | 6 +-
.../clientpositive/annotate_stats_table.q.out | 4 +-
.../dynpart_sort_opt_vectorization.q.out | 16 +-
.../dynpart_sort_optimization2.q.out | 8 +-
.../extrapolate_part_stats_full.q.out | 24 +-
.../extrapolate_part_stats_partial.q.out | 76 +--
.../extrapolate_part_stats_partial_ndv.q.out | 38 +-
.../results/clientpositive/orc_analyze.q.out | 46 +-
.../results/clientpositive/orc_file_dump.q.out | 18 +-
.../clientpositive/orc_int_type_promotion.q.out | 6 +-
.../clientpositive/spark/vectorized_ptf.q.out | 108 ++--
.../tez/dynpart_sort_opt_vectorization.q.out | 16 +-
.../tez/dynpart_sort_optimization2.q.out | 8 +-
.../clientpositive/tez/orc_analyze.q.out | 46 +-
.../clientpositive/tez/union_fast_stats.q.out | 16 +-
.../clientpositive/tez/vector_outer_join1.q.out | 48 +-
.../clientpositive/tez/vector_outer_join4.q.out | 48 +-
.../clientpositive/tez/vectorized_ptf.q.out | 108 ++--
.../clientpositive/union_fast_stats.q.out | 16 +-
.../results/clientpositive/vectorized_ptf.q.out | 104 ++--
41 files changed, 1468 insertions(+), 1134 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
index 15a3e2c..f39d3e2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
@@ -22,8 +22,6 @@ import java.sql.Timestamp;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
@@ -964,35 +962,30 @@ class ColumnStatisticsImpl implements ColumnStatistics {
return builder;
}
- static ColumnStatisticsImpl create(ObjectInspector inspector) {
- switch (inspector.getCategory()) {
- case PRIMITIVE:
- switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) {
- case BOOLEAN:
- return new BooleanStatisticsImpl();
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- return new IntegerStatisticsImpl();
- case FLOAT:
- case DOUBLE:
- return new DoubleStatisticsImpl();
- case STRING:
- case CHAR:
- case VARCHAR:
- return new StringStatisticsImpl();
- case DECIMAL:
- return new DecimalStatisticsImpl();
- case DATE:
- return new DateStatisticsImpl();
- case TIMESTAMP:
- return new TimestampStatisticsImpl();
- case BINARY:
- return new BinaryStatisticsImpl();
- default:
- return new ColumnStatisticsImpl();
- }
+ static ColumnStatisticsImpl create(TypeDescription schema) {
+ switch (schema.getCategory()) {
+ case BOOLEAN:
+ return new BooleanStatisticsImpl();
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ return new IntegerStatisticsImpl();
+ case FLOAT:
+ case DOUBLE:
+ return new DoubleStatisticsImpl();
+ case STRING:
+ case CHAR:
+ case VARCHAR:
+ return new StringStatisticsImpl();
+ case DECIMAL:
+ return new DecimalStatisticsImpl();
+ case DATE:
+ return new DateStatisticsImpl();
+ case TIMESTAMP:
+ return new TimestampStatisticsImpl();
+ case BINARY:
+ return new BinaryStatisticsImpl();
default:
return new ColumnStatisticsImpl();
}
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
index a60ebb4..23dec4a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
@@ -25,6 +25,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
/**
* Contains factory methods to read or write ORC files.
@@ -102,7 +103,9 @@ public final class OrcFile {
*/
public enum WriterVersion {
ORIGINAL(0),
- HIVE_8732(1); // corrupted stripe/file maximum column statistics
+ HIVE_8732(1), // corrupted stripe/file maximum column statistics
+ HIVE_4243(2), // use real column names from Hive tables
+ FUTURE(Integer.MAX_VALUE); // a version from a future writer
private final int id;
@@ -205,7 +208,9 @@ public final class OrcFile {
public static class WriterOptions {
private final Configuration configuration;
private FileSystem fileSystemValue = null;
- private ObjectInspector inspectorValue = null;
+ private boolean explicitSchema = false;
+ private TypeDescription schema = null;
+ private ObjectInspector inspector = null;
private long stripeSizeValue;
private long blockSizeValue;
private int rowIndexStrideValue;
@@ -355,11 +360,26 @@ public final class OrcFile {
}
/**
- * A required option that sets the object inspector for the rows. Used
- * to determine the schema for the file.
+ * A required option that sets the object inspector for the rows. If
+ * setSchema is not called, it also defines the schema.
*/
public WriterOptions inspector(ObjectInspector value) {
- inspectorValue = value;
+ this.inspector = value;
+ if (!explicitSchema) {
+ schema = OrcOutputFormat.convertTypeInfo(
+ TypeInfoUtils.getTypeInfoFromObjectInspector(value));
+ }
+ return this;
+ }
+
+ /**
+ * Set the schema for the file. This is a required parameter.
+ * @param schema the schema for the file.
+ * @return this
+ */
+ public WriterOptions setSchema(TypeDescription schema) {
+ this.explicitSchema = true;
+ this.schema = schema;
return this;
}
@@ -426,7 +446,8 @@ public final class OrcFile {
FileSystem fs = opts.fileSystemValue == null ?
path.getFileSystem(opts.configuration) : opts.fileSystemValue;
- return new WriterImpl(fs, path, opts.configuration, opts.inspectorValue,
+ return new WriterImpl(fs, path, opts.configuration, opts.inspector,
+ opts.schema,
opts.stripeSizeValue, opts.compressValue,
opts.bufferSizeValue, opts.rowIndexStrideValue,
opts.memoryManagerValue, opts.blockPaddingValue,
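
With setSchema(), a writer no longer has to derive its ORC schema from the
ObjectInspector's internal column names (_col0, _col1, ...). A hedged usage
sketch (file path and field names invented; addField() is assumed to chain
like the other builder-style factories in this patch, and a real caller
would normally still set inspector(...) for row encoding):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;

    public class ExplicitSchemaDemo {
      static Writer openWriter(Configuration conf) throws IOException {
        TypeDescription schema = TypeDescription.createStruct()
            .addField("id", TypeDescription.createLong())
            .addField("name", TypeDescription.createString());
        // setSchema() flips explicitSchema, so a later inspector(...) call
        // no longer derives (and overwrites) the schema.
        return OrcFile.createWriter(new Path("/tmp/demo.orc"),
            OrcFile.writerOptions(conf).setSchema(schema));
      }
    }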
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
index ea4ebb4..ad24c58 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
@@ -20,12 +20,17 @@ package org.apache.hadoop.hive.ql.io.orc;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
import java.util.Properties;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.ql.io.RecordUpdater;
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
import org.apache.hadoop.hive.ql.io.orc.OrcFile.EncodingStrategy;
@@ -36,6 +41,15 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
@@ -52,6 +66,90 @@ import org.apache.hadoop.util.Progressable;
public class OrcOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow>
implements AcidOutputFormat<NullWritable, OrcSerdeRow> {
+ private static final Log LOG = LogFactory.getLog(OrcOutputFormat.class);
+
+ static TypeDescription convertTypeInfo(TypeInfo info) {
+ switch (info.getCategory()) {
+ case PRIMITIVE: {
+ PrimitiveTypeInfo pinfo = (PrimitiveTypeInfo) info;
+ switch (pinfo.getPrimitiveCategory()) {
+ case BOOLEAN:
+ return TypeDescription.createBoolean();
+ case BYTE:
+ return TypeDescription.createByte();
+ case SHORT:
+ return TypeDescription.createShort();
+ case INT:
+ return TypeDescription.createInt();
+ case LONG:
+ return TypeDescription.createLong();
+ case FLOAT:
+ return TypeDescription.createFloat();
+ case DOUBLE:
+ return TypeDescription.createDouble();
+ case STRING:
+ return TypeDescription.createString();
+ case DATE:
+ return TypeDescription.createDate();
+ case TIMESTAMP:
+ return TypeDescription.createTimestamp();
+ case BINARY:
+ return TypeDescription.createBinary();
+ case DECIMAL: {
+ DecimalTypeInfo dinfo = (DecimalTypeInfo) pinfo;
+ return TypeDescription.createDecimal()
+ .withScale(dinfo.getScale())
+ .withPrecision(dinfo.getPrecision());
+ }
+ case VARCHAR: {
+ BaseCharTypeInfo cinfo = (BaseCharTypeInfo) pinfo;
+ return TypeDescription.createVarchar()
+ .withMaxLength(cinfo.getLength());
+ }
+ case CHAR: {
+ BaseCharTypeInfo cinfo = (BaseCharTypeInfo) pinfo;
+ return TypeDescription.createChar()
+ .withMaxLength(cinfo.getLength());
+ }
+ default:
+ throw new IllegalArgumentException("ORC doesn't handle primitive" +
+ " category " + pinfo.getPrimitiveCategory());
+ }
+ }
+ case LIST: {
+ ListTypeInfo linfo = (ListTypeInfo) info;
+ return TypeDescription.createList
+ (convertTypeInfo(linfo.getListElementTypeInfo()));
+ }
+ case MAP: {
+ MapTypeInfo minfo = (MapTypeInfo) info;
+ return TypeDescription.createMap
+ (convertTypeInfo(minfo.getMapKeyTypeInfo()),
+ convertTypeInfo(minfo.getMapValueTypeInfo()));
+ }
+ case UNION: {
+ UnionTypeInfo minfo = (UnionTypeInfo) info;
+ TypeDescription result = TypeDescription.createUnion();
+ for (TypeInfo child: minfo.getAllUnionObjectTypeInfos()) {
+ result.addUnionChild(convertTypeInfo(child));
+ }
+ return result;
+ }
+ case STRUCT: {
+ StructTypeInfo sinfo = (StructTypeInfo) info;
+ TypeDescription result = TypeDescription.createStruct();
+ for(String fieldName: sinfo.getAllStructFieldNames()) {
+ result.addField(fieldName,
+ convertTypeInfo(sinfo.getStructFieldTypeInfo(fieldName)));
+ }
+ return result;
+ }
+ default:
+ throw new IllegalArgumentException("ORC doesn't handle " +
+ info.getCategory());
+ }
+ }
+
private static class OrcRecordWriter
implements RecordWriter<NullWritable, OrcSerdeRow>,
StatsProvidingRecordWriter {
@@ -115,7 +213,44 @@ public class OrcOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow>
}
private OrcFile.WriterOptions getOptions(JobConf conf, Properties props) {
- return OrcFile.writerOptions(props, conf);
+ OrcFile.WriterOptions result = OrcFile.writerOptions(props, conf);
+ if (props != null) {
+ final String columnNameProperty =
+ props.getProperty(IOConstants.COLUMNS);
+ final String columnTypeProperty =
+ props.getProperty(IOConstants.COLUMNS_TYPES);
+ if (columnNameProperty != null &&
+ !columnNameProperty.isEmpty() &&
+ columnTypeProperty != null &&
+ !columnTypeProperty.isEmpty()) {
+ List<String> columnNames;
+ List<TypeInfo> columnTypes;
+
+ if (columnNameProperty.length() == 0) {
+ columnNames = new ArrayList<String>();
+ } else {
+ columnNames = Arrays.asList(columnNameProperty.split(","));
+ }
+
+ if (columnTypeProperty.length() == 0) {
+ columnTypes = new ArrayList<TypeInfo>();
+ } else {
+ columnTypes =
+ TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+ }
+
+ TypeDescription schema = TypeDescription.createStruct();
+ for (int i = 0; i < columnNames.size(); ++i) {
+ schema.addField(columnNames.get(i),
+ convertTypeInfo(columnTypes.get(i)));
+ }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("ORC schema = " + schema);
+ }
+ result.setSchema(schema);
+ }
+ }
+ return result;
}
@Override
@@ -123,7 +258,7 @@ public class OrcOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow>
getRecordWriter(FileSystem fileSystem, JobConf conf, String name,
Progressable reporter) throws IOException {
return new
- OrcRecordWriter(new Path(name), getOptions(conf,null));
+ OrcRecordWriter(new Path(name), getOptions(conf, null));
}
@@ -135,7 +270,7 @@ public class OrcOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow>
boolean isCompressed,
Properties tableProperties,
Progressable reporter) throws IOException {
- return new OrcRecordWriter(path, getOptions(conf,tableProperties));
+ return new OrcRecordWriter(path, getOptions(conf, tableProperties));
}
private class DummyOrcRecordUpdater implements RecordUpdater {
@@ -229,8 +364,8 @@ public class OrcOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow>
}
@Override
- public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getRawRecordWriter(Path path,
- Options options) throws IOException {
+ public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter
+ getRawRecordWriter(Path path, Options options) throws IOException {
final Path filename = AcidUtils.createFilename(path, options);
final OrcFile.WriterOptions opts =
OrcFile.writerOptions(options.getConfiguration());
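
getOptions() above is where table properties become an ORC schema: COLUMNS
supplies the real field names and COLUMNS_TYPES the Hive types, which
convertTypeInfo() maps onto TypeDescription. A sketch of that path (property
values invented; convertTypeInfo() is package-private, so this assumes code
living in org.apache.hadoop.hive.ql.io.orc):

    import java.util.Arrays;
    import java.util.List;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

    public class SchemaFromPropsDemo {
      static TypeDescription build() {
        // Stand-ins for the COLUMNS / COLUMNS_TYPES table properties.
        List<String> names = Arrays.asList("a", "b", "c");
        List<TypeInfo> types = TypeInfoUtils.getTypeInfosFromTypeString(
            "int,string,map<string,int>");
        TypeDescription schema = TypeDescription.createStruct();
        for (int i = 0; i < names.size(); ++i) {
          schema.addField(names.get(i),
              OrcOutputFormat.convertTypeInfo(types.get(i)));
        }
        return schema; // struct<a:int,b:string,c:map<string,int>>
      }
    }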
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
index db2ca15..3e2af23 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java
@@ -18,20 +18,10 @@
package org.apache.hadoop.hive.ql.io.orc;
import java.util.Arrays;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
-
-import com.google.common.collect.Lists;
public class OrcUtils {
private static final Log LOG = LogFactory.getLog(OrcUtils.class);
@@ -49,159 +39,44 @@ public class OrcUtils {
* index 5 correspond to column d. After flattening list<string> gets 2 columns.
*
* @param selectedColumns - comma separated list of selected column names
- * @param allColumns - comma separated list of all column names
- * @param inspector - object inspector
+ * @param schema - object schema
* @return - boolean array with true value set for the specified column names
*/
- public static boolean[] includeColumns(String selectedColumns, String allColumns,
- ObjectInspector inspector) {
- int numFlattenedCols = getFlattenedColumnsCount(inspector);
- boolean[] results = new boolean[numFlattenedCols];
+ public static boolean[] includeColumns(String selectedColumns,
+ TypeDescription schema) {
+ int numFlattenedCols = schema.getMaximumId();
+ boolean[] results = new boolean[numFlattenedCols + 1];
if ("*".equals(selectedColumns)) {
Arrays.fill(results, true);
return results;
}
- if (selectedColumns != null && !selectedColumns.isEmpty()) {
- includeColumnsImpl(results, selectedColumns.toLowerCase(), allColumns, inspector);
- }
- return results;
- }
-
- private static void includeColumnsImpl(boolean[] includeColumns, String selectedColumns,
- String allColumns,
- ObjectInspector inspector) {
- Map<String, List<Integer>> columnSpanMap = getColumnSpan(allColumns, inspector);
- LOG.info("columnSpanMap: " + columnSpanMap);
-
- String[] selCols = selectedColumns.split(",");
- for (String sc : selCols) {
- if (columnSpanMap.containsKey(sc)) {
- List<Integer> colSpan = columnSpanMap.get(sc);
- int start = colSpan.get(0);
- int end = colSpan.get(1);
- for (int i = start; i <= end; i++) {
- includeColumns[i] = true;
+ if (selectedColumns != null &&
+ schema.getCategory() == TypeDescription.Category.STRUCT) {
+ List<String> fieldNames = schema.getFieldNames();
+ List<TypeDescription> fields = schema.getChildren();
+ for (String column: selectedColumns.split((","))) {
+ TypeDescription col = findColumn(column, fieldNames, fields);
+ if (col != null) {
+ for(int i=col.getId(); i <= col.getMaximumId(); ++i) {
+ results[i] = true;
}
}
}
-
- LOG.info("includeColumns: " + Arrays.toString(includeColumns));
}
-
- private static Map<String, List<Integer>> getColumnSpan(String allColumns,
- ObjectInspector inspector) {
- // map that contains the column span for each column. Column span is the number of columns
- // required after flattening. For a given object inspector this map contains the start column
- // id and end column id (both inclusive) after flattening.
- // EXAMPLE:
- // schema: struct<a:int, b:float, c:map<string,int>>
- // column span map for the above struct will be
- // a => [1,1], b => [2,2], c => [3,5]
- Map<String, List<Integer>> columnSpanMap = new HashMap<String, List<Integer>>();
- if (allColumns != null) {
- String[] columns = allColumns.split(",");
- int startIdx = 0;
- int endIdx = 0;
- if (inspector instanceof StructObjectInspector) {
- StructObjectInspector soi = (StructObjectInspector) inspector;
- List<? extends StructField> fields = soi.getAllStructFieldRefs();
- for (int i = 0; i < fields.size(); i++) {
- StructField sf = fields.get(i);
-
- // we get the type (category) from object inspector but column name from the argument.
- // The reason for this is hive (FileSinkOperator) does not pass the actual column names,
- // instead it passes the internal column names (_col1,_col2).
- ObjectInspector sfOI = sf.getFieldObjectInspector();
- String colName = columns[i];
-
- startIdx = endIdx + 1;
- switch (sfOI.getCategory()) {
- case PRIMITIVE:
- endIdx += 1;
- break;
- case STRUCT:
- endIdx += 1;
- StructObjectInspector structInsp = (StructObjectInspector) sfOI;
- List<? extends StructField> structFields = structInsp.getAllStructFieldRefs();
- for (int j = 0; j < structFields.size(); ++j) {
- endIdx += getFlattenedColumnsCount(structFields.get(j).getFieldObjectInspector());
- }
- break;
- case MAP:
- endIdx += 1;
- MapObjectInspector mapInsp = (MapObjectInspector) sfOI;
- endIdx += getFlattenedColumnsCount(mapInsp.getMapKeyObjectInspector());
- endIdx += getFlattenedColumnsCount(mapInsp.getMapValueObjectInspector());
- break;
- case LIST:
- endIdx += 1;
- ListObjectInspector listInsp = (ListObjectInspector) sfOI;
- endIdx += getFlattenedColumnsCount(listInsp.getListElementObjectInspector());
- break;
- case UNION:
- endIdx += 1;
- UnionObjectInspector unionInsp = (UnionObjectInspector) sfOI;
- List<ObjectInspector> choices = unionInsp.getObjectInspectors();
- for (int j = 0; j < choices.size(); ++j) {
- endIdx += getFlattenedColumnsCount(choices.get(j));
- }
- break;
- default:
- throw new IllegalArgumentException("Bad category: " +
- inspector.getCategory());
- }
-
- columnSpanMap.put(colName, Lists.newArrayList(startIdx, endIdx));
- }
- }
- }
- return columnSpanMap;
+ return results;
}
- /**
- * Returns the number of columns after flatting complex types.
- *
- * @param inspector - object inspector
- * @return
- */
- public static int getFlattenedColumnsCount(ObjectInspector inspector) {
- int numWriters = 0;
- switch (inspector.getCategory()) {
- case PRIMITIVE:
- numWriters += 1;
- break;
- case STRUCT:
- numWriters += 1;
- StructObjectInspector structInsp = (StructObjectInspector) inspector;
- List<? extends StructField> fields = structInsp.getAllStructFieldRefs();
- for (int i = 0; i < fields.size(); ++i) {
- numWriters += getFlattenedColumnsCount(fields.get(i).getFieldObjectInspector());
- }
- break;
- case MAP:
- numWriters += 1;
- MapObjectInspector mapInsp = (MapObjectInspector) inspector;
- numWriters += getFlattenedColumnsCount(mapInsp.getMapKeyObjectInspector());
- numWriters += getFlattenedColumnsCount(mapInsp.getMapValueObjectInspector());
- break;
- case LIST:
- numWriters += 1;
- ListObjectInspector listInsp = (ListObjectInspector) inspector;
- numWriters += getFlattenedColumnsCount(listInsp.getListElementObjectInspector());
- break;
- case UNION:
- numWriters += 1;
- UnionObjectInspector unionInsp = (UnionObjectInspector) inspector;
- List<ObjectInspector> choices = unionInsp.getObjectInspectors();
- for (int i = 0; i < choices.size(); ++i) {
- numWriters += getFlattenedColumnsCount(choices.get(i));
- }
- break;
- default:
- throw new IllegalArgumentException("Bad category: " +
- inspector.getCategory());
+ private static TypeDescription findColumn(String columnName,
+ List<String> fieldNames,
+ List<TypeDescription> fields) {
+ int i = 0;
+ for(String fieldName: fieldNames) {
+ if (fieldName.equalsIgnoreCase(columnName)) {
+ return fields.get(i);
+ } else {
+ i += 1;
+ }
}
- return numWriters;
+ return null;
}
-
}
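
The rewritten includeColumns() leans on TypeDescription's pre-assigned column
ids instead of re-walking ObjectInspectors: selecting a column marks the id
range [getId(), getMaximumId()] of that subtree. A hedged sketch (field names
invented; the createMap factory and the pre-order id numbering are assumed
from the full TypeDescription class, which is only partially shown below):

    public class IncludeColumnsDemo {
      public static void main(String[] args) {
        // schema: struct<a:int,b:map<string,int>>
        // assumed ids: 0=struct, 1=a, 2=b, 3=map key, 4=map value
        TypeDescription schema = TypeDescription.createStruct()
            .addField("a", TypeDescription.createInt())
            .addField("b", TypeDescription.createMap(
                TypeDescription.createString(),
                TypeDescription.createInt()));
        boolean[] include = OrcUtils.includeColumns("b", schema);
        // Selecting "b" marks ids 2..4: the map plus both of its children.
        for (int i = 0; i < include.length; ++i) {
          System.out.println(i + " -> " + include[i]);
        }
      }
    }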
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index 23b3b55..36fb858 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -347,7 +347,7 @@ public class ReaderImpl implements Reader {
return version;
}
}
- return OrcFile.WriterVersion.ORIGINAL;
+ return OrcFile.WriterVersion.FUTURE;
}
/** Extracts the necessary metadata from an externally store buffer (fullFooterBuffer). */
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java
new file mode 100644
index 0000000..3481bb3
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java
@@ -0,0 +1,466 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * This is the description of the types in an ORC file.
+ */
+public class TypeDescription {
+ private static final int MAX_PRECISION = 38;
+ private static final int MAX_SCALE = 38;
+ private static final int DEFAULT_PRECISION = 38;
+ private static final int DEFAULT_SCALE = 10;
+ private static final int DEFAULT_LENGTH = 256;
+ public enum Category {
+ BOOLEAN("boolean", true),
+ BYTE("tinyint", true),
+ SHORT("smallint", true),
+ INT("int", true),
+ LONG("bigint", true),
+ FLOAT("float", true),
+ DOUBLE("double", true),
+ STRING("string", true),
+ DATE("date", true),
+ TIMESTAMP("timestamp", true),
+ BINARY("binary", true),
+ DECIMAL("decimal", true),
+ VARCHAR("varchar", true),
+ CHAR("char", true),
+ LIST("array", false),
+ MAP("map", false),
+ STRUCT("struct", false),
+ UNION("union", false);
+
+ Category(String name, boolean isPrimitive) {
+ this.name = name;
+ this.isPrimitive = isPrimitive;
+ }
+
+ final boolean isPrimitive;
+ final String name;
+
+ public boolean isPrimitive() {
+ return isPrimitive;
+ }
+
+ public String getName() {
+ return name;
+ }
+ }
+
+ public static TypeDescription createBoolean() {
+ return new TypeDescription(Category.BOOLEAN);
+ }
+
+ public static TypeDescription createByte() {
+ return new TypeDescription(Category.BYTE);
+ }
+
+ public static TypeDescription createShort() {
+ return new TypeDescription(Category.SHORT);
+ }
+
+ public static TypeDescription createInt() {
+ return new TypeDescription(Category.INT);
+ }
+
+ public static TypeDescription createLong() {
+ return new TypeDescription(Category.LONG);
+ }
+
+ public static TypeDescription createFloat() {
+ return new TypeDescription(Category.FLOAT);
+ }
+
+ public static TypeDescription createDouble() {
+ return new TypeDescription(Category.DOUBLE);
+ }
+
+ public static TypeDescription createString() {
+ return new TypeDescription(Category.STRING);
+ }
+
+ public static TypeDescription createDate() {
+ return new TypeDescription(Category.DATE);
+ }
+
+ public static TypeDescription createTimestamp() {
+ return new TypeDescription(Category.TIMESTAMP);
+ }
+
+ public static TypeDescription createBinary() {
+ return new TypeDescription(Category.BINARY);
+ }
+
+ public static TypeDescription createDecimal() {
+ return new TypeDescription(Category.DECIMAL);
+ }
+
+ /**
+ * For decimal types, set the precision.
+ * @param precision the new precision
+ * @return this
+ */
+ public TypeDescription withPrecision(int precision) {
+ if (category != Category.DECIMAL) {
+ throw new IllegalArgumentException("precision is only allowed on decimal"+
+ " and not " + category.name);
+ } else if (precision < 1 || precision > MAX_PRECISION || scale > precision) {
+ throw new IllegalArgumentException("precision " + precision +
+ " is out of range 1 .. " + MAX_PRECISION);
+ }
+ this.precision = precision;
+ return this;
+ }
+
+ /**
+ * For decimal types, set the scale.
+ * @param scale the new scale
+ * @return this
+ */
+ public TypeDescription withScale(int scale) {
+ if (category != Category.DECIMAL) {
+ throw new IllegalArgumentException("scale is only allowed on decimal"+
+ " and not " + category.name);
+ } else if (scale < 0 || scale > MAX_SCALE || scale > precision) {
+ throw new IllegalArgumentException("scale is out of range at " + scale);
+ }
+ this.scale = scale;
+ return this;
+ }
+
+ public static TypeDescription createVarchar() {
+ return new TypeDescription(Category.VARCHAR);
+ }
+
+ public static TypeDescription createChar() {
+ return new TypeDescription(Category.CHAR);
+ }
+
+ /**
+ * Set the maximum length for char and varchar types.
+ * @param maxLength the maximum value
+ * @return this
+ */
+ public TypeDescription withMaxLength(int maxLength) {
+ if (category != Category.VARCHAR && category != Category.CHAR) {
+ throw new IllegalArgumentException("maxLength is only allowed on char" +
+ " and varchar and not " + category.name);
+ }
+ this.maxLength = maxLength;
+ return this;
+ }
+
+ public static TypeDescription createList(TypeDescription childType) {
+ TypeDescription result = new TypeDescription(Category.LIST);
+ result.children.add(childType);
+ childType.parent = result;
+ return result;
+ }
+
+ public static TypeDescription createMap(TypeDescription keyType,
+ TypeDescription valueType) {
+ TypeDescription result = new TypeDescription(Category.MAP);
+ result.children.add(keyType);
+ result.children.add(valueType);
+ keyType.parent = result;
+ valueType.parent = result;
+ return result;
+ }
+
+ public static TypeDescription createUnion() {
+ return new TypeDescription(Category.UNION);
+ }
+
+ public static TypeDescription createStruct() {
+ return new TypeDescription(Category.STRUCT);
+ }
+
+ /**
+ * Add a child to a union type.
+ * @param child a new child type to add
+ * @return the union type.
+ */
+ public TypeDescription addUnionChild(TypeDescription child) {
+ if (category != Category.UNION) {
+ throw new IllegalArgumentException("Can only add types to union type" +
+ " and not " + category);
+ }
+ children.add(child);
+ child.parent = this;
+ return this;
+ }
+
+ /**
+ * Add a field to a struct type as it is built.
+ * @param field the field name
+ * @param fieldType the type of the field
+ * @return the struct type
+ */
+ public TypeDescription addField(String field, TypeDescription fieldType) {
+ if (category != Category.STRUCT) {
+ throw new IllegalArgumentException("Can only add fields to struct type" +
+ " and not " + category);
+ }
+ fieldNames.add(field);
+ children.add(fieldType);
+ fieldType.parent = this;
+ return this;
+ }
+
+ /**
+ * Get the id for this type.
+ * The first call will cause all of the ids in the tree to be assigned, so
+ * it should not be called before the type is completely built.
+ * @return the sequential id
+ */
+ public int getId() {
+ // if the id hasn't been assigned, assign all of the ids from the root
+ if (id == -1) {
+ TypeDescription root = this;
+ while (root.parent != null) {
+ root = root.parent;
+ }
+ root.assignIds(0);
+ }
+ return id;
+ }
+
+ /**
+ * Get the maximum id assigned to this type or its children.
+ * The first call will cause all of the ids in the tree to be assigned, so
+ * it should not be called before the type is completely built.
+ * @return the maximum id assigned under this type
+ */
+ public int getMaximumId() {
+ // if the id hasn't been assigned, assign all of the ids from the root
+ if (maxId == -1) {
+ TypeDescription root = this;
+ while (root.parent != null) {
+ root = root.parent;
+ }
+ root.assignIds(0);
+ }
+ return maxId;
+ }
+
+ /**
+ * Get the kind of this type.
+ * @return get the category for this type.
+ */
+ public Category getCategory() {
+ return category;
+ }
+
+ /**
+ * Get the maximum length of the type. Only used for char and varchar types.
+ * @return the maximum length of the string type
+ */
+ public int getMaxLength() {
+ return maxLength;
+ }
+
+ /**
+ * Get the precision of the decimal type.
+ * @return the number of digits for the precision.
+ */
+ public int getPrecision() {
+ return precision;
+ }
+
+ /**
+ * Get the scale of the decimal type.
+ * @return the number of digits for the scale.
+ */
+ public int getScale() {
+ return scale;
+ }
+
+ /**
+ * For struct types, get the list of field names.
+ * @return the list of field names.
+ */
+ public List<String> getFieldNames() {
+ return Collections.unmodifiableList(fieldNames);
+ }
+
+ /**
+ * Get the subtypes of this type.
+ * @return the list of children types
+ */
+ public List<TypeDescription> getChildren() {
+ return children == null ? null : Collections.unmodifiableList(children);
+ }
+
+ /**
+ * Assign ids to all of the nodes under this one.
+ * @param startId the lowest id to assign
+ * @return the next available id
+ */
+ private int assignIds(int startId) {
+ id = startId++;
+ if (children != null) {
+ for (TypeDescription child : children) {
+ startId = child.assignIds(startId);
+ }
+ }
+ maxId = startId - 1;
+ return startId;
+ }
+
+ private TypeDescription(Category category) {
+ this.category = category;
+ if (category.isPrimitive) {
+ children = null;
+ } else {
+ children = new ArrayList<>();
+ }
+ if (category == Category.STRUCT) {
+ fieldNames = new ArrayList<>();
+ } else {
+ fieldNames = null;
+ }
+ }
+
+ private int id = -1;
+ private int maxId = -1;
+ private TypeDescription parent;
+ private final Category category;
+ private final List<TypeDescription> children;
+ private final List<String> fieldNames;
+ private int maxLength = DEFAULT_LENGTH;
+ private int precision = DEFAULT_PRECISION;
+ private int scale = DEFAULT_SCALE;
+
+ public void printToBuffer(StringBuilder buffer) {
+ buffer.append(category.name);
+ switch (category) {
+ case DECIMAL:
+ buffer.append('(');
+ buffer.append(precision);
+ buffer.append(',');
+ buffer.append(scale);
+ buffer.append(')');
+ break;
+ case CHAR:
+ case VARCHAR:
+ buffer.append('(');
+ buffer.append(maxLength);
+ buffer.append(')');
+ break;
+ case LIST:
+ case MAP:
+ case UNION:
+ buffer.append('<');
+ for(int i=0; i < children.size(); ++i) {
+ if (i != 0) {
+ buffer.append(',');
+ }
+ children.get(i).printToBuffer(buffer);
+ }
+ buffer.append('>');
+ break;
+ case STRUCT:
+ buffer.append('<');
+ for(int i=0; i < children.size(); ++i) {
+ if (i != 0) {
+ buffer.append(',');
+ }
+ buffer.append(fieldNames.get(i));
+ buffer.append(':');
+ children.get(i).printToBuffer(buffer);
+ }
+ buffer.append('>');
+ break;
+ default:
+ break;
+ }
+ }
+
+ public String toString() {
+ StringBuilder buffer = new StringBuilder();
+ printToBuffer(buffer);
+ return buffer.toString();
+ }
+
+ private void printJsonToBuffer(String prefix, StringBuilder buffer,
+ int indent) {
+ for(int i=0; i < indent; ++i) {
+ buffer.append(' ');
+ }
+ buffer.append(prefix);
+ buffer.append("{\"category\": \"");
+ buffer.append(category.name);
+ buffer.append("\", \"id\": ");
+ buffer.append(getId());
+ buffer.append(", \"max\": ");
+ buffer.append(maxId);
+ switch (category) {
+ case DECIMAL:
+ buffer.append(", \"precision\": ");
+ buffer.append(precision);
+ buffer.append(", \"scale\": ");
+ buffer.append(scale);
+ break;
+ case CHAR:
+ case VARCHAR:
+ buffer.append(", \"length\": ");
+ buffer.append(maxLength);
+ break;
+ case LIST:
+ case MAP:
+ case UNION:
+ buffer.append(", \"children\": [");
+ for(int i=0; i < children.size(); ++i) {
+ buffer.append('\n');
+ children.get(i).printJsonToBuffer("", buffer, indent + 2);
+ if (i != children.size() - 1) {
+ buffer.append(',');
+ }
+ }
+ buffer.append("]");
+ break;
+ case STRUCT:
+ buffer.append(", \"fields\": [");
+ for(int i=0; i < children.size(); ++i) {
+ buffer.append('\n');
+ children.get(i).printJsonToBuffer("\"" + fieldNames.get(i) + "\": ",
+ buffer, indent + 2);
+ if (i != children.size() - 1) {
+ buffer.append(',');
+ }
+ }
+ buffer.append(']');
+ break;
+ default:
+ break;
+ }
+ buffer.append('}');
+ }
+
+ public String toJson() {
+ StringBuilder buffer = new StringBuilder();
+ printJsonToBuffer("", buffer, 0);
+ return buffer.toString();
+ }
+}
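The factory methods above compose arbitrarily nested schemas, and toString() renders them in Hive's type syntax. A short usage sketch (field names are illustrative):

TypeDescription schema = TypeDescription.createStruct()
    .addField("name", TypeDescription.createString())
    .addField("balance", TypeDescription.createDecimal()
        .withPrecision(20).withScale(2))
    .addField("tags", TypeDescription.createList(TypeDescription.createString()));
// Prints: struct<name:string,balance:decimal(20,2),tags:array<string>>
System.out.println(schema);
// Column ids are assigned in pre-order on first use:
// struct=0, name=1, balance=2, tags=3, tags' element=4.
System.out.println(schema.getId() + " .. " + schema.getMaximumId()); // 0 .. 4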
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
index 6411e3f..8991f2d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Writer.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.io.orc;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.List;
@@ -26,6 +28,13 @@ import java.util.List;
* The interface for writing ORC files.
*/
public interface Writer {
+
+ /**
+ * Get the schema for this writer
+ * @return the file schema
+ */
+ TypeDescription getSchema();
+
/**
* Add arbitrary meta-data to the ORC file. This may be called at any point
* until the Writer is closed. If the same key is passed a second time, the
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
index 7aa8d65..767d3f2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
@@ -40,7 +40,6 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.io.IOConstants;
import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO;
import org.apache.hadoop.hive.ql.io.orc.CompressionCodec.Modifier;
import org.apache.hadoop.hive.ql.io.orc.OrcFile.CompressionStrategy;
@@ -54,7 +53,6 @@ import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
@@ -72,9 +70,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspect
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.BytesWritable;
@@ -127,6 +122,8 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
private final int bufferSize;
private final long blockSize;
private final double paddingTolerance;
+ private final TypeDescription schema;
+
// the streams that make up the current stripe
private final Map<StreamName, BufferedStream> streams =
new TreeMap<StreamName, BufferedStream>();
@@ -165,6 +162,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
Path path,
Configuration conf,
ObjectInspector inspector,
+ TypeDescription schema,
long stripeSize,
CompressionKind compress,
int bufferSize,
@@ -183,6 +181,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
this.path = path;
this.conf = conf;
this.callback = callback;
+ this.schema = schema;
if (callback != null) {
callbackContext = new OrcFile.WriterContext(){
@@ -207,21 +206,18 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
this.memoryManager = memoryManager;
buildIndex = rowIndexStride > 0;
codec = createCodec(compress);
- String allColumns = conf.get(IOConstants.COLUMNS);
- if (allColumns == null) {
- allColumns = getColumnNamesFromInspector(inspector);
- }
- this.bufferSize = getEstimatedBufferSize(allColumns, bufferSize);
+ int numColumns = schema.getMaximumId() + 1;
+ this.bufferSize = getEstimatedBufferSize(getMemoryAvailableForORC(),
+ codec != null, numColumns, bufferSize);
if (version == OrcFile.Version.V_0_11) {
/* do not write bloom filters for ORC v11 */
- this.bloomFilterColumns =
- OrcUtils.includeColumns(null, allColumns, inspector);
+ this.bloomFilterColumns = new boolean[schema.getMaximumId() + 1];
} else {
this.bloomFilterColumns =
- OrcUtils.includeColumns(bloomFilterColumnNames, allColumns, inspector);
+ OrcUtils.includeColumns(bloomFilterColumnNames, schema);
}
this.bloomFilterFpp = bloomFilterFpp;
- treeWriter = createTreeWriter(inspector, streamFactory, false);
+ treeWriter = createTreeWriter(inspector, schema, streamFactory, false);
if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
throw new IllegalArgumentException("Row stride must be at least " +
MIN_ROW_INDEX_STRIDE);
@@ -231,62 +227,42 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
memoryManager.addWriter(path, stripeSize, this);
}
- private String getColumnNamesFromInspector(ObjectInspector inspector) {
- List<String> fieldNames = Lists.newArrayList();
- Joiner joiner = Joiner.on(",");
- if (inspector instanceof StructObjectInspector) {
- StructObjectInspector soi = (StructObjectInspector) inspector;
- List<? extends StructField> fields = soi.getAllStructFieldRefs();
- for(StructField sf : fields) {
- fieldNames.add(sf.getFieldName());
- }
- }
- return joiner.join(fieldNames);
- }
+ static int getEstimatedBufferSize(long availableMem,
+ boolean isCompressed,
+ int columnCount, int bs) {
+ if (columnCount > COLUMN_COUNT_THRESHOLD) {
+ // In BufferedStream, there are 3 outstream buffers (compressed,
+ // uncompressed and overflow) and list of previously compressed buffers.
+ // Since overflow buffer is rarely used, let's consider only 2 allocations.
+ // Also, initially, the list of compression buffers will be empty.
+ final int outStreamBuffers = isCompressed ? 2 : 1;
- @VisibleForTesting
- int getEstimatedBufferSize(int bs) {
- return getEstimatedBufferSize(conf.get(IOConstants.COLUMNS), bs);
- }
+ // max possible streams per column is 5. For string columns, there are
+ // ROW_INDEX, PRESENT, DATA, LENGTH, DICTIONARY_DATA streams.
+ final int maxStreams = 5;
- int getEstimatedBufferSize(String colNames, int bs) {
- long availableMem = getMemoryAvailableForORC();
- if (colNames != null) {
- final int numCols = colNames.split(",").length;
- if (numCols > COLUMN_COUNT_THRESHOLD) {
- // In BufferedStream, there are 3 outstream buffers (compressed,
- // uncompressed and overflow) and list of previously compressed buffers.
- // Since overflow buffer is rarely used, lets consider only 2 allocation.
- // Also, initially, the list of compression buffers will be empty.
- final int outStreamBuffers = codec == null ? 1 : 2;
-
- // max possible streams per column is 5. For string columns, there is
- // ROW_INDEX, PRESENT, DATA, LENGTH, DICTIONARY_DATA streams.
- final int maxStreams = 5;
-
- // Lets assume 10% memory for holding dictionary in memory and other
- // object allocations
- final long miscAllocation = (long) (0.1f * availableMem);
-
- // compute the available memory
- final long remainingMem = availableMem - miscAllocation;
-
- int estBufferSize = (int) (remainingMem /
- (maxStreams * outStreamBuffers * numCols));
- estBufferSize = getClosestBufferSize(estBufferSize, bs);
- if (estBufferSize > bs) {
- estBufferSize = bs;
- }
+ // Let's assume 10% of memory for holding dictionaries in memory and other
+ // object allocations
+ final long miscAllocation = (long) (0.1f * availableMem);
- LOG.info("WIDE TABLE - Number of columns: " + numCols +
- " Chosen compression buffer size: " + estBufferSize);
- return estBufferSize;
+ // compute the available memory
+ final long remainingMem = availableMem - miscAllocation;
+
+ int estBufferSize = (int) (remainingMem /
+ (maxStreams * outStreamBuffers * columnCount));
+ estBufferSize = getClosestBufferSize(estBufferSize);
+ if (estBufferSize > bs) {
+ estBufferSize = bs;
}
+
+ LOG.info("WIDE TABLE - Number of columns: " + columnCount +
+ " Chosen compression buffer size: " + estBufferSize);
+ return estBufferSize;
}
return bs;
}
- private int getClosestBufferSize(int estBufferSize, int bs) {
+ private static int getClosestBufferSize(int estBufferSize) {
final int kb4 = 4 * 1024;
final int kb8 = 8 * 1024;
final int kb16 = 16 * 1024;
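To make the sizing arithmetic in the hunk above concrete: with roughly 2 GB available to ORC, compression on, and a 3,000-column table, the estimate comes out near 64 KB per buffer before rounding (illustrative numbers only; getClosestBufferSize then snaps the value to a standard size, and the configured default acts as a cap):

long availableMem = 2L * 1024 * 1024 * 1024;        // memory available to ORC
int columnCount = 3000;                             // a wide table
int outStreamBuffers = 2;                           // compressed + uncompressed
int maxStreams = 5;                                 // worst case per column
long miscAllocation = (long) (0.1f * availableMem); // 10% for dictionaries etc.
long remainingMem = availableMem - miscAllocation;
int estBufferSize = (int) (remainingMem / (maxStreams * outStreamBuffers * columnCount));
// estBufferSize ~= 64,424 bytes here, before rounding.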
@@ -546,15 +522,6 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
}
/**
- * Get the current column id. After creating all tree writers this count should tell how many
- * columns (including columns within nested complex objects) are created in total.
- * @return current column id
- */
- public int getCurrentColumnId() {
- return columnCount;
- }
-
- /**
* Get the stride rate of the row index.
*/
public int getRowIndexStride() {
@@ -666,11 +633,13 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
* Create a tree writer.
* @param columnId the column id of the column to write
* @param inspector the object inspector to use
+ * @param schema the row schema
* @param streamFactory limited access to the Writer's data.
* @param nullable can the value be null?
* @throws IOException
*/
TreeWriter(int columnId, ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory streamFactory,
boolean nullable) throws IOException {
this.streamFactory = streamFactory;
@@ -686,9 +655,9 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
}
this.foundNulls = false;
createBloomFilter = streamFactory.getBloomFilterColumns()[columnId];
- indexStatistics = ColumnStatisticsImpl.create(inspector);
- stripeColStatistics = ColumnStatisticsImpl.create(inspector);
- fileStatistics = ColumnStatisticsImpl.create(inspector);
+ indexStatistics = ColumnStatisticsImpl.create(schema);
+ stripeColStatistics = ColumnStatisticsImpl.create(schema);
+ fileStatistics = ColumnStatisticsImpl.create(schema);
childrenWriters = new TreeWriter[0];
rowIndex = OrcProto.RowIndex.newBuilder();
rowIndexEntry = OrcProto.RowIndexEntry.newBuilder();
@@ -749,7 +718,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
/**
* Add a new value to the column.
- * @param obj
+ * @param obj the object to write
* @throws IOException
*/
void write(Object obj) throws IOException {
@@ -919,9 +888,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
BooleanTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
PositionedOutputStream out = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
this.writer = new BitFieldWriter(out, 1);
@@ -958,9 +928,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
ByteTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
this.writer = new RunLengthByteWriter(writer.createStream(id,
OrcProto.Stream.Kind.DATA));
recordPosition(rowIndexPosition);
@@ -1003,9 +974,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
IntegerTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
OutStream out = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
this.isDirectV2 = isNewWriteFormat(writer);
@@ -1079,9 +1051,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
FloatTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
this.stream = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
this.utils = new SerializationUtils();
@@ -1123,9 +1096,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
DoubleTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
this.stream = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
this.utils = new SerializationUtils();
@@ -1184,9 +1158,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
StringTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
this.isDirectV2 = isNewWriteFormat(writer);
stringOutput = writer.createStream(id,
OrcProto.Stream.Kind.DICTIONARY_DATA);
@@ -1423,9 +1398,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
CharTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
}
/**
@@ -1445,9 +1421,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
VarcharTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
}
/**
@@ -1467,9 +1444,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
BinaryTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
this.stream = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
this.isDirectV2 = isNewWriteFormat(writer);
@@ -1531,9 +1509,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
TimestampTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
this.isDirectV2 = isNewWriteFormat(writer);
this.seconds = createIntegerWriter(writer.createStream(id,
OrcProto.Stream.Kind.DATA), true, isDirectV2, writer);
@@ -1610,9 +1589,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
DateTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
OutStream out = writer.createStream(id,
OrcProto.Stream.Kind.DATA);
this.isDirectV2 = isNewWriteFormat(writer);
@@ -1666,9 +1646,10 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
DecimalTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
this.isDirectV2 = isNewWriteFormat(writer);
valueStream = writer.createStream(id, OrcProto.Stream.Kind.DATA);
this.scaleStream = createIntegerWriter(writer.createStream(id,
@@ -1726,16 +1707,21 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
private final List<? extends StructField> fields;
StructTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
+ List<TypeDescription> children = schema.getChildren();
StructObjectInspector structObjectInspector =
(StructObjectInspector) inspector;
fields = structObjectInspector.getAllStructFieldRefs();
- childrenWriters = new TreeWriter[fields.size()];
+ childrenWriters = new TreeWriter[children.size()];
for(int i=0; i < childrenWriters.length; ++i) {
+ ObjectInspector childOI = i < fields.size() ?
+ fields.get(i).getFieldObjectInspector() : null;
childrenWriters[i] = createTreeWriter(
- fields.get(i).getFieldObjectInspector(), writer, true);
+ childOI, children.get(i), writer,
+ true);
}
recordPosition(rowIndexPosition);
}
@@ -1770,15 +1756,16 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
ListTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
this.isDirectV2 = isNewWriteFormat(writer);
- ListObjectInspector listObjectInspector = (ListObjectInspector) inspector;
+ ObjectInspector childOI =
+ ((ListObjectInspector) inspector).getListElementObjectInspector();
childrenWriters = new TreeWriter[1];
childrenWriters[0] =
- createTreeWriter(listObjectInspector.getListElementObjectInspector(),
- writer, true);
+ createTreeWriter(childOI, schema.getChildren().get(0), writer, true);
lengths = createIntegerWriter(writer.createStream(columnId,
OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
recordPosition(rowIndexPosition);
@@ -1834,16 +1821,20 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
MapTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
this.isDirectV2 = isNewWriteFormat(writer);
MapObjectInspector insp = (MapObjectInspector) inspector;
childrenWriters = new TreeWriter[2];
+ List<TypeDescription> children = schema.getChildren();
childrenWriters[0] =
- createTreeWriter(insp.getMapKeyObjectInspector(), writer, true);
+ createTreeWriter(insp.getMapKeyObjectInspector(), children.get(0),
+ writer, true);
childrenWriters[1] =
- createTreeWriter(insp.getMapValueObjectInspector(), writer, true);
+ createTreeWriter(insp.getMapValueObjectInspector(), children.get(1),
+ writer, true);
lengths = createIntegerWriter(writer.createStream(columnId,
OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
recordPosition(rowIndexPosition);
@@ -1901,14 +1892,17 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
UnionTreeWriter(int columnId,
ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory writer,
boolean nullable) throws IOException {
- super(columnId, inspector, writer, nullable);
+ super(columnId, inspector, schema, writer, nullable);
UnionObjectInspector insp = (UnionObjectInspector) inspector;
List<ObjectInspector> choices = insp.getObjectInspectors();
- childrenWriters = new TreeWriter[choices.size()];
+ List<TypeDescription> children = schema.getChildren();
+ childrenWriters = new TreeWriter[children.size()];
for(int i=0; i < childrenWriters.length; ++i) {
- childrenWriters[i] = createTreeWriter(choices.get(i), writer, true);
+ childrenWriters[i] = createTreeWriter(choices.get(i),
+ children.get(i), writer, true);
}
tags =
new RunLengthByteWriter(writer.createStream(columnId,
@@ -1949,168 +1943,151 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
}
private static TreeWriter createTreeWriter(ObjectInspector inspector,
+ TypeDescription schema,
StreamFactory streamFactory,
boolean nullable) throws IOException {
- switch (inspector.getCategory()) {
- case PRIMITIVE:
- switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) {
- case BOOLEAN:
- return new BooleanTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case BYTE:
- return new ByteTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case SHORT:
- case INT:
- case LONG:
- return new IntegerTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case FLOAT:
- return new FloatTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case DOUBLE:
- return new DoubleTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case STRING:
- return new StringTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case CHAR:
- return new CharTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case VARCHAR:
- return new VarcharTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case BINARY:
- return new BinaryTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case TIMESTAMP:
- return new TimestampTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case DATE:
- return new DateTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- case DECIMAL:
- return new DecimalTreeWriter(streamFactory.getNextColumnId(),
- inspector, streamFactory, nullable);
- default:
- throw new IllegalArgumentException("Bad primitive category " +
- ((PrimitiveObjectInspector) inspector).getPrimitiveCategory());
- }
+ switch (schema.getCategory()) {
+ case BOOLEAN:
+ return new BooleanTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case BYTE:
+ return new ByteTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case SHORT:
+ case INT:
+ case LONG:
+ return new IntegerTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case FLOAT:
+ return new FloatTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case DOUBLE:
+ return new DoubleTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case STRING:
+ return new StringTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case CHAR:
+ return new CharTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case VARCHAR:
+ return new VarcharTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case BINARY:
+ return new BinaryTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case TIMESTAMP:
+ return new TimestampTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case DATE:
+ return new DateTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
+ case DECIMAL:
+ return new DecimalTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
case STRUCT:
- return new StructTreeWriter(streamFactory.getNextColumnId(), inspector,
- streamFactory, nullable);
+ return new StructTreeWriter(streamFactory.getNextColumnId(),
+ inspector, schema, streamFactory, nullable);
case MAP:
return new MapTreeWriter(streamFactory.getNextColumnId(), inspector,
- streamFactory, nullable);
+ schema, streamFactory, nullable);
case LIST:
return new ListTreeWriter(streamFactory.getNextColumnId(), inspector,
- streamFactory, nullable);
+ schema, streamFactory, nullable);
case UNION:
return new UnionTreeWriter(streamFactory.getNextColumnId(), inspector,
- streamFactory, nullable);
+ schema, streamFactory, nullable);
default:
throw new IllegalArgumentException("Bad category: " +
- inspector.getCategory());
+ schema.getCategory());
}
}
private static void writeTypes(OrcProto.Footer.Builder builder,
- TreeWriter treeWriter) {
+ TypeDescription schema) {
OrcProto.Type.Builder type = OrcProto.Type.newBuilder();
- switch (treeWriter.inspector.getCategory()) {
- case PRIMITIVE:
- switch (((PrimitiveObjectInspector) treeWriter.inspector).
- getPrimitiveCategory()) {
- case BOOLEAN:
- type.setKind(OrcProto.Type.Kind.BOOLEAN);
- break;
- case BYTE:
- type.setKind(OrcProto.Type.Kind.BYTE);
- break;
- case SHORT:
- type.setKind(OrcProto.Type.Kind.SHORT);
- break;
- case INT:
- type.setKind(OrcProto.Type.Kind.INT);
- break;
- case LONG:
- type.setKind(OrcProto.Type.Kind.LONG);
- break;
- case FLOAT:
- type.setKind(OrcProto.Type.Kind.FLOAT);
- break;
- case DOUBLE:
- type.setKind(OrcProto.Type.Kind.DOUBLE);
- break;
- case STRING:
- type.setKind(OrcProto.Type.Kind.STRING);
- break;
- case CHAR:
- // The char length needs to be written to file and should be available
- // from the object inspector
- CharTypeInfo charTypeInfo = (CharTypeInfo) ((PrimitiveObjectInspector) treeWriter.inspector).getTypeInfo();
- type.setKind(Type.Kind.CHAR);
- type.setMaximumLength(charTypeInfo.getLength());
- break;
- case VARCHAR:
- // The varchar length needs to be written to file and should be available
- // from the object inspector
- VarcharTypeInfo typeInfo = (VarcharTypeInfo) ((PrimitiveObjectInspector) treeWriter.inspector).getTypeInfo();
- type.setKind(Type.Kind.VARCHAR);
- type.setMaximumLength(typeInfo.getLength());
- break;
- case BINARY:
- type.setKind(OrcProto.Type.Kind.BINARY);
- break;
- case TIMESTAMP:
- type.setKind(OrcProto.Type.Kind.TIMESTAMP);
- break;
- case DATE:
- type.setKind(OrcProto.Type.Kind.DATE);
- break;
- case DECIMAL:
- DecimalTypeInfo decTypeInfo = (DecimalTypeInfo)((PrimitiveObjectInspector)treeWriter.inspector).getTypeInfo();
- type.setKind(OrcProto.Type.Kind.DECIMAL);
- type.setPrecision(decTypeInfo.precision());
- type.setScale(decTypeInfo.scale());
- break;
- default:
- throw new IllegalArgumentException("Unknown primitive category: " +
- ((PrimitiveObjectInspector) treeWriter.inspector).
- getPrimitiveCategory());
- }
+ List<TypeDescription> children = schema.getChildren();
+ switch (schema.getCategory()) {
+ case BOOLEAN:
+ type.setKind(OrcProto.Type.Kind.BOOLEAN);
+ break;
+ case BYTE:
+ type.setKind(OrcProto.Type.Kind.BYTE);
+ break;
+ case SHORT:
+ type.setKind(OrcProto.Type.Kind.SHORT);
+ break;
+ case INT:
+ type.setKind(OrcProto.Type.Kind.INT);
+ break;
+ case LONG:
+ type.setKind(OrcProto.Type.Kind.LONG);
+ break;
+ case FLOAT:
+ type.setKind(OrcProto.Type.Kind.FLOAT);
+ break;
+ case DOUBLE:
+ type.setKind(OrcProto.Type.Kind.DOUBLE);
+ break;
+ case STRING:
+ type.setKind(OrcProto.Type.Kind.STRING);
+ break;
+ case CHAR:
+ type.setKind(OrcProto.Type.Kind.CHAR);
+ type.setMaximumLength(schema.getMaxLength());
+ break;
+ case VARCHAR:
+ type.setKind(Type.Kind.VARCHAR);
+ type.setMaximumLength(schema.getMaxLength());
+ break;
+ case BINARY:
+ type.setKind(OrcProto.Type.Kind.BINARY);
+ break;
+ case TIMESTAMP:
+ type.setKind(OrcProto.Type.Kind.TIMESTAMP);
+ break;
+ case DATE:
+ type.setKind(OrcProto.Type.Kind.DATE);
+ break;
+ case DECIMAL:
+ type.setKind(OrcProto.Type.Kind.DECIMAL);
+ type.setPrecision(schema.getPrecision());
+ type.setScale(schema.getScale());
break;
case LIST:
type.setKind(OrcProto.Type.Kind.LIST);
- type.addSubtypes(treeWriter.childrenWriters[0].id);
+ type.addSubtypes(children.get(0).getId());
break;
case MAP:
type.setKind(OrcProto.Type.Kind.MAP);
- type.addSubtypes(treeWriter.childrenWriters[0].id);
- type.addSubtypes(treeWriter.childrenWriters[1].id);
+ for(TypeDescription t: children) {
+ type.addSubtypes(t.getId());
+ }
break;
case STRUCT:
type.setKind(OrcProto.Type.Kind.STRUCT);
- for(TreeWriter child: treeWriter.childrenWriters) {
- type.addSubtypes(child.id);
+ for(TypeDescription t: children) {
+ type.addSubtypes(t.getId());
}
- for(StructField field: ((StructTreeWriter) treeWriter).fields) {
- type.addFieldNames(field.getFieldName());
+ for(String field: schema.getFieldNames()) {
+ type.addFieldNames(field);
}
break;
case UNION:
type.setKind(OrcProto.Type.Kind.UNION);
- for(TreeWriter child: treeWriter.childrenWriters) {
- type.addSubtypes(child.id);
+ for(TypeDescription t: children) {
+ type.addSubtypes(t.getId());
}
break;
default:
throw new IllegalArgumentException("Unknown category: " +
- treeWriter.inspector.getCategory());
+ schema.getCategory());
}
builder.addTypes(type);
- for(TreeWriter child: treeWriter.childrenWriters) {
- writeTypes(builder, child);
+ if (children != null) {
+ for(TypeDescription child: children) {
+ writeTypes(builder, child);
+ }
}
}
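writeTypes now walks the TypeDescription tree directly, emitting one OrcProto.Type per node in the same pre-order used by assignIds, so the subtype ids recorded in the footer line up with column ids. A hedged illustration with a hypothetical row type:

// Hypothetical row type: struct<s:string,m:map<string,int>>
TypeDescription row = TypeDescription.createStruct()
    .addField("s", TypeDescription.createString())
    .addField("m", TypeDescription.createMap(
        TypeDescription.createString(), TypeDescription.createInt()));
// Flattened footer types in id order:
//   0: STRUCT subtypes=[1,2] fieldNames=[s,m]
//   1: STRING
//   2: MAP    subtypes=[3,4]
//   3: STRING (key)
//   4: INT    (value)
assert row.getChildren().get(1).getChildren().get(1).getId() == 4;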
@@ -2243,73 +2220,58 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
}
private long computeRawDataSize() {
- long result = 0;
- for (TreeWriter child : treeWriter.getChildrenWriters()) {
- result += getRawDataSizeFromInspectors(child, child.inspector);
- }
- return result;
+ return getRawDataSize(treeWriter, schema);
}
- private long getRawDataSizeFromInspectors(TreeWriter child, ObjectInspector oi) {
+ private long getRawDataSize(TreeWriter child,
+ TypeDescription schema) {
long total = 0;
- switch (oi.getCategory()) {
- case PRIMITIVE:
- total += getRawDataSizeFromPrimitives(child, oi);
- break;
- case LIST:
- case MAP:
- case UNION:
- case STRUCT:
- for (TreeWriter tw : child.childrenWriters) {
- total += getRawDataSizeFromInspectors(tw, tw.inspector);
- }
- break;
- default:
- LOG.debug("Unknown object inspector category.");
- break;
- }
- return total;
- }
-
- private long getRawDataSizeFromPrimitives(TreeWriter child, ObjectInspector oi) {
- long result = 0;
long numVals = child.fileStatistics.getNumberOfValues();
- switch (((PrimitiveObjectInspector) oi).getPrimitiveCategory()) {
- case BOOLEAN:
- case BYTE:
- case SHORT:
- case INT:
- case FLOAT:
- return numVals * JavaDataModel.get().primitive1();
- case LONG:
- case DOUBLE:
- return numVals * JavaDataModel.get().primitive2();
- case STRING:
- case VARCHAR:
- case CHAR:
- // ORC strings are converted to java Strings. so use JavaDataModel to
- // compute the overall size of strings
- child = (StringTreeWriter) child;
- StringColumnStatistics scs = (StringColumnStatistics) child.fileStatistics;
- numVals = numVals == 0 ? 1 : numVals;
- int avgStringLen = (int) (scs.getSum() / numVals);
- return numVals * JavaDataModel.get().lengthForStringOfLength(avgStringLen);
- case DECIMAL:
- return numVals * JavaDataModel.get().lengthOfDecimal();
- case DATE:
- return numVals * JavaDataModel.get().lengthOfDate();
- case BINARY:
- // get total length of binary blob
- BinaryColumnStatistics bcs = (BinaryColumnStatistics) child.fileStatistics;
- return bcs.getSum();
- case TIMESTAMP:
- return numVals * JavaDataModel.get().lengthOfTimestamp();
- default:
- LOG.debug("Unknown primitive category.");
- break;
+ switch (schema.getCategory()) {
+ case BOOLEAN:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case FLOAT:
+ return numVals * JavaDataModel.get().primitive1();
+ case LONG:
+ case DOUBLE:
+ return numVals * JavaDataModel.get().primitive2();
+ case STRING:
+ case VARCHAR:
+ case CHAR:
+ // ORC strings are converted to Java Strings, so use JavaDataModel to
+ // compute the overall size of strings
+ StringColumnStatistics scs = (StringColumnStatistics) child.fileStatistics;
+ numVals = numVals == 0 ? 1 : numVals;
+ int avgStringLen = (int) (scs.getSum() / numVals);
+ return numVals * JavaDataModel.get().lengthForStringOfLength(avgStringLen);
+ case DECIMAL:
+ return numVals * JavaDataModel.get().lengthOfDecimal();
+ case DATE:
+ return numVals * JavaDataModel.get().lengthOfDate();
+ case BINARY:
+ // get total length of binary blob
+ BinaryColumnStatistics bcs = (BinaryColumnStatistics) child.fileStatistics;
+ return bcs.getSum();
+ case TIMESTAMP:
+ return numVals * JavaDataModel.get().lengthOfTimestamp();
+ case LIST:
+ case MAP:
+ case UNION:
+ case STRUCT: {
+ TreeWriter[] childWriters = child.getChildrenWriters();
+ List<TypeDescription> childTypes = schema.getChildren();
+ for (int i=0; i < childWriters.length; ++i) {
+ total += getRawDataSize(childWriters[i], childTypes.get(i));
+ }
+ break;
+ }
+ default:
+ LOG.debug("Unknown object inspector category.");
+ break;
}
-
- return result;
+ return total;
}
private OrcProto.CompressionKind writeCompressionKind(CompressionKind kind) {
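The raw-data-size accounting above charges each primitive by its JavaDataModel footprint and lets complex types simply sum their children. A hedged back-of-the-envelope example (exact byte counts depend on the JVM data model):

JavaDataModel m = JavaDataModel.get();
long intCol    = 1000L * m.primitive1();                    // 1,000 int values
long doubleCol = 1000L * m.primitive2();                    // 1,000 double values
int avgLen     = 12000 / 1000;                              // sum of lengths / count, from stats
long stringCol = 1000L * m.lengthForStringOfLength(avgLen); // java.lang.String overhead included
long structTotal = intCol + doubleCol + stringCol;          // struct = sum of children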
@@ -2356,7 +2318,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
// populate raw data size
rawDataSize = computeRawDataSize();
// serialize the types
- writeTypes(builder, treeWriter);
+ writeTypes(builder, schema);
// add the stripe information
for(OrcProto.StripeInformation stripe: stripes) {
builder.addStripes(stripe);
@@ -2385,7 +2347,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
.setMagic(OrcFile.MAGIC)
.addVersion(version.getMajor())
.addVersion(version.getMinor())
- .setWriterVersion(OrcFile.WriterVersion.HIVE_8732.getId());
+ .setWriterVersion(OrcFile.WriterVersion.HIVE_4243.getId());
if (compress != CompressionKind.NONE) {
builder.setCompressionBlockSize(bufferSize);
}
@@ -2410,6 +2372,11 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
}
@Override
+ public TypeDescription getSchema() {
+ return schema;
+ }
+
+ @Override
public void addUserMetadata(String name, ByteBuffer value) {
userMetadata.put(name, ByteString.copyFrom(value));
}
@@ -2493,12 +2460,11 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
getStream();
long start = rawWriter.getPos();
- long stripeLen = length;
long availBlockSpace = blockSize - (start % blockSize);
// see if stripe can fit in the current hdfs block, else pad the remaining
// space in the block
- if (stripeLen < blockSize && stripeLen > availBlockSpace &&
+ if (length < blockSize && length > availBlockSpace &&
addBlockPadding) {
byte[] pad = new byte[(int) Math.min(HDFS_BUFFER_SIZE, availBlockSpace)];
LOG.info(String.format("Padding ORC by %d bytes while merging..",
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
----------------------------------------------------------------------
diff --git a/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto b/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
index 3b7a9b3..acadef9 100644
--- a/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
+++ b/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
@@ -213,6 +213,7 @@ message PostScript {
// Version of the writer:
// 0 (or missing) = original
// 1 = HIVE-8732 fixed
+ // 2 = HIVE-4243 fixed
optional uint32 writerVersion = 6;
// Leave this last in the record
optional string magic = 8000;
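Putting the PostScript comment above together with the OrcFile.WriterVersion values used elsewhere in this patch, the id-to-version mapping a reader applies looks roughly like this (a hedged sketch; the method is hypothetical):

static OrcFile.WriterVersion fromId(int id) {
  switch (id) {
    case 0:  return OrcFile.WriterVersion.ORIGINAL;  // 0 (or missing) = original
    case 1:  return OrcFile.WriterVersion.HIVE_8732; // 1 = HIVE-8732 fixed
    case 2:  return OrcFile.WriterVersion.HIVE_4243; // 2 = HIVE-4243 fixed
    default: return OrcFile.WriterVersion.FUTURE;    // newer than this reader
  }
}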
http://git-wip-us.apache.org/repos/asf/hive/blob/7b1ed3d3/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
index 4d30377..4e3bc90 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
@@ -48,11 +48,10 @@ public class TestColumnStatistics {
@Test
public void testLongMerge() throws Exception {
- ObjectInspector inspector =
- PrimitiveObjectInspectorFactory.javaIntObjectInspector;
+ TypeDescription schema = TypeDescription.createInt();
- ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(inspector);
- ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(inspector);
+ ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+ ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
stats1.updateInteger(10);
stats1.updateInteger(10);
stats2.updateInteger(1);
@@ -71,11 +70,10 @@ public class TestColumnStatistics {
@Test
public void testDoubleMerge() throws Exception {
- ObjectInspector inspector =
- PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
+ TypeDescription schema = TypeDescription.createDouble();
- ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(inspector);
- ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(inspector);
+ ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+ ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
stats1.updateDouble(10.0);
stats1.updateDouble(100.0);
stats2.updateDouble(1.0);
@@ -95,11 +93,10 @@ public class TestColumnStatistics {
@Test
public void testStringMerge() throws Exception {
- ObjectInspector inspector =
- PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+ TypeDescription schema = TypeDescription.createString();
- ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(inspector);
- ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(inspector);
+ ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+ ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
stats1.updateString(new Text("bob"));
stats1.updateString(new Text("david"));
stats1.updateString(new Text("charles"));
@@ -119,11 +116,10 @@ public class TestColumnStatistics {
@Test
public void testDateMerge() throws Exception {
- ObjectInspector inspector =
- PrimitiveObjectInspectorFactory.javaDateObjectInspector;
+ TypeDescription schema = TypeDescription.createDate();
- ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(inspector);
- ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(inspector);
+ ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+ ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
stats1.updateDate(new DateWritable(1000));
stats1.updateDate(new DateWritable(100));
stats2.updateDate(new DateWritable(10));
@@ -142,11 +138,10 @@ public class TestColumnStatistics {
@Test
public void testTimestampMerge() throws Exception {
- ObjectInspector inspector =
- PrimitiveObjectInspectorFactory.javaTimestampObjectInspector;
+ TypeDescription schema = TypeDescription.createTimestamp();
- ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(inspector);
- ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(inspector);
+ ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+ ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
stats1.updateTimestamp(new Timestamp(10));
stats1.updateTimestamp(new Timestamp(100));
stats2.updateTimestamp(new Timestamp(1));
@@ -165,11 +160,11 @@ public class TestColumnStatistics {
@Test
public void testDecimalMerge() throws Exception {
- ObjectInspector inspector =
- PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector;
+ TypeDescription schema = TypeDescription.createDecimal()
+ .withPrecision(38).withScale(16);
- ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(inspector);
- ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(inspector);
+ ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+ ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
stats1.updateDecimal(HiveDecimal.create(10));
stats1.updateDecimal(HiveDecimal.create(100));
stats2.updateDecimal(HiveDecimal.create(1));
[14/22] hive git commit: HIVE-11972 : [Refactor] Improve
determination of dynamic partitioning columns in FileSink Operator (Ashutosh
Chauhan via Prasanth J)
Posted by se...@apache.org.
HIVE-11972 : [Refactor] Improve determination of dynamic partitioning columns in FileSink Operator (Ashutosh Chauhan via Prasanth J)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/24988f77
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/24988f77
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/24988f77
Branch: refs/heads/llap
Commit: 24988f77f2898bbcd91f5665b865bcc251e3cade
Parents: 522bb60
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Sat Sep 26 12:19:00 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Thu Oct 1 11:41:53 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/exec/FileSinkOperator.java | 19 +-
.../apache/hadoop/hive/ql/exec/Utilities.java | 17 +
.../optimizer/ConstantPropagateProcFactory.java | 11 +-
.../hive/ql/optimizer/GenMapRedUtils.java | 10 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 30 +-
.../hive/ql/plan/DynamicPartitionCtx.java | 27 --
.../hive/ql/exec/TestFileSinkOperator.java | 384 ++++++++++++-------
7 files changed, 284 insertions(+), 214 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/24988f77/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index 2604d5d..39944a9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -493,24 +493,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
assert inputObjInspectors.length == 1 : "FileSinkOperator should have 1 parent, but it has "
+ inputObjInspectors.length;
StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[0];
- // remove the last dpMapping.size() columns from the OI
- List<? extends StructField> fieldOI = soi.getAllStructFieldRefs();
- ArrayList<ObjectInspector> newFieldsOI = new ArrayList<ObjectInspector>();
- ArrayList<String> newFieldsName = new ArrayList<String>();
- this.dpStartCol = 0;
- for (StructField sf : fieldOI) {
- String fn = sf.getFieldName();
- if (!dpCtx.getInputToDPCols().containsKey(fn)) {
- newFieldsOI.add(sf.getFieldObjectInspector());
- newFieldsName.add(sf.getFieldName());
- this.dpStartCol++;
- } else {
- // once we found the start column for partition column we are done
- break;
- }
- }
- assert newFieldsOI.size() > 0 : "new Fields ObjectInspector is empty";
-
+ this.dpStartCol = Utilities.getDPColOffset(conf);
this.subSetOI = new SubStructObjectInspector(soi, 0, this.dpStartCol);
this.dpVals = new ArrayList<String>(numDynParts);
this.dpWritables = new ArrayList<Object>(numDynParts);
http://git-wip-us.apache.org/repos/asf/hive/blob/24988f77/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index bcf85a4..5b21af9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -119,6 +119,7 @@ import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
import org.apache.hadoop.hive.ql.exec.tez.DagUtils;
import org.apache.hadoop.hive.ql.exec.tez.TezTask;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
@@ -3916,4 +3917,20 @@ public final class Utilities {
HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PASSWORD, "");
}
}
+
+ public static int getDPColOffset(FileSinkDesc conf) {
+
+ if (conf.getWriteType() == AcidUtils.Operation.DELETE) {
+ // For deletes, there is only ROW__ID in non-partitioning, non-bucketing columns.
+ // See UpdateDeleteSemanticAnalyzer::reparseAndSuperAnalyze() for details.
+ return 1;
+ } else if (conf.getWriteType() == AcidUtils.Operation.UPDATE) {
+ // For updates, ROW__ID is an extra column at index 0.
+ // See UpdateDeleteSemanticAnalyzer::reparseAndSuperAnalyze() for details.
+ return getColumnNames(conf.getTableInfo().getProperties()).size() + 1;
+ } else {
+ return getColumnNames(conf.getTableInfo().getProperties()).size();
+ }
+
+ }
}
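A hedged illustration of the three offsets getDPColOffset can return, for a hypothetical table with regular columns (a, b, c) followed by dynamic-partition columns:

// INSERT: the DP columns start right after the table columns.
//   getDPColOffset(conf) == 3                     // a, b, c
// UPDATE: ROW__ID is prepended at index 0, shifting everything right.
//   getDPColOffset(conf) == 3 + 1
// DELETE: only ROW__ID precedes the DP columns.
//   getDPColOffset(conf) == 1
int dpStartCol = Utilities.getDPColOffset(fileSinkDesc); // fileSinkDesc is a FileSinkDesc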
http://git-wip-us.apache.org/repos/asf/hive/blob/24988f77/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
index 5c6a6df..25156b2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
@@ -843,7 +843,7 @@ public final class ConstantPropagateProcFactory {
}
}
if (constant.getTypeInfo().getCategory() != Category.PRIMITIVE) {
- // nested complex types cannot be folded cleanly
+ // nested complex types cannot be folded cleanly
return null;
}
Object value = constant.getValue();
@@ -1163,16 +1163,15 @@ public final class ConstantPropagateProcFactory {
DynamicPartitionCtx dpCtx = fsdesc.getDynPartCtx();
if (dpCtx != null) {
- // If all dynamic partitions are propagated as constant, remove DP.
- Set<String> inputs = dpCtx.getInputToDPCols().keySet();
-
// Assume only 1 parent for FS operator
Operator<? extends Serializable> parent = op.getParentOperators().get(0);
Map<ColumnInfo, ExprNodeDesc> parentConstants = cppCtx.getPropagatedConstants(parent);
RowSchema rs = parent.getSchema();
boolean allConstant = true;
- for (String input : inputs) {
- ColumnInfo ci = rs.getColumnInfo(input);
+ int dpColStartIdx = Utilities.getDPColOffset(fsdesc);
+ List<ColumnInfo> colInfos = rs.getSignature();
+ for (int i = dpColStartIdx; i < colInfos.size(); i++) {
+ ColumnInfo ci = colInfos.get(i);
if (parentConstants.get(ci) == null) {
allConstant = false;
break;
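The rewritten loop above no longer resolves partition columns by name through inputToDPCols; it relies on the invariant that the dynamic-partition columns occupy the trailing slots of the FileSink input schema, starting at Utilities.getDPColOffset(fsdesc). A generic, simplified sketch of that positional test (types reduced to placeholders; not the actual optimizer code):

import java.util.List;
import java.util.Map;

final class AllDpConstantSketch {
  // True when every trailing (dynamic-partition) column has a propagated
  // constant, i.e. dynamic partitioning could be rewritten as static.
  static <C, E> boolean allDpColumnsConstant(List<C> colInfos,
                                             int dpColStartIdx,
                                             Map<C, E> constants) {
    for (int i = dpColStartIdx; i < colInfos.size(); i++) {
      if (constants.get(colInfos.get(i)) == null) {
        return false;
      }
    }
    return true;
  }
}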
http://git-wip-us.apache.org/repos/asf/hive/blob/24988f77/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index 02fbdfe..c696fd5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -497,9 +497,6 @@ public final class GenMapRedUtils {
partsList = PartitionPruner.prune(tsOp, parseCtx, alias_id);
} catch (SemanticException e) {
throw e;
- } catch (HiveException e) {
- LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
- throw new SemanticException(e.getMessage(), e);
}
}
@@ -990,7 +987,7 @@ public final class GenMapRedUtils {
fileSinkOp.setParentOperators(Utilities.makeList(parent));
// Create a dummy TableScanOperator for the file generated through fileSinkOp
- TableScanOperator tableScanOp = (TableScanOperator) createTemporaryTableScanOperator(
+ TableScanOperator tableScanOp = createTemporaryTableScanOperator(
parent.getSchema());
// Connect this TableScanOperator to child.
@@ -1235,19 +1232,16 @@ public final class GenMapRedUtils {
// adding DP ColumnInfo to the RowSchema signature
ArrayList<ColumnInfo> signature = inputRS.getSignature();
String tblAlias = fsInputDesc.getTableInfo().getTableName();
- LinkedHashMap<String, String> colMap = new LinkedHashMap<String, String>();
for (String dpCol : dpCtx.getDPColNames()) {
ColumnInfo colInfo = new ColumnInfo(dpCol,
TypeInfoFactory.stringTypeInfo, // all partition column type should be string
tblAlias, true); // partition column is virtual column
signature.add(colInfo);
- colMap.put(dpCol, dpCol); // input and output have the same column name
}
inputRS.setSignature(signature);
// create another DynamicPartitionCtx, which has a different input-to-DP column mapping
DynamicPartitionCtx dpCtx2 = new DynamicPartitionCtx(dpCtx);
- dpCtx2.setInputToDPCols(colMap);
fsOutputDesc.setDynPartCtx(dpCtx2);
// update the FileSinkOperator to include partition columns
@@ -1896,7 +1890,7 @@ public final class GenMapRedUtils {
"Partition Names, " + Arrays.toString(partNames) + " don't match partition Types, "
+ Arrays.toString(partTypes));
- Map<String, String> typeMap = new HashMap();
+ Map<String, String> typeMap = new HashMap<>();
for (int i = 0; i < partNames.length; i++) {
String previousValue = typeMap.put(partNames[i], partTypes[i]);
Preconditions.checkArgument(previousValue == null, "Partition columns configuration is inconsistent. "
http://git-wip-us.apache.org/repos/asf/hive/blob/24988f77/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index dbc6d8f..4bec228 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -736,7 +736,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
Path dataDir = null;
if(!qb.getEncryptedTargetTablePaths().isEmpty()) {
//currently only Insert into T values(...) is supported thus only 1 values clause
- //and only 1 target table are possible. If/when support for
+ //and only 1 target table are possible. If/when support for
//select ... from values(...) is added an insert statement may have multiple
//encrypted target tables.
dataDir = ctx.getMRTmpPath(qb.getEncryptedTargetTablePaths().get(0).toUri());
@@ -1556,7 +1556,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
for (String alias : tabAliases) {
String tab_name = qb.getTabNameForAlias(alias);
-
+
// we first look for this alias from CTE, and then from catalog.
/*
* if this s a CTE reference: Add its AST as a SubQuery to this QB.
@@ -6830,30 +6830,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
.getColumnInfos()), input), rowResolver);
input.setColumnExprMap(colExprMap);
}
-
- rowFields = opParseCtx.get(input).getRowResolver()
- .getColumnInfos();
- if (deleting()) {
- // Figure out if we have partition columns in the list or not. If so,
- // add them into the mapping. Partition columns will be located after the row id.
- if (rowFields.size() > 1) {
- // This means we have partition columns to deal with, so set up the mapping from the
- // input to the partition columns.
- dpCtx.mapInputToDP(rowFields.subList(1, rowFields.size()));
- }
- } else if (updating()) {
- // In this case we expect the number of in fields to exceed the number of out fields by one
- // (for the ROW__ID virtual column). If there are more columns than this,
- // then the extras are for dynamic partitioning
- if (dynPart && dpCtx != null) {
- dpCtx.mapInputToDP(rowFields.subList(tableFields.size() + 1, rowFields.size()));
- }
- } else {
- if (dynPart && dpCtx != null) {
- // create the mapping from input ExprNode to dest table DP column
- dpCtx.mapInputToDP(rowFields.subList(tableFields.size(), rowFields.size()));
- }
- }
return input;
}
@@ -10105,7 +10081,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
return;
}
for (Node child : node.getChildren()) {
- //each insert of multi insert looks like
+ //each insert of multi insert looks like
//(TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME T1)))
if (((ASTNode) child).getToken().getType() != HiveParser.TOK_INSERT) {
continue;
http://git-wip-us.apache.org/repos/asf/hive/blob/24988f77/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicPartitionCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicPartitionCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicPartitionCtx.java
index 24db7d0..95d5635 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicPartitionCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/DynamicPartitionCtx.java
@@ -19,14 +19,11 @@ package org.apache.hadoop.hive.ql.plan;
import java.io.Serializable;
import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.Warehouse;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.metadata.Table;
public class DynamicPartitionCtx implements Serializable {
@@ -43,8 +40,6 @@ public class DynamicPartitionCtx implements Serializable {
private Path rootPath; // the root path DP columns paths start from
private int numBuckets; // number of buckets in each partition
- private Map<String, String> inputToDPCols; // mapping from input column names to DP columns
-
private List<String> spNames; // sp column names
private List<String> dpNames; // dp column names
private String defaultPartName; // default partition name in case of null or empty value
@@ -71,7 +66,6 @@ public class DynamicPartitionCtx implements Serializable {
}
this.numDPCols = dpNames.size();
this.numSPCols = spNames.size();
- this.inputToDPCols = new HashMap<String, String>();
if (this.numSPCols > 0) {
this.spPath = Warehouse.makeDynamicPartName(partSpec);
} else {
@@ -86,25 +80,12 @@ public class DynamicPartitionCtx implements Serializable {
this.spPath = dp.spPath;
this.rootPath = dp.rootPath;
this.numBuckets = dp.numBuckets;
- this.inputToDPCols = dp.inputToDPCols;
this.spNames = dp.spNames;
this.dpNames = dp.dpNames;
this.defaultPartName = dp.defaultPartName;
this.maxPartsPerNode = dp.maxPartsPerNode;
}
- public void mapInputToDP(List<ColumnInfo> fs) {
-
- assert fs.size() == this.numDPCols: "input DP column size != numDPCols";
-
- Iterator<ColumnInfo> itr1 = fs.iterator();
- Iterator<String> itr2 = dpNames.iterator();
-
- while (itr1.hasNext() && itr2.hasNext()) {
- inputToDPCols.put(itr1.next().getInternalName(), itr2.next());
- }
- }
-
public int getMaxPartitionsPerNode() {
return this.maxPartsPerNode;
}
@@ -161,14 +142,6 @@ public class DynamicPartitionCtx implements Serializable {
this.spNames = sp;
}
- public Map<String, String> getInputToDPCols() {
- return this.inputToDPCols;
- }
-
- public void setInputToDPCols(Map<String, String> map) {
- this.inputToDPCols = map;
- }
-
public void setNumDPCols(int dp) {
this.numDPCols = dp;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/24988f77/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
index c6ae030..9e89376 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFileSinkOperator.java
@@ -28,7 +28,6 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.ValidTxnList;
-import org.apache.hadoop.hive.common.ValidReadTxnList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.io.AcidInputFormat;
@@ -45,12 +44,11 @@ import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.stats.StatsAggregator;
import org.apache.hadoop.hive.ql.stats.StatsPublisher;
import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
@@ -77,7 +75,6 @@ import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -94,8 +91,7 @@ public class TestFileSinkOperator {
private static TableDesc nonAcidTableDescriptor;
private static TableDesc acidTableDescriptor;
private static ObjectInspector inspector;
- private static List<TFSORow> rows;
- private static ValidTxnList txnList;
+ private static List<Row> rows;
private Path basePath;
private JobConf jc;
@@ -105,34 +101,33 @@ public class TestFileSinkOperator {
Properties properties = new Properties();
properties.setProperty(serdeConstants.SERIALIZATION_LIB, TFSOSerDe.class.getName());
nonAcidTableDescriptor = new TableDesc(TFSOInputFormat.class, TFSOOutputFormat.class, properties);
+ properties.setProperty(serdeConstants.LIST_COLUMNS,"data");
properties = new Properties(properties);
properties.setProperty(hive_metastoreConstants.BUCKET_COUNT, "1");
acidTableDescriptor = new TableDesc(TFSOInputFormat.class, TFSOOutputFormat.class, properties);
-
tmpdir = new File(System.getProperty("java.io.tmpdir") + System.getProperty("file.separator") +
"testFileSinkOperator");
tmpdir.mkdir();
tmpdir.deleteOnExit();
- txnList = new ValidReadTxnList(new long[]{}, 2);
}
@Test
public void testNonAcidWrite() throws Exception {
setBasePath("write");
- setupData(DataFormat.SIMPLE);
+ setupData(DataFormat.WITH_PARTITION_VALUE);
FileSinkOperator op = getFileSink(AcidUtils.Operation.NOT_ACID, false, 0);
processRows(op);
- confirmOutput();
+ confirmOutput(DataFormat.WITH_PARTITION_VALUE);
}
@Test
public void testInsert() throws Exception {
setBasePath("insert");
- setupData(DataFormat.SIMPLE);
+ setupData(DataFormat.WITH_PARTITION_VALUE);
FileSinkOperator op = getFileSink(AcidUtils.Operation.INSERT, false, 1);
processRows(op);
Assert.assertEquals("10", TFSOStatsPublisher.stats.get(StatsSetupConst.ROW_COUNT));
- confirmOutput();
+ confirmOutput(DataFormat.WITH_PARTITION_VALUE);
}
@Test
@@ -142,7 +137,7 @@ public class TestFileSinkOperator {
FileSinkOperator op = getFileSink(AcidUtils.Operation.UPDATE, false, 2);
processRows(op);
Assert.assertEquals("0", TFSOStatsPublisher.stats.get(StatsSetupConst.ROW_COUNT));
- confirmOutput();
+ confirmOutput(DataFormat.WITH_RECORD_ID);
}
@Test
@@ -152,7 +147,7 @@ public class TestFileSinkOperator {
FileSinkOperator op = getFileSink(AcidUtils.Operation.DELETE, false, 2);
processRows(op);
Assert.assertEquals("-10", TFSOStatsPublisher.stats.get(StatsSetupConst.ROW_COUNT));
- confirmOutput();
+ confirmOutput(DataFormat.WITH_RECORD_ID);
}
@Test
@@ -161,7 +156,7 @@ public class TestFileSinkOperator {
setupData(DataFormat.WITH_PARTITION_VALUE);
FileSinkOperator op = getFileSink(AcidUtils.Operation.NOT_ACID, true, 0);
processRows(op);
- confirmOutput();
+ confirmOutput(DataFormat.WITH_PARTITION_VALUE);
}
@@ -174,7 +169,7 @@ public class TestFileSinkOperator {
// We only expect 5 here because we'll get whichever of the partitions published its stats
// last.
Assert.assertEquals("5", TFSOStatsPublisher.stats.get(StatsSetupConst.ROW_COUNT));
- confirmOutput();
+ confirmOutput(DataFormat.WITH_PARTITION_VALUE);
}
@Test
@@ -184,19 +179,19 @@ public class TestFileSinkOperator {
FileSinkOperator op = getFileSink(AcidUtils.Operation.UPDATE, true, 2);
processRows(op);
Assert.assertEquals("0", TFSOStatsPublisher.stats.get(StatsSetupConst.ROW_COUNT));
- confirmOutput();
+ confirmOutput(DataFormat.WITH_RECORD_ID_AND_PARTITION_VALUE);
}
@Test
public void testDeleteDynamicPartitioning() throws Exception {
setBasePath("deleteDP");
- setupData(DataFormat.WITH_RECORD_ID_AND_PARTITION_VALUE);
+ setupData(DataFormat.WITH_RECORD_ID);
FileSinkOperator op = getFileSink(AcidUtils.Operation.DELETE, true, 2);
processRows(op);
// We only expect -5 here because we'll get whichever of the partitions published its stats
// last.
Assert.assertEquals("-5", TFSOStatsPublisher.stats.get(StatsSetupConst.ROW_COUNT));
- confirmOutput();
+ confirmOutput(DataFormat.WITH_RECORD_ID);
}
@@ -217,64 +212,52 @@ public class TestFileSinkOperator {
}
- private enum DataFormat {SIMPLE, WITH_RECORD_ID, WITH_PARTITION_VALUE,
- WITH_RECORD_ID_AND_PARTITION_VALUE};
+ private enum DataFormat {WITH_RECORD_ID, WITH_PARTITION_VALUE, WITH_RECORD_ID_AND_PARTITION_VALUE};
private void setupData(DataFormat format) {
- // Build object inspector
- inspector = ObjectInspectorFactory.getReflectionObjectInspector
- (TFSORow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
- rows = new ArrayList<TFSORow>();
-
+ Class<?> rType;
switch (format) {
- case SIMPLE:
- // Build rows
- for (int i = 0; i < 10; i++) {
- rows.add(
- new TFSORow(
- new Text("mary had a little lamb")
- )
- );
- }
+ case WITH_PARTITION_VALUE:
+ rType = RowWithPartVal.class;
break;
-
case WITH_RECORD_ID:
- for (int i = 0; i < 10; i++) {
- rows.add(
- new TFSORow(
- new Text("its fleect was white as snow"),
- new RecordIdentifier(1, 1, i)
- )
- );
- }
+ rType = RowWithRecID.class;
break;
-
- case WITH_PARTITION_VALUE:
- for (int i = 0; i < 10; i++) {
- rows.add(
- new TFSORow(
- new Text("its fleect was white as snow"),
- (i < 5) ? new Text("Monday") : new Text("Tuesday")
- )
- );
- }
- break;
-
case WITH_RECORD_ID_AND_PARTITION_VALUE:
- for (int i = 0; i < 10; i++) {
- rows.add(
- new TFSORow(
- new Text("its fleect was white as snow"),
- (i < 5) ? new Text("Monday") : new Text("Tuesday"),
- new RecordIdentifier(1, 1, i)
- )
- );
- }
+ rType = RowWithPartNRecID.class;
break;
-
default:
- throw new RuntimeException("Unknown option!");
+ throw new RuntimeException("Unknown type");
+ }
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector
+ (rType, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+
+ rows = new ArrayList<Row>();
+ Row r;
+ for (int i = 0; i < 10; i++) {
+ switch (format) {
+ case WITH_PARTITION_VALUE:
+ r =
+ new RowWithPartVal(
+ new Text("mary had a little lamb"),
+ (i < 5) ? new Text("Monday") : new Text("Tuesday"));
+ break;
+ case WITH_RECORD_ID:
+ r = new RowWithRecID(new RecordIdentifier(1, 1, i),
+ (i < 5) ? new Text("Monday") : new Text("Tuesday"));
+ break;
+ case WITH_RECORD_ID_AND_PARTITION_VALUE:
+ r = new RowWithPartNRecID(
+ new Text("its fleect was white as snow"),
+ (i < 5) ? new Text("Monday") : new Text("Tuesday"),
+ new RecordIdentifier(1, 1, i));
+ break;
+ default:
+ throw new RuntimeException("Unknown data format");
+ }
+ rows.add(r);
+
}
}
@@ -300,9 +283,6 @@ public class TestFileSinkOperator {
Map<String, String> partColMap= new LinkedHashMap<String, String>(1);
partColMap.put(PARTCOL_NAME, null);
DynamicPartitionCtx dpCtx = new DynamicPartitionCtx(null, partColMap, "Sunday", 100);
- Map<String, String> partColNames = new HashMap<String, String>(1);
- partColNames.put(PARTCOL_NAME, PARTCOL_NAME);
- dpCtx.setInputToDPCols(partColNames);
//todo: does this need the finalDestination?
desc = new FileSinkDesc(basePath, tableDesc, false, 1, false, false, 1, 1, partCols, dpCtx, null);
} else {
@@ -320,27 +300,27 @@ public class TestFileSinkOperator {
}
private void processRows(FileSinkOperator op) throws HiveException {
- for (TFSORow r : rows) op.process(r, 0);
+ for (Object r : rows) op.process(r, 0);
op.jobCloseOp(jc, true);
op.close(false);
}
- private void confirmOutput() throws IOException, SerDeException {
+ private void confirmOutput(DataFormat rType) throws IOException, SerDeException, CloneNotSupportedException {
Path[] paths = findFilesInBasePath();
- TFSOInputFormat input = new TFSOInputFormat();
+ TFSOInputFormat input = new TFSOInputFormat(rType);
FileInputFormat.setInputPaths(jc, paths);
InputSplit[] splits = input.getSplits(jc, 1);
- RecordReader<NullWritable, TFSORow> reader = input.getRecordReader(splits[0], jc,
+ RecordReader<NullWritable, Row> reader = input.getRecordReader(splits[0], jc,
Mockito.mock(Reporter.class));
NullWritable key = reader.createKey();
- TFSORow value = reader.createValue();
- List<TFSORow> results = new ArrayList<TFSORow>(rows.size());
- List<TFSORow> sortedRows = new ArrayList<TFSORow>(rows.size());
+ Row value = reader.createValue();
+ List<Row> results = new ArrayList<Row>(rows.size());
+ List<Row> sortedRows = new ArrayList<Row>(rows.size());
for (int i = 0; i < rows.size(); i++) {
Assert.assertTrue(reader.next(key, value));
- results.add(new TFSORow(value));
- sortedRows.add(new TFSORow(rows.get(i)));
+ results.add(value.clone());
+ sortedRows.add(rows.get(i));
}
Assert.assertFalse(reader.next(key, value));
Collections.sort(results);
@@ -370,36 +350,172 @@ public class TestFileSinkOperator {
}
}
- private static class TFSORow implements WritableComparable<TFSORow> {
+ public static interface Row extends WritableComparable<Row> {
+
+ Row clone() throws CloneNotSupportedException;
+ }
+
+ private static class RowWithRecID implements Row {
+
private RecordIdentifier recId;
- private Text data;
private Text partVal;
- TFSORow() {
- this(null, null, null);
+ public RowWithRecID() {
+ }
+ public RowWithRecID(RecordIdentifier recId, Text partVal) {
+ super();
+ this.recId = recId;
+ this.partVal = partVal;
}
- TFSORow(Text t) {
- this(t, null, null);
+ @Override
+ public Row clone() throws CloneNotSupportedException {
+ return new RowWithRecID(this.recId, this.partVal);
}
- TFSORow(Text t, Text pv) {
- this(t, pv, null);
+ @Override
+ public void write(DataOutput dataOutput) throws IOException {
+ if (partVal == null) {
+ dataOutput.writeBoolean(false);
+ } else {
+ dataOutput.writeBoolean(true);
+ partVal.write(dataOutput);
+ }
+ if (recId == null) {
+ dataOutput.writeBoolean(false);
+ } else {
+ dataOutput.writeBoolean(true);
+ recId.write(dataOutput);
+ }
}
+ @Override
+ public void readFields(DataInput dataInput) throws IOException {
+ boolean notNull = dataInput.readBoolean();
+ if (notNull) {
+ partVal = new Text();
+ partVal.readFields(dataInput);
+ }
+ notNull = dataInput.readBoolean();
+ if (notNull) {
+ recId = new RecordIdentifier();
+ recId.readFields(dataInput);
+ }
- TFSORow(Text t, RecordIdentifier ri) {
- this(t, null, ri);
}
+ @Override
+ public int compareTo(Row row) {
+ RowWithRecID other = (RowWithRecID) row;
+ if (recId == null && other.recId == null) {
+ return comparePartVal(other);
+ } else if (recId == null) {
+ return -1;
+ } else {
+ int rc = recId.compareTo(other.recId);
+ if (rc == 0) return comparePartVal(other);
+ else return rc;
+ }
+ }
+ private int comparePartVal(RowWithRecID other) {
- TFSORow(Text t, Text pv, RecordIdentifier ri) {
+ return partVal.compareTo(other.partVal);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ return compareTo((RowWithRecID)obj) == 0;
+ }
+ }
+ private static class RowWithPartVal implements Row {
+
+ public RowWithPartVal(Text data, Text partVal) {
+ super();
+ this.data = data;
+ this.partVal = partVal;
+ }
+
+ public RowWithPartVal() {
+ }
+
+ private Text data;
+ private Text partVal;
+
+ @Override
+ public Row clone() throws CloneNotSupportedException {
+ return new RowWithPartVal(this.data, this.partVal);
+ }
+
+ @Override
+ public void write(DataOutput dataOutput) throws IOException {
+ data.write(dataOutput);
+ if (partVal == null) {
+ dataOutput.writeBoolean(false);
+ } else {
+ dataOutput.writeBoolean(true);
+ partVal.write(dataOutput);
+ }
+ }
+
+ @Override
+ public void readFields(DataInput dataInput) throws IOException {
+ data = new Text();
+ data.readFields(dataInput);
+ boolean notNull = dataInput.readBoolean();
+ if (notNull) {
+ partVal = new Text();
+ partVal.readFields(dataInput);
+ }
+ }
+
+ @Override
+ public int compareTo(Row row) {
+ RowWithPartVal other = (RowWithPartVal) row;
+ if (partVal == null && other.partVal == null) {
+ return compareData(other);
+ } else if (partVal == null) {
+ return -1;
+ } else {
+ int rc = partVal.compareTo(other.partVal);
+ if (rc == 0) return compareData(other);
+ else return rc;
+ }
+ }
+
+ private int compareData(RowWithPartVal other) {
+ if (data == null && other.data == null) return 0;
+ else if (data == null) return -1;
+ else return data.compareTo(other.data);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (obj instanceof RowWithPartVal) {
+ RowWithPartVal other = (RowWithPartVal) obj;
+ return compareTo(other) == 0;
+
+ } else {
+ return false;
+ }
+ }
+ }
+ private static class RowWithPartNRecID implements Row {
+ private RecordIdentifier recId;
+ private Text data;
+ private Text partVal;
+
+ RowWithPartNRecID() {
+ this(null, null, null);
+ }
+
+ RowWithPartNRecID(Text t, Text pv, RecordIdentifier ri) {
data = t;
partVal = pv;
recId = ri;
-
}
- TFSORow(TFSORow other) {
- this(other.data, other.partVal, other.recId);
+ @Override
+ public RowWithPartNRecID clone() throws CloneNotSupportedException {
+ return new RowWithPartNRecID(this.data, this.partVal, this.recId);
}
@Override
@@ -437,8 +553,8 @@ public class TestFileSinkOperator {
@Override
public boolean equals(Object obj) {
- if (obj instanceof TFSORow) {
- TFSORow other = (TFSORow) obj;
+ if (obj instanceof RowWithPartNRecID) {
+ RowWithPartNRecID other = (RowWithPartNRecID) obj;
if (data == null && other.data == null) return checkPartVal(other);
else if (data == null) return false;
else if (data.equals(other.data)) return checkPartVal(other);
@@ -448,21 +564,22 @@ public class TestFileSinkOperator {
}
}
- private boolean checkPartVal(TFSORow other) {
+ private boolean checkPartVal(RowWithPartNRecID other) {
if (partVal == null && other.partVal == null) return checkRecId(other);
else if (partVal == null) return false;
else if (partVal.equals(other.partVal)) return checkRecId(other);
else return false;
}
- private boolean checkRecId(TFSORow other) {
+ private boolean checkRecId(RowWithPartNRecID other) {
if (recId == null && other.recId == null) return true;
else if (recId == null) return false;
else return recId.equals(other.recId);
}
@Override
- public int compareTo(TFSORow other) {
+ public int compareTo(Row row) {
+ RowWithPartNRecID other = (RowWithPartNRecID) row;
if (recId == null && other.recId == null) {
return comparePartVal(other);
} else if (recId == null) {
@@ -474,7 +591,7 @@ public class TestFileSinkOperator {
}
}
- private int comparePartVal(TFSORow other) {
+ private int comparePartVal(RowWithPartNRecID other) {
if (partVal == null && other.partVal == null) {
return compareData(other);
} else if (partVal == null) {
@@ -486,21 +603,26 @@ public class TestFileSinkOperator {
}
}
- private int compareData(TFSORow other) {
+ private int compareData(RowWithPartNRecID other) {
if (data == null && other.data == null) return 0;
else if (data == null) return -1;
else return data.compareTo(other.data);
}
}
- private static class TFSOInputFormat extends FileInputFormat<NullWritable, TFSORow>
- implements AcidInputFormat<NullWritable, TFSORow> {
+ private static class TFSOInputFormat extends FileInputFormat<NullWritable, Row>
+ implements AcidInputFormat<NullWritable, Row> {
FSDataInputStream in[] = null;
int readingFrom = -1;
+ DataFormat rType;
+
+ public TFSOInputFormat(DataFormat rType) {
+ this.rType = rType;
+ }
@Override
- public RecordReader<NullWritable, TFSORow> getRecordReader(
+ public RecordReader<NullWritable, Row> getRecordReader(
InputSplit inputSplit, JobConf entries, Reporter reporter) throws IOException {
if (in == null) {
Path paths[] = FileInputFormat.getInputPaths(entries);
@@ -511,10 +633,10 @@ public class TestFileSinkOperator {
}
readingFrom = 0;
}
- return new RecordReader<NullWritable, TFSORow>() {
+ return new RecordReader<NullWritable, Row>() {
@Override
- public boolean next(NullWritable nullWritable, TFSORow tfsoRecord) throws
+ public boolean next(NullWritable nullWritable, Row tfsoRecord) throws
IOException {
try {
tfsoRecord.readFields(in[readingFrom]);
@@ -532,8 +654,18 @@ public class TestFileSinkOperator {
}
@Override
- public TFSORow createValue() {
- return new TFSORow();
+ public Row createValue() {
+ switch (rType) {
+ case WITH_RECORD_ID_AND_PARTITION_VALUE:
+ return new RowWithPartNRecID();
+ case WITH_PARTITION_VALUE:
+ return new RowWithPartVal();
+ case WITH_RECORD_ID:
+ return new RowWithRecID();
+
+ default:
+ throw new RuntimeException("Unknown row Type");
+ }
}
@Override
@@ -554,14 +686,14 @@ public class TestFileSinkOperator {
}
@Override
- public RowReader<TFSORow> getReader(InputSplit split,
+ public RowReader<Row> getReader(InputSplit split,
Options options) throws
IOException {
return null;
}
@Override
- public RawReader<TFSORow> getRawReader(Configuration conf,
+ public RawReader<Row> getRawReader(Configuration conf,
boolean collapseEvents,
int bucket,
ValidTxnList validTxnList,
@@ -578,9 +710,9 @@ public class TestFileSinkOperator {
}
}
- public static class TFSOOutputFormat extends FileOutputFormat<NullWritable, TFSORow>
- implements AcidOutputFormat<NullWritable, TFSORow> {
- List<TFSORow> records = new ArrayList<TFSORow>();
+ public static class TFSOOutputFormat extends FileOutputFormat<NullWritable, Row>
+ implements AcidOutputFormat<NullWritable, Row> {
+ List<Row> records = new ArrayList<>();
long numRecordsAdded = 0;
FSDataOutputStream out = null;
@@ -588,7 +720,6 @@ public class TestFileSinkOperator {
public RecordUpdater getRecordUpdater(final Path path, final Options options) throws
IOException {
- final StructObjectInspector inspector = (StructObjectInspector)options.getInspector();
return new RecordUpdater() {
@Override
public void insert(long currentTransaction, Object row) throws IOException {
@@ -608,9 +739,8 @@ public class TestFileSinkOperator {
}
private void addRow(Object row) {
- assert row instanceof TFSORow : "Expected TFSORow but got " +
- row.getClass().getName();
- records.add((TFSORow)row);
+ assert row instanceof Row : "Expected Row but got " + row.getClass().getName();
+ records.add((Row)row);
}
@Override
@@ -619,7 +749,7 @@ public class TestFileSinkOperator {
FileSystem fs = path.getFileSystem(options.getConfiguration());
out = fs.create(path);
}
- for (TFSORow r : records) r.write(out);
+ for (Writable r : records) r.write(out);
records.clear();
out.flush();
}
@@ -657,8 +787,8 @@ public class TestFileSinkOperator {
return new FileSinkOperator.RecordWriter() {
@Override
public void write(Writable w) throws IOException {
- Assert.assertTrue(w instanceof TFSORow);
- records.add((TFSORow) w);
+ Assert.assertTrue(w instanceof Row);
+ records.add((Row)w);
}
@Override
@@ -667,7 +797,7 @@ public class TestFileSinkOperator {
FileSystem fs = finalOutPath.getFileSystem(jc);
out = fs.create(finalOutPath);
}
- for (TFSORow r : records) r.write(out);
+ for (Writable r : records) r.write(out);
records.clear();
out.flush();
out.close();
@@ -676,7 +806,7 @@ public class TestFileSinkOperator {
}
@Override
- public RecordWriter<NullWritable, TFSORow> getRecordWriter(
+ public RecordWriter<NullWritable, Row> getRecordWriter(
FileSystem fileSystem, JobConf entries, String s, Progressable progressable) throws
IOException {
return null;
@@ -688,7 +818,7 @@ public class TestFileSinkOperator {
}
}
- public static class TFSOSerDe implements SerDe {
+ public static class TFSOSerDe extends AbstractSerDe {
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
@@ -697,20 +827,18 @@ public class TestFileSinkOperator {
@Override
public Class<? extends Writable> getSerializedClass() {
- return TFSORow.class;
+ return RowWithPartNRecID.class;
}
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
- assert obj instanceof TFSORow : "Expected TFSORow or decendent, got "
- + obj.getClass().getName();
- return (TFSORow)obj;
+ assert obj instanceof Row : "Expected Row or descendant, got " + obj.getClass().getName();
+ return (Row)obj;
}
@Override
public Object deserialize(Writable blob) throws SerDeException {
- assert blob instanceof TFSORow : "Expected TFSORow or decendent, got "
- + blob.getClass().getName();
+ assert blob instanceof Row : "Expected Row or descendant, got " + blob.getClass().getName();
return blob;
}
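All three refactored row classes above serialize their nullable fields the same way: a boolean presence flag is written ahead of each optional field, and readFields() only materializes a field when its flag is set. A self-contained sketch of that pattern (OptionalFieldWritable is an illustrative name, not part of the test):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

class OptionalFieldWritable implements Writable {
  private Text value; // may legitimately be null

  @Override
  public void write(DataOutput out) throws IOException {
    out.writeBoolean(value != null); // presence flag first
    if (value != null) {
      value.write(out);
    }
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    if (in.readBoolean()) {
      value = new Text();
      value.readFields(in);
    } else {
      value = null;
    }
  }
}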
[22/22] hive git commit: HIVE-12015 : LLAP: merge master into branch
(Sergey Shelukhin)
Posted by se...@apache.org.
HIVE-12015 : LLAP: merge master into branch (Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c5ccf669
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c5ccf669
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c5ccf669
Branch: refs/heads/llap
Commit: c5ccf6694490a63329b3f4d9040dd976abd9d790
Parents: a1bc2ef 5074423
Author: Sergey Shelukhin <se...@apache.org>
Authored: Thu Oct 1 19:38:52 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Thu Oct 1 19:38:52 2015 -0700
----------------------------------------------------------------------
.../common/metrics/common/MetricsConstant.java | 5 +
.../hcatalog/pig/TestHCatLoaderEncryption.java | 3 +
.../hive/ql/security/FolderPermissionBase.java | 53 +-
.../upgrade/derby/021-HIVE-11970.derby.sql | 6 +
.../upgrade/derby/hive-schema-1.3.0.derby.sql | 12 +-
.../upgrade/derby/hive-schema-2.0.0.derby.sql | 12 +-
.../derby/upgrade-1.2.0-to-1.3.0.derby.sql | 1 +
.../derby/upgrade-1.2.0-to-2.0.0.derby.sql | 3 +-
.../upgrade/mssql/007-HIVE-11970.mssql.sql | 6 +
.../upgrade/mssql/hive-schema-1.3.0.mssql.sql | 12 +-
.../upgrade/mssql/hive-schema-2.0.0.mssql.sql | 12 +-
.../mssql/upgrade-1.2.0-to-1.3.0.mssql.sql | 1 +
.../mssql/upgrade-1.2.0-to-2.0.0.mssql.sql | 7 +-
.../upgrade/mysql/022-HIVE-11970.mysql.sql | 6 +
.../upgrade/mysql/hive-schema-1.3.0.mysql.sql | 12 +-
.../upgrade/mysql/hive-schema-2.0.0.mysql.sql | 12 +-
.../mysql/upgrade-1.2.0-to-1.3.0.mysql.sql | 1 +
.../mysql/upgrade-1.2.0-to-2.0.0.mysql.sql | 2 +
.../upgrade/oracle/022-HIVE-11970.oracle.sql | 23 +
.../upgrade/oracle/hive-schema-1.3.0.oracle.sql | 12 +-
.../upgrade/oracle/hive-schema-2.0.0.oracle.sql | 12 +-
.../oracle/upgrade-1.2.0-to-1.3.0.oracle.sql | 2 +
.../oracle/upgrade-1.2.0-to-2.0.0.oracle.sql | 2 +
.../postgres/021-HIVE-11970.postgres.sql | 6 +
.../postgres/hive-schema-1.3.0.postgres.sql | 12 +-
.../postgres/hive-schema-2.0.0.postgres.sql | 12 +-
.../upgrade-1.2.0-to-1.3.0.postgres.sql | 1 +
.../upgrade-1.2.0-to-2.0.0.postgres.sql | 1 +
.../hive/metastore/MetaStoreDirectSql.java | 34 +-
.../hadoop/hive/metastore/txn/TxnHandler.java | 159 +++---
.../hadoop/hive/ql/exec/FileSinkOperator.java | 19 +-
.../apache/hadoop/hive/ql/exec/Utilities.java | 16 +
.../hadoop/hive/ql/io/HiveFileFormatUtils.java | 95 +++-
.../hadoop/hive/ql/io/InputFormatChecker.java | 5 +-
.../hadoop/hive/ql/io/RCFileInputFormat.java | 3 +-
.../ql/io/SequenceFileInputFormatChecker.java | 3 +-
.../hive/ql/io/VectorizedRCFileInputFormat.java | 3 +-
.../hive/ql/io/orc/ColumnStatisticsImpl.java | 55 +-
.../apache/hadoop/hive/ql/io/orc/InStream.java | 25 +
.../hive/ql/io/orc/MetadataReaderImpl.java | 2 +-
.../apache/hadoop/hive/ql/io/orc/OrcFile.java | 33 +-
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 4 +-
.../hadoop/hive/ql/io/orc/OrcOutputFormat.java | 145 ++++-
.../apache/hadoop/hive/ql/io/orc/OrcUtils.java | 177 +-----
.../hadoop/hive/ql/io/orc/ReaderImpl.java | 45 +-
.../hadoop/hive/ql/io/orc/TypeDescription.java | 466 ++++++++++++++++
.../ql/io/orc/VectorizedOrcInputFormat.java | 2 +-
.../apache/hadoop/hive/ql/io/orc/Writer.java | 9 +
.../hadoop/hive/ql/io/orc/WriterImpl.java | 550 +++++++++----------
.../hadoop/hive/ql/lockmgr/DbLockManager.java | 21 +
.../zookeeper/ZooKeeperHiveLockManager.java | 41 ++
.../apache/hadoop/hive/ql/metadata/Hive.java | 108 +---
.../hive/ql/optimizer/ColumnPrunerProcCtx.java | 2 +-
.../optimizer/ConstantPropagateProcFactory.java | 11 +-
.../hive/ql/optimizer/GenMapRedUtils.java | 10 +-
.../calcite/translator/HiveGBOpConvUtil.java | 43 +-
.../hadoop/hive/ql/parse/FromClauseParser.g | 30 +-
.../apache/hadoop/hive/ql/parse/HiveParser.g | 7 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 33 +-
.../hive/ql/plan/DynamicPartitionCtx.java | 27 -
.../hadoop/hive/ql/io/orc/orc_proto.proto | 1 +
.../hive/ql/exec/TestFileSinkOperator.java | 386 ++++++++-----
.../hive/ql/io/orc/TestColumnStatistics.java | 43 +-
.../hive/ql/io/orc/TestInputOutputFormat.java | 15 +-
.../hadoop/hive/ql/io/orc/TestOrcFile.java | 41 +-
.../hive/ql/io/orc/TestOrcRawRecordMerger.java | 2 +-
.../hadoop/hive/ql/io/orc/TestOrcWideTable.java | 224 +-------
.../hive/ql/io/orc/TestTypeDescription.java | 67 +++
.../zookeeper/TestZookeeperLockManager.java | 50 ++
.../hive/ql/txn/compactor/CompactorTest.java | 2 +-
.../cbo_rp_gby2_map_multi_distinct.q | 38 ++
ql/src/test/queries/clientpositive/join_parse.q | 20 +
.../queries/clientpositive/update_all_types.q | 2 +-
.../clientpositive/windowing_windowspec2.q | 16 +-
.../resources/orc-file-dump-bloomfilter.out | 2 +-
.../resources/orc-file-dump-bloomfilter2.out | 2 +-
.../orc-file-dump-dictionary-threshold.out | 2 +-
ql/src/test/resources/orc-file-dump.json | 2 +-
ql/src/test/resources/orc-file-dump.out | 2 +-
ql/src/test/resources/orc-file-has-null.out | 2 +-
.../clientnegative/cte_with_in_subquery.q.out | 2 +-
.../clientpositive/annotate_stats_part.q.out | 6 +-
.../clientpositive/annotate_stats_table.q.out | 4 +-
.../cbo_rp_gby2_map_multi_distinct.q.out | 236 ++++++++
.../dynpart_sort_opt_vectorization.q.out | 16 +-
.../dynpart_sort_optimization2.q.out | 8 +-
.../extrapolate_part_stats_full.q.out | 24 +-
.../extrapolate_part_stats_partial.q.out | 76 +--
.../extrapolate_part_stats_partial_ndv.q.out | 38 +-
.../results/clientpositive/join_parse.q.out | 516 +++++++++++++++++
.../results/clientpositive/orc_analyze.q.out | 46 +-
.../results/clientpositive/orc_file_dump.q.out | 18 +-
.../clientpositive/orc_int_type_promotion.q.out | 6 +-
.../clientpositive/spark/vectorized_ptf.q.out | 108 ++--
.../tez/dynpart_sort_opt_vectorization.q.out | 16 +-
.../tez/dynpart_sort_optimization2.q.out | 8 +-
.../clientpositive/tez/orc_analyze.q.out | 46 +-
.../clientpositive/tez/union_fast_stats.q.out | 16 +-
.../clientpositive/tez/update_all_types.q.out | 4 +-
.../clientpositive/tez/vector_outer_join1.q.out | 48 +-
.../clientpositive/tez/vector_outer_join4.q.out | 48 +-
.../clientpositive/tez/vectorized_ptf.q.out | 108 ++--
.../clientpositive/union_fast_stats.q.out | 16 +-
.../clientpositive/update_all_types.q.out | 4 +-
.../results/clientpositive/vectorized_ptf.q.out | 104 ++--
.../clientpositive/windowing_windowspec2.q.out | 198 +++----
106 files changed, 3256 insertions(+), 1815 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 1c350db,5b21af9..1d79aff
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@@ -3920,38 -3918,19 +3921,53 @@@ public final class Utilities
}
}
+ /**
+ * Returns the full path to the Jar containing the class. It always returns a JAR.
+ *
+ * @param klass
+ * class.
+ *
+ * @return path to the Jar containing the class.
+ */
+ @SuppressWarnings("rawtypes")
+ public static String jarFinderGetJar(Class klass) {
+ Preconditions.checkNotNull(klass, "klass");
+ ClassLoader loader = klass.getClassLoader();
+ if (loader != null) {
+ String class_file = klass.getName().replaceAll("\\.", "/") + ".class";
+ try {
+ for (Enumeration itr = loader.getResources(class_file); itr.hasMoreElements();) {
+ URL url = (URL) itr.nextElement();
+ String path = url.getPath();
+ if (path.startsWith("file:")) {
+ path = path.substring("file:".length());
+ }
+ path = URLDecoder.decode(path, "UTF-8");
+ if ("jar".equals(url.getProtocol())) {
+ path = URLDecoder.decode(path, "UTF-8");
+ return path.replaceAll("!.*$", "");
+ }
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ return null;
+ }
+
+ public static int getDPColOffset(FileSinkDesc conf) {
+
+ if (conf.getWriteType() == AcidUtils.Operation.DELETE) {
+ // For deletes, there is only ROW__ID in non-partitioning, non-bucketing columns.
+ //See : UpdateDeleteSemanticAnalyzer::reparseAndSuperAnalyze() for details.
+ return 1;
+ } else if (conf.getWriteType() == AcidUtils.Operation.UPDATE) {
+ // For updates, ROW__ID is an extra column at index 0.
+ //See : UpdateDeleteSemanticAnalyzer::reparseAndSuperAnalyze() for details.
+ return getColumnNames(conf.getTableInfo().getProperties()).size() + 1;
+ } else {
+ return getColumnNames(conf.getTableInfo().getProperties()).size();
+ }
+
+ }
}
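A hypothetical caller of the merged-in jarFinderGetJar helper, assuming hive-exec is on the classpath. Per the code above, the method returns the enclosing JAR path for classes loaded from a jar: URL; for anything else this excerpt falls through and may return null, so callers should check:

// Illustrative only; HiveConf is used merely as a class that normally
// lives inside a JAR on a deployed cluster.
public class JarFinderDemo {
  public static void main(String[] args) {
    String jar = org.apache.hadoop.hive.ql.exec.Utilities.jarFinderGetJar(
        org.apache.hadoop.hive.conf.HiveConf.class);
    System.out.println(jar == null ? "not loaded from a jar" : jar);
  }
}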
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
index 8f0824b,6fec8b7..3dde0c4
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/InStream.java
@@@ -34,13 -35,12 +35,14 @@@ import com.google.protobuf.CodedInputSt
public abstract class InStream extends InputStream {
private static final Log LOG = LogFactory.getLog(InStream.class);
+ private static final int PROTOBUF_MESSAGE_MAX_LIMIT = 1024 << 20; // 1GB
+ protected final Long fileId;
protected final String name;
- protected final long length;
+ protected long length;
- public InStream(String name, long length) {
+ public InStream(Long fileId, String name, long length) {
+ this.fileId = fileId;
this.name = name;
this.length = length;
}
@@@ -468,9 -444,31 +470,32 @@@
CompressionCodec codec,
int bufferSize) throws IOException {
if (codec == null) {
- return new UncompressedStream(name, input, length);
+ return new UncompressedStream(fileId, name, input, length);
} else {
- return new CompressedStream(name, input, length, codec, bufferSize);
+ return new CompressedStream(fileId, name, input, length, codec, bufferSize);
}
}
+
+ /**
+ * Creates coded input stream (used for protobuf message parsing) with higher message size limit.
+ *
+ * @param name the name of the stream
+ * @param input the list of ranges of bytes for the stream; from disk or cache
+ * @param length the length in bytes of the stream
+ * @param codec the compression codec
+ * @param bufferSize the compression buffer size
+ * @return coded input stream
+ * @throws IOException
+ */
- public static CodedInputStream createCodedInputStream(String name,
++ public static CodedInputStream createCodedInputStream(Long fileId,
++ String name,
+ List<DiskRange> input,
+ long length,
+ CompressionCodec codec,
+ int bufferSize) throws IOException {
- InStream inStream = create(name, input, length, codec, bufferSize);
++ InStream inStream = create(fileId, name, input, length, codec, bufferSize);
+ CodedInputStream codedInputStream = CodedInputStream.newInstance(inStream);
+ codedInputStream.setSizeLimit(PROTOBUF_MESSAGE_MAX_LIMIT);
+ return codedInputStream;
+ }
}
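The motivation for createCodedInputStream() is that protobuf's CodedInputStream enforces a 64MB message size limit by default, which large ORC footer and metadata sections can exceed; raising the limit once replaces the parse-retry-and-double loop removed from ReaderImpl further below. A minimal sketch of the idea using plain protobuf rather than Hive's InStream:

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import com.google.protobuf.CodedInputStream;

final class CodedStreamSketch {
  private static final int PROTOBUF_MESSAGE_MAX_LIMIT = 1024 << 20; // 1GB

  // Wrap a stream and raise the default size limit before parsing a
  // potentially large protobuf message from it.
  static CodedInputStream withRaisedLimit(InputStream in) {
    CodedInputStream cis = CodedInputStream.newInstance(in);
    cis.setSizeLimit(PROTOBUF_MESSAGE_MAX_LIMIT);
    return cis;
  }

  public static void main(String[] args) {
    CodedInputStream cis =
        withRaisedLimit(new ByteArrayInputStream(new byte[0]));
    // SomeOrcMessage.parseFrom(cis) would now accept messages up to 1GB.
    System.out.println("size limit raised to 1GB");
  }
}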
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReaderImpl.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReaderImpl.java
index 1456df3,0000000..5afba51
mode 100644,000000..100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/MetadataReaderImpl.java
@@@ -1,123 -1,0 +1,123 @@@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.List;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.io.DiskRange;
+import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.BufferChunk;
+
+import com.google.common.collect.Lists;
+
+public class MetadataReaderImpl implements MetadataReader {
+ private final FSDataInputStream file;
+ private final CompressionCodec codec;
+ private final int bufferSize;
+ private final int typeCount;
+
+ public MetadataReaderImpl(FileSystem fileSystem, Path path,
+ CompressionCodec codec, int bufferSize, int typeCount) throws IOException {
+ this(fileSystem.open(path), codec, bufferSize, typeCount);
+ }
+
+ public MetadataReaderImpl(FSDataInputStream file,
+ CompressionCodec codec, int bufferSize, int typeCount) {
+ this.file = file;
+ this.codec = codec;
+ this.bufferSize = bufferSize;
+ this.typeCount = typeCount;
+ }
+
+ @Override
+ public RecordReaderImpl.Index readRowIndex(StripeInformation stripe,
+ OrcProto.StripeFooter footer, boolean[] included, OrcProto.RowIndex[] indexes,
+ boolean[] sargColumns, OrcProto.BloomFilterIndex[] bloomFilterIndices) throws IOException {
+ if (footer == null) {
+ footer = readStripeFooter(stripe);
+ }
+ if (indexes == null) {
+ indexes = new OrcProto.RowIndex[typeCount];
+ }
+ if (bloomFilterIndices == null) {
+ bloomFilterIndices = new OrcProto.BloomFilterIndex[typeCount];
+ }
+ long offset = stripe.getOffset();
+ List<OrcProto.Stream> streams = footer.getStreamsList();
+ for (int i = 0; i < streams.size(); i++) {
+ OrcProto.Stream stream = streams.get(i);
+ OrcProto.Stream nextStream = null;
+ if (i < streams.size() - 1) {
+ nextStream = streams.get(i+1);
+ }
+ int col = stream.getColumn();
+ int len = (int) stream.getLength();
+ // row index stream and bloom filter are interlaced, check if the sarg column contains bloom
+ // filter and combine the io to read row index and bloom filters for that column together
+ if (stream.hasKind() && (stream.getKind() == OrcProto.Stream.Kind.ROW_INDEX)) {
+ boolean readBloomFilter = false;
+ if (sargColumns != null && sargColumns[col] &&
+ nextStream.getKind() == OrcProto.Stream.Kind.BLOOM_FILTER) {
+ len += nextStream.getLength();
+ i += 1;
+ readBloomFilter = true;
+ }
+ if ((included == null || included[col]) && indexes[col] == null) {
+ byte[] buffer = new byte[len];
+ file.readFully(offset, buffer, 0, buffer.length);
+ ByteBuffer bb = ByteBuffer.wrap(buffer);
+ indexes[col] = OrcProto.RowIndex.parseFrom(InStream.create(null, "index",
+ Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), stream.getLength(),
+ codec, bufferSize));
+ if (readBloomFilter) {
+ bb.position((int) stream.getLength());
+ bloomFilterIndices[col] = OrcProto.BloomFilterIndex.parseFrom(InStream.create(
+ null, "bloom_filter", Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)),
+ nextStream.getLength(), codec, bufferSize));
+ }
+ }
+ }
+ offset += len;
+ }
+
+ RecordReaderImpl.Index index = new RecordReaderImpl.Index(indexes, bloomFilterIndices);
+ return index;
+ }
+
+ @Override
+ public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException {
+ long offset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength();
+ int tailLength = (int) stripe.getFooterLength();
+
+ // read the footer
+ ByteBuffer tailBuf = ByteBuffer.allocate(tailLength);
+ file.readFully(offset, tailBuf.array(), tailBuf.arrayOffset(), tailLength);
- return OrcProto.StripeFooter.parseFrom(InStream.create(null, "footer",
++ return OrcProto.StripeFooter.parseFrom(InStream.createCodedInputStream(null, "footer",
+ Lists.<DiskRange>newArrayList(new BufferChunk(tailBuf, 0)),
+ tailLength, codec, bufferSize));
+ }
+
+ @Override
+ public void close() throws IOException {
+ file.close();
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 04654bc,57bde3e..8941db1
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@@ -107,8 -106,8 +107,8 @@@ import com.google.common.util.concurren
* that added this event. Insert and update events include the entire row, while
* delete events have null for row.
*/
- public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
+ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
- InputFormatChecker, VectorizedInputFormatInterface,
+ InputFormatChecker, VectorizedInputFormatInterface, LlapWrappableInputFormatInterface,
AcidInputFormat<NullWritable, OrcStruct>, CombineHiveInputFormat.AvoidSplitCombination {
static enum SplitStrategyKind{
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index ce630bb,3bac48a..f3689fe
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@@ -423,47 -383,16 +419,16 @@@ public class ReaderImpl implements Read
int footerSize, CompressionCodec codec, int bufferSize) throws IOException {
bb.position(footerAbsPos);
bb.limit(footerAbsPos + footerSize);
- InputStream instream = InStream.create(null, "footer", Lists.<DiskRange>newArrayList(
- new BufferChunk(bb, 0)), footerSize, codec, bufferSize);
- return OrcProto.Footer.parseFrom(instream);
- return OrcProto.Footer.parseFrom(InStream.createCodedInputStream("footer",
++ return OrcProto.Footer.parseFrom(InStream.createCodedInputStream(null, "footer",
+ Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), footerSize, codec, bufferSize));
}
private static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos,
int metadataSize, CompressionCodec codec, int bufferSize) throws IOException {
bb.position(metadataAbsPos);
bb.limit(metadataAbsPos + metadataSize);
- InputStream instream = InStream.create(null, "metadata", Lists.<DiskRange>newArrayList(
- new BufferChunk(bb, 0)), metadataSize, codec, bufferSize);
- CodedInputStream in = CodedInputStream.newInstance(instream);
- int msgLimit = DEFAULT_PROTOBUF_MESSAGE_LIMIT;
- OrcProto.Metadata meta = null;
- do {
- try {
- in.setSizeLimit(msgLimit);
- meta = OrcProto.Metadata.parseFrom(in);
- } catch (InvalidProtocolBufferException e) {
- if (e.getMessage().contains("Protocol message was too large")) {
- LOG.warn("Metadata section is larger than " + msgLimit + " bytes. Increasing the max" +
- " size of the coded input stream." );
-
- msgLimit = msgLimit << 1;
- if (msgLimit > PROTOBUF_MESSAGE_MAX_LIMIT) {
- LOG.error("Metadata section exceeds max protobuf message size of " +
- PROTOBUF_MESSAGE_MAX_LIMIT + " bytes.");
- throw e;
- }
-
- // we must have failed in the middle of reading instream and instream doesn't support
- // resetting the stream
- instream = InStream.create(null, "metadata", Lists.<DiskRange>newArrayList(
- new BufferChunk(bb, 0)), metadataSize, codec, bufferSize);
- in = CodedInputStream.newInstance(instream);
- } else {
- throw e;
- }
- }
- } while (meta == null);
- return meta;
- return OrcProto.Metadata.parseFrom(InStream.createCodedInputStream("metadata",
++ return OrcProto.Metadata.parseFrom(InStream.createCodedInputStream(null, "metadata",
+ Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), metadataSize, codec, bufferSize));
}
private static OrcProto.PostScript extractPostScript(ByteBuffer bb, Path path,
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
----------------------------------------------------------------------
diff --cc ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
index 7e81615,06e3362..e78f7aa
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
@@@ -656,17 -657,18 +657,18 @@@ public class TestOrcFile
Reader reader = OrcFile.createReader(testFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
- assertEquals(3, OrcUtils.getFlattenedColumnsCount(inspector));
+ TypeDescription schema = writer.getSchema();
+ assertEquals(2, schema.getMaximumId());
boolean[] expected = new boolean[] {false, true, false};
- boolean[] included = OrcUtils.includeColumns("int1", "int1,string1", inspector);
+ boolean[] included = OrcUtils.includeColumns("int1", schema);
assertEquals(true, Arrays.equals(expected, included));
- Metadata metadata = reader.getMetadata();
- int numStripes = metadata.getStripeStatistics().size();
+ List<StripeStatistics> stats = reader.getStripeStatistics();
+ int numStripes = stats.size();
assertEquals(3, numStripes);
- StripeStatistics ss1 = metadata.getStripeStatistics().get(0);
- StripeStatistics ss2 = metadata.getStripeStatistics().get(1);
- StripeStatistics ss3 = metadata.getStripeStatistics().get(2);
+ StripeStatistics ss1 = stats.get(0);
+ StripeStatistics ss2 = stats.get(1);
+ StripeStatistics ss3 = stats.get(2);
assertEquals(5000, ss1.getColumnStatistics()[0].getNumberOfValues());
assertEquals(5000, ss2.getColumnStatistics()[0].getNumberOfValues());
@@@ -777,9 -777,11 +777,9 @@@
true, true, true, true};
included = OrcUtils.includeColumns(
"boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map",
- "boolean1,byte1,short1,int1,long1,float1,double1,bytes1,string1,middle,list,map", inspector);
+ schema);
assertEquals(true, Arrays.equals(expected, included));
- Metadata metadata = reader.getMetadata();
-
// check the stats
ColumnStatistics[] stats = reader.getStatistics();
assertEquals(2, stats[1].getNumberOfValues());
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/results/clientpositive/tez/vector_outer_join1.q.out
----------------------------------------------------------------------
diff --cc ql/src/test/results/clientpositive/tez/vector_outer_join1.q.out
index 46a25e7,92e7163..c94c3f2
--- a/ql/src/test/results/clientpositive/tez/vector_outer_join1.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_outer_join1.q.out
@@@ -562,10 -562,9 +562,10 @@@ STAGE PLANS
key expressions: _col0 (type: tinyint)
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 3651 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Reducer 2
+ Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), sum(VALUE._col1)
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out
----------------------------------------------------------------------
diff --cc ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out
index f0a9185,aa201ad..f66d5a4
--- a/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_outer_join4.q.out
@@@ -931,10 -931,9 +931,10 @@@ STAGE PLANS
key expressions: _col0 (type: tinyint)
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 30 Data size: 6838 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 30 Data size: 7006 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Reducer 2
+ Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5ccf669/ql/src/test/results/clientpositive/vectorized_ptf.q.out
----------------------------------------------------------------------