Posted to commits@hive.apache.org by se...@apache.org on 2018/06/22 18:11:26 UTC

[1/7] hive git commit: HIVE-19889: Do not push predicates referencing non deterministic functions (Janaki Lahorani, reviewed by Naveen Gangam)

Repository: hive
Updated Branches:
  refs/heads/master-txnstats ac3f5186e -> 4743c7984


HIVE-19889: Do not push predicates referencing non deterministic functions (Janaki Lahorani, reviewed by Naveen Gangam)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6a87f7f0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6a87f7f0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6a87f7f0

Branch: refs/heads/master-txnstats
Commit: 6a87f7f0f476f35f71a6fdc070a9344bc3fdb924
Parents: 9cd3960
Author: Naveen Gangam <ng...@apache.org>
Authored: Thu Jun 21 16:15:40 2018 -0400
Committer: Naveen Gangam <ng...@apache.org>
Committed: Thu Jun 21 16:15:40 2018 -0400

----------------------------------------------------------------------
 .../rules/HiveFilterProjectTransposeRule.java   |   8 +-
 .../clientpositive/cbo_ppd_non_deterministic.q  |  42 ++++
 .../cbo_ppd_non_deterministic.q.out             | 195 +++++++++++++++++++
 .../results/clientpositive/ppd_udf_col.q.out    |  62 +++---
 .../results/clientpositive/union_offcbo.q.out   |  64 +++---
 5 files changed, 316 insertions(+), 55 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/6a87f7f0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
index af2207f..efe20d9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
@@ -69,7 +69,13 @@ public class HiveFilterProjectTransposeRule extends FilterProjectTransposeRule {
   @Override
   public boolean matches(RelOptRuleCall call) {
     final Filter filterRel = call.rel(0);
-    RexNode condition = filterRel.getCondition();
+
+    // The condition fetched here can reference a non-deterministic UDF that is defined in the
+    // select list, for example when a view is involved.  After the pushdown, the condition
+    // resolves to that UDF from the select list, so the deterministic-filter check must be done
+    // on the resolved expression.  Refer to test case cbo_ppd_non_deterministic.q.
+    RexNode condition = RelOptUtil.pushPastProject(filterRel.getCondition(), call.rel(1));
+
     if (this.onlyDeterministic && !HiveCalciteUtil.isDeterministic(condition)) {
       return false;
     }
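
Before this change, the rule evaluated the determinism check on the condition as written over the
project's output, so a predicate such as randum123 <= 0.5 looked like a plain column comparison and
passed the check even though randum123 is defined as rand() in the select list. Resolving the
condition with RelOptUtil.pushPastProject first turns it into rand() <= 0.5, which
HiveCalciteUtil.isDeterministic can then reject, keeping the filter above the Select Operator (see
the updated ppd_udf_col.q.out below). As a rough illustration of what a determinism check over the
resolved expression tree does (a hedged sketch, not the actual HiveCalciteUtil implementation):

import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexNode;

/**
 * Hypothetical standalone check: an expression is deterministic iff no call
 * anywhere in its tree uses a non-deterministic operator such as RAND().
 */
public final class DeterminismCheck {
  private DeterminismCheck() {
  }

  public static boolean isDeterministic(RexNode expr) {
    if (expr instanceof RexCall) {
      RexCall call = (RexCall) expr;
      if (!call.getOperator().isDeterministic()) {
        return false;                       // e.g. RAND() reports itself as non-deterministic
      }
      for (RexNode operand : call.getOperands()) {
        if (!isDeterministic(operand)) {    // recurse into nested calls
          return false;
        }
      }
    }
    return true;                            // literals and input refs are deterministic
  }
}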

http://git-wip-us.apache.org/repos/asf/hive/blob/6a87f7f0/ql/src/test/queries/clientpositive/cbo_ppd_non_deterministic.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cbo_ppd_non_deterministic.q b/ql/src/test/queries/clientpositive/cbo_ppd_non_deterministic.q
new file mode 100644
index 0000000..f1a7a63
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_ppd_non_deterministic.q
@@ -0,0 +1,42 @@
+CREATE TABLE `testa`(
+   `col1` string COMMENT '',
+   `col2` string COMMENT '',
+   `col3` string COMMENT '',
+   `col4` string COMMENT '',
+   `col5` string COMMENT '')
+PARTITIONED BY (
+   `part1` string,
+   `part2` string,
+   `part3` string)
+STORED AS AVRO;
+
+insert into testA partition (part1='US', part2='ABC', part3='123')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+insert into testA partition (part1='UK', part2='DEF', part3='123')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+insert into testA partition (part1='US', part2='DEF', part3='200')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+insert into testA partition (part1='CA', part2='ABC', part3='300')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd');
+
+set hive.cbo.enable=true;
+SET hive.vectorized.execution.enabled=false;
+
+explain select * from (
+select part1,randum123
+from (SELECT *, cast(rand() as double) AS randum123 FROM testA where part1='CA' and part2 = 'ABC') a
+where randum123 <= 0.5) s where s.randum123 > 0.25 limit 20;
+
+SET hive.vectorized.execution.enabled=true;
+
+explain select * from (
+select part1,randum123
+from (SELECT *, cast(rand() as double) AS randum123 FROM testA where part1='CA' and part2 = 'ABC') a
+where randum123 <= 0.5) s where s.randum123 > 0.25 limit 20;

http://git-wip-us.apache.org/repos/asf/hive/blob/6a87f7f0/ql/src/test/results/clientpositive/cbo_ppd_non_deterministic.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_ppd_non_deterministic.q.out b/ql/src/test/results/clientpositive/cbo_ppd_non_deterministic.q.out
new file mode 100644
index 0000000..8f00aa8
--- /dev/null
+++ b/ql/src/test/results/clientpositive/cbo_ppd_non_deterministic.q.out
@@ -0,0 +1,195 @@
+PREHOOK: query: CREATE TABLE `testa`(
+   `col1` string COMMENT '',
+   `col2` string COMMENT '',
+   `col3` string COMMENT '',
+   `col4` string COMMENT '',
+   `col5` string COMMENT '')
+PARTITIONED BY (
+   `part1` string,
+   `part2` string,
+   `part3` string)
+STORED AS AVRO
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testa
+POSTHOOK: query: CREATE TABLE `testa`(
+   `col1` string COMMENT '',
+   `col2` string COMMENT '',
+   `col3` string COMMENT '',
+   `col4` string COMMENT '',
+   `col5` string COMMENT '')
+PARTITIONED BY (
+   `part1` string,
+   `part2` string,
+   `part3` string)
+STORED AS AVRO
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testa
+PREHOOK: query: insert into testA partition (part1='US', part2='ABC', part3='123')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@testa@part1=US/part2=ABC/part3=123
+POSTHOOK: query: insert into testA partition (part1='US', part2='ABC', part3='123')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@testa@part1=US/part2=ABC/part3=123
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=ABC,part3=123).col1 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=ABC,part3=123).col2 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=ABC,part3=123).col3 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=ABC,part3=123).col4 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=ABC,part3=123).col5 SCRIPT []
+PREHOOK: query: insert into testA partition (part1='UK', part2='DEF', part3='123')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@testa@part1=UK/part2=DEF/part3=123
+POSTHOOK: query: insert into testA partition (part1='UK', part2='DEF', part3='123')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@testa@part1=UK/part2=DEF/part3=123
+POSTHOOK: Lineage: testa PARTITION(part1=UK,part2=DEF,part3=123).col1 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=UK,part2=DEF,part3=123).col2 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=UK,part2=DEF,part3=123).col3 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=UK,part2=DEF,part3=123).col4 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=UK,part2=DEF,part3=123).col5 SCRIPT []
+PREHOOK: query: insert into testA partition (part1='US', part2='DEF', part3='200')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@testa@part1=US/part2=DEF/part3=200
+POSTHOOK: query: insert into testA partition (part1='US', part2='DEF', part3='200')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@testa@part1=US/part2=DEF/part3=200
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=DEF,part3=200).col1 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=DEF,part3=200).col2 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=DEF,part3=200).col3 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=DEF,part3=200).col4 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=US,part2=DEF,part3=200).col5 SCRIPT []
+PREHOOK: query: insert into testA partition (part1='CA', part2='ABC', part3='300')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@testa@part1=CA/part2=ABC/part3=300
+POSTHOOK: query: insert into testA partition (part1='CA', part2='ABC', part3='300')
+values ('12.34', '100', '200', '300', 'abc'),
+('12.341', '1001', '2001', '3001', 'abcd')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@testa@part1=CA/part2=ABC/part3=300
+POSTHOOK: Lineage: testa PARTITION(part1=CA,part2=ABC,part3=300).col1 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=CA,part2=ABC,part3=300).col2 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=CA,part2=ABC,part3=300).col3 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=CA,part2=ABC,part3=300).col4 SCRIPT []
+POSTHOOK: Lineage: testa PARTITION(part1=CA,part2=ABC,part3=300).col5 SCRIPT []
+PREHOOK: query: explain select * from (
+select part1,randum123
+from (SELECT *, cast(rand() as double) AS randum123 FROM testA where part1='CA' and part2 = 'ABC') a
+where randum123 <= 0.5) s where s.randum123 > 0.25 limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from (
+select part1,randum123
+from (SELECT *, cast(rand() as double) AS randum123 FROM testA where part1='CA' and part2 = 'ABC') a
+where randum123 <= 0.5) s where s.randum123 > 0.25 limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: testa
+            Statistics: Num rows: 2 Data size: 4580 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: rand() (type: double)
+              outputColumnNames: _col0
+              Statistics: Num rows: 2 Data size: 4580 Basic stats: COMPLETE Column stats: NONE
+              Filter Operator
+                predicate: ((_col0 <= 0.5D) and (_col0 > 0.25D)) (type: boolean)
+                Statistics: Num rows: 1 Data size: 2290 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: 'CA' (type: string), _col0 (type: double)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 2290 Basic stats: COMPLETE Column stats: NONE
+                  Limit
+                    Number of rows: 20
+                    Statistics: Num rows: 1 Data size: 2290 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 2290 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select * from (
+select part1,randum123
+from (SELECT *, cast(rand() as double) AS randum123 FROM testA where part1='CA' and part2 = 'ABC') a
+where randum123 <= 0.5) s where s.randum123 > 0.25 limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from (
+select part1,randum123
+from (SELECT *, cast(rand() as double) AS randum123 FROM testA where part1='CA' and part2 = 'ABC') a
+where randum123 <= 0.5) s where s.randum123 > 0.25 limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: testa
+            Statistics: Num rows: 2 Data size: 4580 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: rand() (type: double)
+              outputColumnNames: _col0
+              Statistics: Num rows: 2 Data size: 4580 Basic stats: COMPLETE Column stats: NONE
+              Filter Operator
+                predicate: ((_col0 <= 0.5D) and (_col0 > 0.25D)) (type: boolean)
+                Statistics: Num rows: 1 Data size: 2290 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: 'CA' (type: string), _col0 (type: double)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 2290 Basic stats: COMPLETE Column stats: NONE
+                  Limit
+                    Number of rows: 20
+                    Statistics: Num rows: 1 Data size: 2290 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 2290 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+

http://git-wip-us.apache.org/repos/asf/hive/blob/6a87f7f0/ql/src/test/results/clientpositive/ppd_udf_col.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/ppd_udf_col.q.out b/ql/src/test/results/clientpositive/ppd_udf_col.q.out
index 97ca383..ee5d300 100644
--- a/ql/src/test/results/clientpositive/ppd_udf_col.q.out
+++ b/ql/src/test/results/clientpositive/ppd_udf_col.q.out
@@ -20,19 +20,22 @@ STAGE PLANS:
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((UDFToDouble(key) = 100.0D) and (rand() <= 0.1D)) (type: boolean)
-              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              predicate: (UDFToDouble(key) = 100.0D) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), rand() (type: double)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col1 <= 0.1D) (type: boolean)
                   Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
 
   Stage: Stage-0
@@ -67,18 +70,18 @@ STAGE PLANS:
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((UDFToDouble(key) = 100.0D) and (rand() <= 0.1D) and (rand() > 0.1D)) (type: boolean)
-              Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+              predicate: false (type: boolean)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), rand() (type: double)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 20
-                  Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -199,19 +202,22 @@ STAGE PLANS:
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((UDFToDouble(key) = 100.0D) and (rand() <= 0.1D)) (type: boolean)
-              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              predicate: (UDFToDouble(key) = 100.0D) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), rand() (type: double)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col1 <= 0.1D) (type: boolean)
                   Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
 
   Stage: Stage-0
@@ -246,18 +252,18 @@ STAGE PLANS:
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((UDFToDouble(key) = 100.0D) and (rand() <= 0.1D) and (rand() > 0.1D)) (type: boolean)
-              Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+              predicate: false (type: boolean)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), rand() (type: double)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 20
-                  Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/6a87f7f0/ql/src/test/results/clientpositive/union_offcbo.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_offcbo.q.out b/ql/src/test/results/clientpositive/union_offcbo.q.out
index a723f00..8480043 100644
--- a/ql/src/test/results/clientpositive/union_offcbo.q.out
+++ b/ql/src/test/results/clientpositive/union_offcbo.q.out
@@ -591,18 +591,21 @@ STAGE PLANS:
             alias: ttest1
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Filter Operator
-              predicate: ((ts1 = '2015-11-20') and reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) is not null) (type: boolean)
+              predicate: (ts1 = '2015-11-20') (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) (type: string), reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(at1)) (type: string)
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
+                Filter Operator
+                  predicate: _col0 is not null (type: boolean)
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                  value expressions: _col1 (type: string)
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    value expressions: _col1 (type: string)
           TableScan
             alias: ttest2
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -681,23 +684,26 @@ STAGE PLANS:
             alias: ttest1
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Filter Operator
-              predicate: ((ts1 = '2015-11-20') and reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) is not null) (type: boolean)
+              predicate: (ts1 = '2015-11-20') (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: id1 (type: bigint), sts (type: string), at1 (type: bigint), reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) (type: string), reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(at1)) (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
                 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col3 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col3 (type: string)
+                Filter Operator
+                  predicate: _col3 is not null (type: boolean)
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                  value expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: bigint), _col4 (type: string)
+                  Reduce Output Operator
+                    key expressions: _col3 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col3 (type: string)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    value expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: bigint), _col4 (type: string)
           TableScan
             alias: ttest2
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Filter Operator
-              predicate: '2015-11-20' BETWEEN dt1 AND dt2 (type: boolean)
+              predicate: ('2015-11-20' BETWEEN dt1 AND dt2 and khash is not null) (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: ts1 (type: string), khash (type: string), rhash (type: string)
@@ -1630,18 +1636,21 @@ STAGE PLANS:
             alias: ttest1
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Filter Operator
-              predicate: ((ts1 = '2015-11-20') and reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) is not null) (type: boolean)
+              predicate: (ts1 = '2015-11-20') (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) (type: string), reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(at1)) (type: string)
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
+                Filter Operator
+                  predicate: _col0 is not null (type: boolean)
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                  value expressions: _col1 (type: string)
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    value expressions: _col1 (type: string)
           TableScan
             alias: ttest2
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -1720,23 +1729,26 @@ STAGE PLANS:
             alias: ttest1
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Filter Operator
-              predicate: ((ts1 = '2015-11-20') and reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) is not null) (type: boolean)
+              predicate: (ts1 = '2015-11-20') (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: id1 (type: bigint), sts (type: string), at1 (type: bigint), reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) (type: string), reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(at1)) (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
                 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col3 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col3 (type: string)
+                Filter Operator
+                  predicate: _col3 is not null (type: boolean)
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                  value expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: bigint), _col4 (type: string)
+                  Reduce Output Operator
+                    key expressions: _col3 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col3 (type: string)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    value expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: bigint), _col4 (type: string)
           TableScan
             alias: ttest2
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Filter Operator
-              predicate: '2015-11-20' BETWEEN dt1 AND dt2 (type: boolean)
+              predicate: ('2015-11-20' BETWEEN dt1 AND dt2 and khash is not null) (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: ts1 (type: string), khash (type: string), rhash (type: string)


[6/7] hive git commit: HIVE-19963 : metadata_only_queries.q fails (Steve Yeom, reviewed by Sergey Shelukhin)

Posted by se...@apache.org.
HIVE-19963 : metadata_only_queries.q fails (Steve Yeom, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/42a9f3bb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/42a9f3bb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/42a9f3bb

Branch: refs/heads/master-txnstats
Commit: 42a9f3bb031624da1f3553dfd9a61702bf280846
Parents: ac3f518
Author: sergey <se...@apache.org>
Authored: Fri Jun 22 11:04:30 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Fri Jun 22 11:04:30 2018 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java  | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/42a9f3bb/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index 4d69f4c..18a27c4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -541,8 +541,8 @@ public class StatsOptimizer extends Transform {
                   hive.getMSC().getTableColumnStatistics(
                     tbl.getDbName(), tbl.getTableName(),
                     Lists.newArrayList(colName),
-                      tableSnapshot.getTxnId(),
-                      tableSnapshot.getValidWriteIdList());
+                      tableSnapshot != null ? tableSnapshot.getTxnId() : -1,
+                      tableSnapshot != null ? tableSnapshot.getValidWriteIdList() : null);
               if (stats.isEmpty()) {
                 Logger.debug("No stats for " + tbl.getTableName() + " column " + colName);
                 return null;
@@ -685,8 +685,8 @@ public class StatsOptimizer extends Transform {
               ColumnStatisticsData statData =
                   hive.getMSC().getTableColumnStatistics(
                     tbl.getDbName(), tbl.getTableName(), Lists.newArrayList(colName),
-                      tableSnapshot.getTxnId(),
-                      tableSnapshot.getValidWriteIdList())
+                      tableSnapshot != null ? tableSnapshot.getTxnId() : -1,
+                      tableSnapshot != null ? tableSnapshot.getValidWriteIdList() : null)
                     .get(0).getStatsData();
               String name = colDesc.getTypeString().toUpperCase();
               switch (type) {
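
The change is a plain null guard: when no transactional snapshot is available for the table, the
metastore call now receives a sentinel transaction id and a null write-id list instead of
dereferencing a null tableSnapshot. The same pattern, factored into hypothetical helpers (the names
are illustrative and not part of the patch; the TableSnapshot accessors are the ones used above):

  // Hypothetical helpers mirroring the guarded arguments in the calls above.
  static long txnIdOrDefault(TableSnapshot tableSnapshot) {
    return tableSnapshot != null ? tableSnapshot.getTxnId() : -1;
  }

  static String validWriteIdListOrNull(TableSnapshot tableSnapshot) {
    return tableSnapshot != null ? tableSnapshot.getValidWriteIdList() : null;
  }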


[3/7] hive git commit: HIVE-19899: Support stored as JsonFile (Aihua Xu, reviewed by Yongzhi Chen, BELUGA BEHR)

Posted by se...@apache.org.
HIVE-19899: Support stored as JsonFile (Aihua Xu, reviewed by Yongzhi Chen, BELUGA BEHR)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/24e16cc5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/24e16cc5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/24e16cc5

Branch: refs/heads/master-txnstats
Commit: 24e16cc57293ea6771cd55009f8cfd29870a39ee
Parents: 6adab1c
Author: Aihua Xu <ai...@apache.org>
Authored: Thu Jun 14 13:35:49 2018 -0700
Committer: Aihua Xu <ai...@apache.org>
Committed: Thu Jun 21 14:36:07 2018 -0700

----------------------------------------------------------------------
 .../hcatalog/pig/AbstractHCatStorerTest.java    |  2 +-
 .../pig/TestHCatLoaderComplexSchema.java        |  3 ++
 .../hive/hcatalog/pig/TestHCatStorer.java       |  4 +-
 .../apache/hadoop/hive/ql/io/IOConstants.java   |  1 +
 .../ql/io/JsonFileStorageFormatDescriptor.java  | 51 ++++++++++++++++++++
 ...he.hadoop.hive.ql.io.StorageFormatDescriptor |  1 +
 .../hive/ql/io/TestStorageFormatDescriptor.java |  3 ++
 .../test/queries/clientpositive/json_serde1.q   |  9 ++--
 .../results/clientpositive/json_serde1.q.out    | 44 ++++++++++++++++-
 9 files changed, 109 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java
----------------------------------------------------------------------
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java
index 97277b5..a5cf3a5 100644
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java
@@ -54,7 +54,7 @@ import org.slf4j.LoggerFactory;
 public abstract class AbstractHCatStorerTest extends HCatBaseTest {
   static Logger LOG = LoggerFactory.getLogger(AbstractHCatStorerTest.class);
   static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data";
-  String storageFormat;
+  protected String storageFormat;
 
   public AbstractHCatStorerTest() {
     storageFormat = getStorageFormat();

http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java
----------------------------------------------------------------------
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java
index 8f06d39..37e670c 100644
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java
@@ -75,6 +75,9 @@ public class TestHCatLoaderComplexSchema {
         put(IOConstants.PARQUETFILE, new HashSet<String>() {{
           add("testMapNullKey");
         }});
+        put(IOConstants.JSONFILE, new HashSet<String>() {{
+          add("testMapNullKey");
+        }});
       }};
 
   private String storageFormat;

http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
----------------------------------------------------------------------
diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
index 477ea66..cb02139 100644
--- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
+++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
@@ -86,8 +86,6 @@ public class TestHCatStorer extends AbstractHCatStorerTest {
         }
       };
 
-  private String storageFormat;
-
   @Parameterized.Parameters
   public static Collection<Object[]> generateParameters() {
     return StorageFormats.names();
@@ -99,7 +97,7 @@ public class TestHCatStorer extends AbstractHCatStorerTest {
 
   @Override
   String getStorageFormat() {
-    return null;
+    return this.storageFormat;
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java b/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java
index f60d296..2be864e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java
@@ -35,6 +35,7 @@ public final class IOConstants {
   public static final String PARQUETFILE = "PARQUETFILE";
   public static final String AVRO = "AVRO";
   public static final String AVROFILE = "AVROFILE";
+  public static final String JSONFILE = "JSONFILE";
 
   /**
    * The desired TABLE column names and types for input format schema evolution.

http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/java/org/apache/hadoop/hive/ql/io/JsonFileStorageFormatDescriptor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/JsonFileStorageFormatDescriptor.java b/ql/src/java/org/apache/hadoop/hive/ql/io/JsonFileStorageFormatDescriptor.java
new file mode 100644
index 0000000..00c6178
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/JsonFileStorageFormatDescriptor.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io;
+
+import java.util.Set;
+
+import org.apache.hadoop.hive.serde2.JsonSerDe;
+
+import com.google.common.collect.ImmutableSet;
+
+/**
+ * A storage format descriptor class to support "STORED AS JSONFILE" syntax.
+ *
+ */
+public class JsonFileStorageFormatDescriptor extends AbstractStorageFormatDescriptor {
+  @Override
+  public Set<String> getNames() {
+    return ImmutableSet.of(IOConstants.JSONFILE);
+  }
+
+  @Override
+  public String getInputFormat() {
+    return IOConstants.TEXTFILE_INPUT;
+  }
+
+  @Override
+  public String getOutputFormat() {
+    return IOConstants.TEXTFILE_OUTPUT;
+  }
+
+  @Override
+  public String getSerde() {
+    return JsonSerDe.class.getName();
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor
----------------------------------------------------------------------
diff --git a/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor b/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor
index d858a95..c28a302 100644
--- a/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor
+++ b/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor
@@ -4,3 +4,4 @@ org.apache.hadoop.hive.ql.io.RCFileStorageFormatDescriptor
 org.apache.hadoop.hive.ql.io.ORCFileStorageFormatDescriptor
 org.apache.hadoop.hive.ql.io.ParquetFileStorageFormatDescriptor
 org.apache.hadoop.hive.ql.io.AvroStorageFormatDescriptor
+org.apache.hadoop.hive.ql.io.JsonFileStorageFormatDescriptor
\ No newline at end of file
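
The descriptor becomes visible to Hive through the META-INF/services registration above, so
implementations listed there can be discovered with Java's standard ServiceLoader. A minimal sketch
of such discovery (illustrative code only; Hive's own loading path may differ):

import java.util.ServiceLoader;

import org.apache.hadoop.hive.ql.io.StorageFormatDescriptor;

// Enumerates every registered storage format descriptor and the SerDe it maps to;
// after this patch the listing would include [JSONFILE] -> org.apache.hadoop.hive.serde2.JsonSerDe.
public class ListStorageFormats {
  public static void main(String[] args) {
    for (StorageFormatDescriptor descriptor : ServiceLoader.load(StorageFormatDescriptor.class)) {
      System.out.println(descriptor.getNames() + " -> " + descriptor.getSerde());
    }
  }
}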

http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java b/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java
index 72acaad..86d3703 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java
@@ -40,5 +40,8 @@ public class TestStorageFormatDescriptor {
         (new ParquetFileStorageFormatDescriptor()).getNames());
     Assert.assertEquals(Sets.newHashSet(IOConstants.AVRO, IOConstants.AVROFILE),
       (new AvroStorageFormatDescriptor()).getNames());
+    Assert.assertEquals(Sets.newHashSet(IOConstants.JSONFILE),
+        (new JsonFileStorageFormatDescriptor()).getNames());
+
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/test/queries/clientpositive/json_serde1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/json_serde1.q b/ql/src/test/queries/clientpositive/json_serde1.q
index b805925..fcbf1c0 100644
--- a/ql/src/test/queries/clientpositive/json_serde1.q
+++ b/ql/src/test/queries/clientpositive/json_serde1.q
@@ -1,9 +1,8 @@
 --! qt:dataset:src
 
-add jar ${system:maven.local.repository}/org/apache/hive/hcatalog/hive-hcatalog-core/${system:hive.version}/hive-hcatalog-core-${system:hive.version}.jar;
-
 drop table if exists json_serde1_1;
 drop table if exists json_serde1_2;
+drop table if exists json_serde1_3;
 
 create table json_serde1_1 (a array<string>,b map<string,int>)
   row format serde 'org.apache.hive.hcatalog.data.JsonSerDe';
@@ -17,7 +16,7 @@ create table json_serde1_2 (
   a array<int>,
   b map<int,date>,
   c struct<c1:int, c2:string, c3:array<string>, c4:map<string, int>, c5:struct<c5_1:string, c5_2:int>>
-) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe';
+) row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe';
 
 insert into table json_serde1_2
   select
@@ -33,5 +32,9 @@ insert into table json_serde1_2
 
 select * from json_serde1_2;
 
+create table json_serde1_3 (c1 int, c2 string) stored as jsonfile;
+show create table json_serde1_3;
+
 drop table json_serde1_1;
 drop table json_serde1_2;
+drop table json_serde1_3;

http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/test/results/clientpositive/json_serde1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/json_serde1.q.out b/ql/src/test/results/clientpositive/json_serde1.q.out
index e14d674..341a494 100644
--- a/ql/src/test/results/clientpositive/json_serde1.q.out
+++ b/ql/src/test/results/clientpositive/json_serde1.q.out
@@ -6,6 +6,10 @@ PREHOOK: query: drop table if exists json_serde1_2
 PREHOOK: type: DROPTABLE
 POSTHOOK: query: drop table if exists json_serde1_2
 POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists json_serde1_3
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists json_serde1_3
+POSTHOOK: type: DROPTABLE
 PREHOOK: query: create table json_serde1_1 (a array<string>,b map<string,int>)
   row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
 PREHOOK: type: CREATETABLE
@@ -42,7 +46,7 @@ PREHOOK: query: create table json_serde1_2 (
   a array<int>,
   b map<int,date>,
   c struct<c1:int, c2:string, c3:array<string>, c4:map<string, int>, c5:struct<c5_1:string, c5_2:int>>
-) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+) row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@json_serde1_2
@@ -50,7 +54,7 @@ POSTHOOK: query: create table json_serde1_2 (
   a array<int>,
   b map<int,date>,
   c struct<c1:int, c2:string, c3:array<string>, c4:map<string, int>, c5:struct<c5_1:string, c5_2:int>>
-) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+) row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@json_serde1_2
@@ -95,6 +99,34 @@ POSTHOOK: Input: default@json_serde1_2
 #### A masked pattern was here ####
 [3,2,1]	{1:"2001-01-01",2:null}	{"c1":123456,"c2":"hello","c3":["aa","bb","cc"],"c4":{"abc":123,"xyz":456},"c5":{"c5_1":"bye","c5_2":88}}
 [3,2,1]	{1:"2001-01-01",2:null}	{"c1":123456,"c2":"hello","c3":["aa","bb","cc"],"c4":{"abc":123,"xyz":456},"c5":{"c5_1":"bye","c5_2":88}}
+PREHOOK: query: create table json_serde1_3 (c1 int, c2 string) stored as jsonfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde1_3
+POSTHOOK: query: create table json_serde1_3 (c1 int, c2 string) stored as jsonfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde1_3
+PREHOOK: query: show create table json_serde1_3
+PREHOOK: type: SHOW_CREATETABLE
+PREHOOK: Input: default@json_serde1_3
+POSTHOOK: query: show create table json_serde1_3
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: default@json_serde1_3
+CREATE TABLE `json_serde1_3`(
+  `c1` int COMMENT 'from deserializer', 
+  `c2` string COMMENT 'from deserializer')
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.serde2.JsonSerDe' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.mapred.TextInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+TBLPROPERTIES (
+  'bucketing_version'='2', 
+#### A masked pattern was here ####
 PREHOOK: query: drop table json_serde1_1
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@json_serde1_1
@@ -111,3 +143,11 @@ POSTHOOK: query: drop table json_serde1_2
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@json_serde1_2
 POSTHOOK: Output: default@json_serde1_2
+PREHOOK: query: drop table json_serde1_3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@json_serde1_3
+PREHOOK: Output: default@json_serde1_3
+POSTHOOK: query: drop table json_serde1_3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@json_serde1_3
+POSTHOOK: Output: default@json_serde1_3


[5/7] hive git commit: HIVE-19016: Vectorization and Parquet: Disable vectorization for nested complex types (Matt McCline, reviewed by Vihang Karajgaonkar and Teddy Choi)

Posted by se...@apache.org.
HIVE-19016: Vectorization and Parquet: Disable vectorization for nested complex types (Matt McCline, reviewed by Vihang Karajgaonkar and Teddy Choi)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6d532e7c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6d532e7c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6d532e7c

Branch: refs/heads/master-txnstats
Commit: 6d532e7c4396a81b0afd16b66b4873c5fe9398ee
Parents: e36f6e4
Author: Matt McCline <mm...@hortonworks.com>
Authored: Fri Jun 22 10:46:24 2018 -0500
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Fri Jun 22 10:46:24 2018 -0500

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../hive/ql/optimizer/physical/Vectorizer.java  |  64 ++-
 .../vector_parquet_nested_two_level_complex.q   |  67 +++
 ...ector_parquet_nested_two_level_complex.q.out | 540 +++++++++++++++++++
 4 files changed, 670 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/6d532e7c/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index a3ddbda..93e2a44 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -759,6 +759,7 @@ minillaplocal.query.files=\
   vector_orc_null_check.q,\
   vector_order_null.q,\
   vector_outer_reference_windowed.q,\
+  vector_parquet_nested_two_level_complex.q,\
   vector_partitioned_date_time.q,\
   vector_ptf_1.q,\
   vector_ptf_part_simple.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/6d532e7c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index f4e8207..7afbf04 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -221,6 +221,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.Pr
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -1288,6 +1290,7 @@ public class Vectorizer implements PhysicalPlanResolver {
      */
     private boolean verifyAndSetVectorPartDesc(
         PartitionDesc pd, boolean isFullAcidTable,
+        List<TypeInfo> allTypeInfoList,
         Set<String> inputFileFormatClassNameSet,
         Map<VectorPartitionDesc, VectorPartitionDesc> vectorPartitionDescMap,
         Set<String> enabledConditionsMetSet, ArrayList<String> enabledConditionsNotMetList,
@@ -1332,8 +1335,13 @@ public class Vectorizer implements PhysicalPlanResolver {
 
       if (useVectorizedInputFileFormat) {
 
-        if (isInputFileFormatVectorized && !isInputFormatExcluded(inputFileFormatClassName,
-            vectorizedInputFormatExcludes)) {
+        if (isInputFileFormatVectorized &&
+            !isInputFormatExcluded(
+                inputFileFormatClassName,
+                vectorizedInputFormatExcludes) &&
+            !hasUnsupportedVectorizedParquetDataType(
+                inputFileFormatClass,
+                allTypeInfoList)) {
 
           addVectorizedInputFileFormatSupport(
               newSupportSet, isInputFileFormatVectorized, inputFileFormatClass);
@@ -1517,6 +1525,57 @@ public class Vectorizer implements PhysicalPlanResolver {
       return false;
     }
 
+    private boolean hasUnsupportedVectorizedParquetDataType(
+        Class<? extends InputFormat> inputFileFormatClass, List<TypeInfo> allTypeInfoList) {
+      if (!inputFileFormatClass.equals(org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat.class)) {
+        return false;
+      }
+
+      /*
+       * Currently, VectorizedParquetRecordReader cannot handle nested complex types.
+       */
+      for (TypeInfo typeInfo : allTypeInfoList) {
+        if (!(typeInfo instanceof PrimitiveTypeInfo)) {
+          switch (typeInfo.getCategory()) {
+          case LIST:
+            if (!(((ListTypeInfo) typeInfo).getListElementTypeInfo() instanceof PrimitiveTypeInfo)) {
+              return true;
+            }
+            break;
+          case MAP:
+            {
+              MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
+              if (!(mapTypeInfo.getMapKeyTypeInfo() instanceof PrimitiveTypeInfo)) {
+                return true;
+              }
+              if (!(mapTypeInfo.getMapValueTypeInfo() instanceof PrimitiveTypeInfo)) {
+                return true;
+              }
+            }
+            break;
+          case STRUCT:
+            {
+              StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
+              List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+              for (TypeInfo fieldTypeInfo : fieldTypeInfos) {
+                if (!(fieldTypeInfo instanceof PrimitiveTypeInfo)) {
+                  return true;
+                }
+              }
+            }
+            break;
+          case UNION:
+            // Not supported at all.
+            return false;
+          default:
+            throw new RuntimeException(
+                "Unsupported complex type category " + typeInfo.getCategory());
+          }
+        }
+      }
+      return false;
+    }
+
     private void setValidateInputFormatAndSchemaEvolutionExplain(MapWork mapWork,
         Set<String> inputFileFormatClassNameSet,
         Map<VectorPartitionDesc, VectorPartitionDesc> vectorPartitionDescMap,
@@ -1594,6 +1653,7 @@ public class Vectorizer implements PhysicalPlanResolver {
         final boolean isVerifiedVectorPartDesc =
             verifyAndSetVectorPartDesc(
               partDesc, isFullAcidTable,
+              allTypeInfoList,
               inputFileFormatClassNameSet,
               vectorPartitionDescMap,
               enabledConditionsMetSet, enabledConditionsNotMetList,
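
To make the new rule concrete: vectorization is now declined for Parquet only when a column type
nests another complex type one level down, such as array<array<int>>, map<string,struct<...>>, or a
struct containing a map, while single-level complex types such as array<int> or map<string,string>
remain eligible. A standalone, hypothetical probe over Hive TypeInfo objects mirroring that check
(illustration only, not part of the patch):

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class NestedComplexTypeProbe {

  /** True if a complex type directly contains another complex type (the case the patch rejects). */
  static boolean isNestedComplex(TypeInfo typeInfo) {
    switch (typeInfo.getCategory()) {
    case LIST:
      return !(((ListTypeInfo) typeInfo).getListElementTypeInfo() instanceof PrimitiveTypeInfo);
    case MAP:
      MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
      return !(mapTypeInfo.getMapKeyTypeInfo() instanceof PrimitiveTypeInfo)
          || !(mapTypeInfo.getMapValueTypeInfo() instanceof PrimitiveTypeInfo);
    case STRUCT:
      for (TypeInfo fieldTypeInfo : ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos()) {
        if (!(fieldTypeInfo instanceof PrimitiveTypeInfo)) {
          return true;
        }
      }
      return false;
    default:
      return false;   // primitives (and UNION, which the patch leaves to other checks)
    }
  }

  public static void main(String[] args) {
    for (String typeString : Arrays.asList(
        "array<int>", "array<array<int>>", "map<string,string>",
        "map<string,struct<latitude:double,longitude:double>>")) {
      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeString);
      System.out.println(typeString + " -> nested complex: " + isNestedComplex(typeInfo));
    }
  }
}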

http://git-wip-us.apache.org/repos/asf/hive/blob/6d532e7c/ql/src/test/queries/clientpositive/vector_parquet_nested_two_level_complex.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_parquet_nested_two_level_complex.q b/ql/src/test/queries/clientpositive/vector_parquet_nested_two_level_complex.q
new file mode 100644
index 0000000..70480e6
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_parquet_nested_two_level_complex.q
@@ -0,0 +1,67 @@
+set hive.vectorized.execution.enabled=true;
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+
+create table nested_array_array_table (
+nested_array_array  array<array<int>>)
+STORED AS PARQUET;
+
+create table nested_array_map_table (
+nested_array_map  array<map<string,string>>)
+STORED AS PARQUET;
+
+create table nested_array_struct_table (
+nested_array_map  array<struct<latitude: DOUBLE, longitude: DOUBLE>>)
+STORED AS PARQUET;
+
+create table nested_map_array_table (
+nested_map_array  map<string,array<int>>)
+STORED AS PARQUET;
+
+create table nested_map_map_table (
+nested_map_map    map<string,map<string,string>>)
+STORED AS PARQUET;
+
+create table nested_map_struct_table (
+nested_map_struct    map<string,struct<latitude: DOUBLE, longitude: DOUBLE>>)
+STORED AS PARQUET;
+
+create table nested_struct_array_table (
+nested_struct_array struct<s:string, i:bigint, a:array<int>>)
+STORED AS PARQUET;
+
+create table nested_struct_map_table (
+nested_struct_map struct<s:string, i:bigint, m:map<string,string>>)
+STORED AS PARQUET;
+
+create table nested_struct_struct_table (
+nested_struct_struct struct<s:string, i:bigint, s2:struct<latitude: DOUBLE, longitude: DOUBLE>>)
+STORED AS PARQUET;
+
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_array_array_table;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_array_map_table;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_array_map_table;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_map_array_table;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_map_map_table;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_map_struct_table;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_struct_array_table;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_struct_map_table;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_struct_struct_table;

http://git-wip-us.apache.org/repos/asf/hive/blob/6d532e7c/ql/src/test/results/clientpositive/llap/vector_parquet_nested_two_level_complex.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_parquet_nested_two_level_complex.q.out b/ql/src/test/results/clientpositive/llap/vector_parquet_nested_two_level_complex.q.out
new file mode 100644
index 0000000..daeca1c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_parquet_nested_two_level_complex.q.out
@@ -0,0 +1,540 @@
+PREHOOK: query: create table nested_array_array_table (
+nested_array_array  array<array<int>>)
+STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nested_array_array_table
+POSTHOOK: query: create table nested_array_array_table (
+nested_array_array  array<array<int>>)
+STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nested_array_array_table
+PREHOOK: query: create table nested_array_map_table (
+nested_array_map  array<map<string,string>>)
+STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nested_array_map_table
+POSTHOOK: query: create table nested_array_map_table (
+nested_array_map  array<map<string,string>>)
+STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nested_array_map_table
+PREHOOK: query: create table nested_array_struct_table (
+nested_array_map  array<struct<latitude: DOUBLE, longitude: DOUBLE>>)
+STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nested_array_struct_table
+POSTHOOK: query: create table nested_array_struct_table (
+nested_array_map  array<struct<latitude: DOUBLE, longitude: DOUBLE>>)
+STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nested_array_struct_table
+PREHOOK: query: create table nested_map_array_table (
+nested_map_array  map<string,array<int>>)
+STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nested_map_array_table
+POSTHOOK: query: create table nested_map_array_table (
+nested_map_array  map<string,array<int>>)
+STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nested_map_array_table
+PREHOOK: query: create table nested_map_map_table (
+nested_map_map    map<string,map<string,string>>)
+STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nested_map_map_table
+POSTHOOK: query: create table nested_map_map_table (
+nested_map_map    map<string,map<string,string>>)
+STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nested_map_map_table
+PREHOOK: query: create table nested_map_struct_table (
+nested_map_struct    map<string,struct<latitude: DOUBLE, longitude: DOUBLE>>)
+STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nested_map_struct_table
+POSTHOOK: query: create table nested_map_struct_table (
+nested_map_struct    map<string,struct<latitude: DOUBLE, longitude: DOUBLE>>)
+STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nested_map_struct_table
+PREHOOK: query: create table nested_struct_array_table (
+nested_struct_array struct<s:string, i:bigint, a:array<int>>)
+STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nested_struct_array_table
+POSTHOOK: query: create table nested_struct_array_table (
+nested_struct_array struct<s:string, i:bigint, a:array<int>>)
+STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nested_struct_array_table
+PREHOOK: query: create table nested_struct_map_table (
+nested_struct_map struct<s:string, i:bigint, m:map<string,string>>)
+STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nested_struct_map_table
+POSTHOOK: query: create table nested_struct_map_table (
+nested_struct_map struct<s:string, i:bigint, m:map<string,string>>)
+STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nested_struct_map_table
+PREHOOK: query: create table nested_struct_struct_table (
+nested_struct_struct struct<s:string, i:bigint, s2:struct<latitude: DOUBLE, longitude: DOUBLE>>)
+STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nested_struct_struct_table
+POSTHOOK: query: create table nested_struct_struct_table (
+nested_struct_struct struct<s:string, i:bigint, s2:struct<latitude: DOUBLE, longitude: DOUBLE>>)
+STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nested_struct_struct_table
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_array_array_table
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_array_array_table
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: nested_array_array_table
+                  Statistics: Num rows: 1 Data size: 1280 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: nested_array_array (type: array<array<int>>)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 1280 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 1280 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: all inputs (cache only)
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: Row deserialization of vectorized input format not supported IS false, hive.vectorized.use.vectorized.input.format IS true AND hive.vectorized.input.format.excludes NOT CONTAINS org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat IS false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_array_map_table
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_array_map_table
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: nested_array_map_table
+                  Statistics: Num rows: 1 Data size: 9280 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: nested_array_map (type: array<map<string,string>>)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 9280 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 9280 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: all inputs (cache only)
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: Row deserialization of vectorized input format not supported IS false, hive.vectorized.use.vectorized.input.format IS true AND hive.vectorized.input.format.excludes NOT CONTAINS org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat IS false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_array_map_table
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_array_map_table
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: nested_array_map_table
+                  Statistics: Num rows: 1 Data size: 9280 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: nested_array_map (type: array<map<string,string>>)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 9280 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 9280 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: all inputs (cache only)
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: Row deserialization of vectorized input format not supported IS false, hive.vectorized.use.vectorized.input.format IS true AND hive.vectorized.input.format.excludes NOT CONTAINS org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat IS false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_map_array_table
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_map_array_table
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: nested_map_array_table
+                  Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: nested_map_array (type: map<string,array<int>>)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: all inputs (cache only)
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: Row deserialization of vectorized input format not supported IS false, hive.vectorized.use.vectorized.input.format IS true AND hive.vectorized.input.format.excludes NOT CONTAINS org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat IS false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_map_map_table
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_map_map_table
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: nested_map_map_table
+                  Statistics: Num rows: 1 Data size: 1656 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: nested_map_map (type: map<string,map<string,string>>)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 1656 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 1656 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: all inputs (cache only)
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: Row deserialization of vectorized input format not supported IS false, hive.vectorized.use.vectorized.input.format IS true AND hive.vectorized.input.format.excludes NOT CONTAINS org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat IS false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_map_struct_table
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_map_struct_table
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: nested_map_struct_table
+                  Statistics: Num rows: 1 Data size: 800 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: nested_map_struct (type: map<string,struct<latitude:double,longitude:double>>)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 800 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 800 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: all inputs (cache only)
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: Row deserialization of vectorized input format not supported IS false, hive.vectorized.use.vectorized.input.format IS true AND hive.vectorized.input.format.excludes NOT CONTAINS org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat IS false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_struct_array_table
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_struct_array_table
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: nested_struct_array_table
+                  Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: nested_struct_array (type: struct<s:string,i:bigint,a:array<int>>)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: all inputs (cache only)
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: Row deserialization of vectorized input format not supported IS false, hive.vectorized.use.vectorized.input.format IS true AND hive.vectorized.input.format.excludes NOT CONTAINS org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat IS false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_struct_map_table
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_struct_map_table
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: nested_struct_map_table
+                  Statistics: Num rows: 1 Data size: 1168 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: nested_struct_map (type: struct<s:string,i:bigint,m:map<string,string>>)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 1168 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 1168 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: all inputs (cache only)
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: Row deserialization of vectorized input format not supported IS false, hive.vectorized.use.vectorized.input.format IS true AND hive.vectorized.input.format.excludes NOT CONTAINS org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat IS false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_struct_struct_table
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT * FROM nested_struct_struct_table
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: nested_struct_struct_table
+                  Statistics: Num rows: 1 Data size: 312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: nested_struct_struct (type: struct<s:string,i:bigint,s2:struct<latitude:double,longitude:double>>)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 312 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 312 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: all inputs (cache only)
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: Row deserialization of vectorized input format not supported IS false, hive.vectorized.use.vectorized.input.format IS true AND hive.vectorized.input.format.excludes NOT CONTAINS org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat IS false
+                inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+


[4/7] hive git commit: HIVE-19783: Retrieve only locations in HiveMetaStore.dropPartitionsAndGetLocations (Peter Vary, reviewed by Alexander Kolbasov and Vihang Karajgaonkar)

Posted by se...@apache.org.
HIVE-19783: Retrieve only locations in HiveMetaStore.dropPartitionsAndGetLocations (Peter Vary, reviewed by Alexander Kolbasov and Vihang Karajgaonkar)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e36f6e4f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e36f6e4f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e36f6e4f

Branch: refs/heads/master-txnstats
Commit: e36f6e4fbda354f33ba9cef6cf25e5573c78d618
Parents: 24e16cc
Author: Peter Vary <pv...@cloudera.com>
Authored: Fri Jun 22 10:10:33 2018 +0200
Committer: Peter Vary <pv...@cloudera.com>
Committed: Fri Jun 22 10:10:33 2018 +0200

----------------------------------------------------------------------
 .../listener/DummyRawStoreFailEvent.java        |   6 +
 .../hadoop/hive/metastore/HiveMetaStore.java    | 111 ++++++++-----------
 .../hadoop/hive/metastore/ObjectStore.java      |  46 ++++++++
 .../apache/hadoop/hive/metastore/RawStore.java  |  15 +++
 .../hive/metastore/cache/CachedStore.java       |   6 +
 .../hadoop/hive/metastore/utils/FileUtils.java  |  14 +++
 .../DummyRawStoreControlledCommit.java          |   6 +
 .../DummyRawStoreForJdoConnection.java          |   6 +
 .../client/MetaStoreFactoryForTests.java        |   1 +
 .../TestTablesCreateDropAlterTruncate.java      |  14 ++-
 10 files changed, 160 insertions(+), 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
----------------------------------------------------------------------
diff --git a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
index 8f9a03f..3c334fa 100644
--- a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
+++ b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
@@ -295,6 +295,12 @@ public class DummyRawStoreFailEvent implements RawStore, Configurable {
   }
 
   @Override
+  public Map<String, String> getPartitionLocations(String catName, String dbName, String tblName,
+      String baseLocationToNotShow, int max) {
+    return objectStore.getPartitionLocations(catName, dbName, tblName, baseLocationToNotShow, max);
+  }
+
+  @Override
   public void updateCreationMetadata(String catName, String dbname, String tablename, CreationMetadata cm)
       throws MetaException {
     objectStore.updateCreationMetadata(catName, dbname, tablename, cm);

http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index e88f9a5..e9d7e7c 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -1505,7 +1505,8 @@ public class HiveMetaStore extends ThriftHiveMetastore {
                       " which is not writable by " + SecurityUtils.getUser());
                 }
 
-                if (!isSubdirectory(databasePath, materializedViewPath)) {
+                if (!FileUtils.isSubdirectory(databasePath.toString(),
+                    materializedViewPath.toString())) {
                   tablePaths.add(materializedViewPath);
                 }
               }
@@ -1545,7 +1546,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
                       " which is not writable by " + SecurityUtils.getUser());
                 }
 
-                if (!isSubdirectory(databasePath, tablePath)) {
+                if (!FileUtils.isSubdirectory(databasePath.toString(), tablePath.toString())) {
                   tablePaths.add(tablePath);
                 }
               }
@@ -1553,7 +1554,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
               // For each partition in each table, drop the partitions and get a list of
               // partitions' locations which might need to be deleted
               partitionPaths = dropPartitionsAndGetLocations(ms, catName, name, table.getTableName(),
-                  tablePath, table.getPartitionKeys(), deleteData && !isExternal(table));
+                  tablePath, deleteData && !isExternal(table));
 
               // Drop the table but not its data
               drop_table(MetaStoreUtils.prependCatalogToDbName(table.getCatName(), table.getDbName(), conf),
@@ -1604,20 +1605,6 @@ public class HiveMetaStore extends ThriftHiveMetastore {
       }
     }
 
-    /**
-     * Returns a BEST GUESS as to whether or not other is a subdirectory of parent. It does not
-     * take into account any intricacies of the underlying file system, which is assumed to be
-     * HDFS. This should not return any false positives, but may return false negatives.
-     *
-     * @param parent
-     * @param other
-     * @return
-     */
-    private boolean isSubdirectory(Path parent, Path other) {
-      return other.toString().startsWith(parent.toString().endsWith(Path.SEPARATOR) ?
-          parent.toString() : parent.toString() + Path.SEPARATOR);
-    }
-
     @Override
     public void drop_database(final String dbName, final boolean deleteData, final boolean cascade)
         throws NoSuchObjectException, InvalidOperationException, MetaException {
@@ -2482,7 +2469,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
 
         // Drop the partitions and get a list of locations which need to be deleted
         partPaths = dropPartitionsAndGetLocations(ms, catName, dbname, name, tblPath,
-            tbl.getPartitionKeys(), deleteData && !isExternal);
+            deleteData && !isExternal);
 
         // Drop any constraints on the table
         ms.dropConstraint(catName, dbname, name, null, true);
@@ -2567,79 +2554,75 @@ public class HiveMetaStore extends ThriftHiveMetastore {
     }
 
     /**
-     * Retrieves the partitions specified by partitionKeys. If checkLocation, for locations of
-     * partitions which may not be subdirectories of tablePath checks to make the locations are
-     * writable.
+     * Deletes the partitions specified by catName, dbName, tableName. If checkLocation is true,
+     * checks that the locations of partitions which may not be subdirectories of tablePath are
+     * writable.
      *
      * Drops the metadata for each partition.
      *
      * Provides a list of locations of partitions which may not be subdirectories of tablePath.
      *
-     * @param ms
-     * @param dbName
-     * @param tableName
-     * @param tablePath
-     * @param partitionKeys
-     * @param checkLocation
-     * @return
+     * @param ms RawStore to use for metadata retrieval and delete
+     * @param catName The catalog name
+     * @param dbName The database name
+     * @param tableName The table name
+     * @param tablePath The tablePath whose subdirectories do not have to be checked
+     * @param checkLocation Whether the partition locations should be checked for writability
+     * @return The list of the Path objects to delete (only when checkLocation is true)
      * @throws MetaException
      * @throws IOException
-     * @throws InvalidInputException
-     * @throws InvalidObjectException
      * @throws NoSuchObjectException
      */
     private List<Path> dropPartitionsAndGetLocations(RawStore ms, String catName, String dbName,
-      String tableName, Path tablePath, List<FieldSchema> partitionKeys, boolean checkLocation)
-      throws MetaException, IOException, NoSuchObjectException, InvalidObjectException,
-      InvalidInputException {
-      int partitionBatchSize = MetastoreConf.getIntVar(conf,
-          ConfVars.BATCH_RETRIEVE_MAX);
-      Path tableDnsPath = null;
+        String tableName, Path tablePath, boolean checkLocation)
+        throws MetaException, IOException, NoSuchObjectException {
+      int batchSize = MetastoreConf.getIntVar(conf, ConfVars.BATCH_RETRIEVE_OBJECTS_MAX);
+      String tableDnsPath = null;
       if (tablePath != null) {
-        tableDnsPath = wh.getDnsPath(tablePath);
+        tableDnsPath = wh.getDnsPath(tablePath).toString();
       }
-      List<Path> partPaths = new ArrayList<>();
-      Table tbl = ms.getTable(catName, dbName, tableName);
 
-      // call dropPartition on each of the table's partitions to follow the
-      // procedure for cleanly dropping partitions.
+      List<Path> partPaths = new ArrayList<>();
       while (true) {
-        List<Partition> partsToDelete = ms.getPartitions(catName, dbName, tableName, partitionBatchSize);
-        if (partsToDelete == null || partsToDelete.isEmpty()) {
-          break;
-        }
-        List<String> partNames = new ArrayList<>();
-        for (Partition part : partsToDelete) {
-          if (checkLocation && part.getSd() != null &&
-              part.getSd().getLocation() != null) {
-
-            Path partPath = wh.getDnsPath(new Path(part.getSd().getLocation()));
-            if (tableDnsPath == null ||
-                (partPath != null && !isSubdirectory(tableDnsPath, partPath))) {
-              if (!wh.isWritable(partPath.getParent())) {
-                throw new MetaException("Table metadata not deleted since the partition " +
-                    Warehouse.makePartName(partitionKeys, part.getValues()) +
-                    " has parent location " + partPath.getParent() + " which is not writable " +
-                    "by " + SecurityUtils.getUser());
+        Map<String, String> partitionLocations = ms.getPartitionLocations(catName, dbName, tableName,
+            tableDnsPath, batchSize);
+        if (partitionLocations == null || partitionLocations.isEmpty()) {
+          // No more partitions left to drop. Return with the collected path list to delete.
+          return partPaths;
+        }
+
+        if (checkLocation) {
+          for (String partName : partitionLocations.keySet()) {
+            String pathString = partitionLocations.get(partName);
+            if (pathString != null) {
+              Path partPath = wh.getDnsPath(new Path(pathString));
+              // Double check here: Warehouse.getDnsPath may have revealed that the partition
+              // path is actually under the table path after all
+              if (tableDnsPath == null ||
+                      !FileUtils.isSubdirectory(tableDnsPath, partPath.toString())) {
+                if (!wh.isWritable(partPath.getParent())) {
+                  throw new MetaException("Table metadata not deleted since the partition "
+                      + partName + " has parent location " + partPath.getParent()
+                      + " which is not writable by " + SecurityUtils.getUser());
+                }
+                partPaths.add(partPath);
               }
-              partPaths.add(partPath);
             }
           }
-          partNames.add(Warehouse.makePartName(tbl.getPartitionKeys(), part.getValues()));
         }
+
         for (MetaStoreEventListener listener : listeners) {
           //No drop part listener events fired for public listeners historically, for drop table case.
           //Limiting to internal listeners for now, to avoid unexpected calls for public listeners.
           if (listener instanceof HMSMetricsListener) {
-            for (@SuppressWarnings("unused") Partition part : partsToDelete) {
+            for (@SuppressWarnings("unused") String partName : partitionLocations.keySet()) {
               listener.onDropPartition(null);
             }
           }
         }
-        ms.dropPartitions(catName, dbName, tableName, partNames);
-      }
 
-      return partPaths;
+        ms.dropPartitions(catName, dbName, tableName, new ArrayList<>(partitionLocations.keySet()));
+      }
     }
 
     @Override
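
To make the rewritten loop's behaviour concrete, here is a small sketch of which locations end
up in partPaths. It is illustrative only: the table and partition locations are invented, and
the real code additionally resolves paths through wh.getDnsPath() and verifies that the parent
directory is writable. Only FileUtils.isSubdirectory comes from this patch.

    import java.util.ArrayList;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;
    import org.apache.hadoop.hive.metastore.utils.FileUtils;

    public class DropLocationsExample {
      public static void main(String[] args) {
        // Invented locations; the store has already nulled out locations that are children of
        // the table path (baseLocationToNotShow).
        String tableDnsPath = "hdfs://nn/warehouse/db1.db/sales";
        Map<String, String> partitionLocations = new LinkedHashMap<>();
        partitionLocations.put("day=1", null);
        partitionLocations.put("day=2", "hdfs://nn/external/sales_day2");

        List<String> partPaths = new ArrayList<>();
        for (Map.Entry<String, String> entry : partitionLocations.entrySet()) {
          String location = entry.getValue();
          // Same filter as the loop above, minus the getDnsPath() and writability steps.
          if (location != null && !FileUtils.isSubdirectory(tableDnsPath, location)) {
            partPaths.add(location);
          }
        }
        System.out.println(partPaths);   // [hdfs://nn/external/sales_day2]
      }
    }

Partitions whose data lives under the table directory are covered by deleting the table
directory itself, which is why their locations come back as null and are skipped here.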

http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index e99f888..0d2da7a 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -2817,6 +2817,52 @@ public class ObjectStore implements RawStore, Configurable {
     return getPartitionsInternal(catName, dbName, tableName, maxParts, true, true);
   }
 
+  @Override
+  public Map<String, String> getPartitionLocations(String catName, String dbName, String tblName,
+      String baseLocationToNotShow, int max) {
+    catName = normalizeIdentifier(catName);
+    dbName = normalizeIdentifier(dbName);
+    tblName = normalizeIdentifier(tblName);
+
+    boolean success = false;
+    Query query = null;
+    Map<String, String> partLocations = new HashMap<>();
+    try {
+      openTransaction();
+      LOG.debug("Executing getPartitionLocations");
+
+      query = pm.newQuery(MPartition.class);
+      query.setFilter("this.table.database.catalogName == t1 && this.table.database.name == t2 "
+          + "&& this.table.tableName == t3");
+      query.declareParameters("String t1, String t2, String t3");
+      query.setResult("this.partitionName, this.sd.location");
+      if (max >= 0) {
+        //Row limit specified, set it on the Query
+        query.setRange(0, max);
+      }
+
+      List<Object[]> result = (List<Object[]>)query.execute(catName, dbName, tblName);
+      for(Object[] row:result) {
+        String location = (String)row[1];
+        if (baseLocationToNotShow != null && location != null
+            && FileUtils.isSubdirectory(baseLocationToNotShow, location)) {
+          location = null;
+        }
+        partLocations.put((String)row[0], location);
+      }
+      LOG.debug("Done executing query for getPartitionLocations");
+      success = commitTransaction();
+    } finally {
+      if (!success) {
+        rollbackTransaction();
+      }
+      if (query != null) {
+        query.closeAll();
+      }
+    }
+    return partLocations;
+  }
+
   protected List<Partition> getPartitionsInternal(String catName, String dbName, String tblName,
                                                   final int maxParts, boolean allowSql, boolean allowJdo)
           throws MetaException, NoSuchObjectException {

http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java
index bbbdf21..c8905c8 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java
@@ -363,6 +363,21 @@ public interface RawStore extends Configurable {
       String tableName, int max) throws MetaException, NoSuchObjectException;
 
   /**
+   * Get the location for every partition of a given table. If a partition location is a child of
+   * baseLocationToNotShow then the partitionName is still returned, but with a null location
+   * instead of the actual one.
+   * @param catName catalog name.
+   * @param dbName database name.
+   * @param tblName table name.
+   * @param baseLocationToNotShow Partition locations which are children of this path are omitted,
+   *     and a null value is returned instead.
+   * @param max The maximum number of partition locations returned, or -1 for all
+   * @return The map of the partitionName, location pairs
+   */
+  Map<String, String> getPartitionLocations(String catName, String dbName, String tblName,
+      String baseLocationToNotShow, int max);
+
+  /**
    * Alter a table.
    * @param catName catalog the table is in.
    * @param dbname database the table is in.

http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
index 7c3588d..1da9798 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
@@ -1039,6 +1039,12 @@ public class CachedStore implements RawStore, Configurable {
   }
 
   @Override
+  public Map<String, String> getPartitionLocations(String catName, String dbName, String tblName,
+      String baseLocationToNotShow, int max) {
+    return rawStore.getPartitionLocations(catName, dbName, tblName, baseLocationToNotShow, max);
+  }
+
+  @Override
   public void alterTable(String catName, String dbName, String tblName, Table newTable)
       throws InvalidObjectException, MetaException {
     rawStore.alterTable(catName, dbName, tblName, newTable);

http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java
index ec9e9e2..963e12f 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java
@@ -510,4 +510,18 @@ public class FileUtils {
 
     return new Path(scheme, authority, pathUri.getPath());
   }
+
+
+  /**
+   * Returns a BEST GUESS as to whether or not other is a subdirectory of parent. It does not
+   * take into account any intricacies of the underlying file system, which is assumed to be
+   * HDFS. This should not return any false positives, but may return false negatives.
+   *
+   * @param parent Candidate parent directory
+   * @param other Directory to check if it is a subdirectory of parent
+   * @return True, if other is a subdirectory of parent
+   */
+  public static boolean isSubdirectory(String parent, String other) {
+    return other.startsWith(parent.endsWith(Path.SEPARATOR) ? parent : parent + Path.SEPARATOR);
+  }
 }
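
A quick illustration of the best-guess semantics documented above; the paths are invented and
the snippet only exercises the new static helper.

    import org.apache.hadoop.hive.metastore.utils.FileUtils;

    public class IsSubdirectoryExample {
      public static void main(String[] args) {
        String table = "hdfs://nn/warehouse/db1.db/sales";
        System.out.println(FileUtils.isSubdirectory(table, table + "/day=1"));    // true
        System.out.println(FileUtils.isSubdirectory(table, table + "_archive"));  // false
        // The same directory written with an explicit port is not recognised as the parent:
        // a false negative, but never a false positive, exactly as the javadoc promises.
        System.out.println(FileUtils.isSubdirectory(
            "hdfs://nn:8020/warehouse/db1.db/sales", table + "/day=1"));           // false
      }
    }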

http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
index 7c7429d..8e195d0 100644
--- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
+++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
@@ -272,6 +272,12 @@ public class DummyRawStoreControlledCommit implements RawStore, Configurable {
   }
 
   @Override
+  public Map<String, String> getPartitionLocations(String catName, String dbName, String tblName,
+      String baseLocationToNotShow, int max) {
+    return objectStore.getPartitionLocations(catName, dbName, tblName, baseLocationToNotShow, max);
+  }
+
+  @Override
   public void alterTable(String catName, String dbName, String name, Table newTable)
       throws InvalidObjectException, MetaException {
     objectStore.alterTable(catName, dbName, name, newTable);

http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
index e4f2a17..85eb6d5 100644
--- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
+++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
@@ -270,6 +270,12 @@ public class DummyRawStoreForJdoConnection implements RawStore {
   }
 
   @Override
+  public Map<String, String> getPartitionLocations(String catName, String dbName, String tblName,
+      String baseLocationToNotShow, int max) {
+    return Collections.emptyMap();
+  }
+
+  @Override
   public void alterTable(String catName, String dbname, String name, Table newTable)
       throws InvalidObjectException, MetaException {
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/MetaStoreFactoryForTests.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/MetaStoreFactoryForTests.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/MetaStoreFactoryForTests.java
index 1a57df2..55ef153 100644
--- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/MetaStoreFactoryForTests.java
+++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/MetaStoreFactoryForTests.java
@@ -57,6 +57,7 @@ public final class MetaStoreFactoryForTests {
     // set some values to use for getting conf. vars
     MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.METRICS_ENABLED, true);
     MetastoreConf.setLongVar(conf, MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX, 2);
+    MetastoreConf.setLongVar(conf, MetastoreConf.ConfVars.BATCH_RETRIEVE_OBJECTS_MAX, 2);
     MetastoreConf.setLongVar(conf, MetastoreConf.ConfVars.LIMIT_PARTITION_REQUEST,
         DEFAULT_LIMIT_PARTITION_REQUEST);
     MetaStoreTestUtils.setConfForStandloneMode(conf);

http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java
index e1c3dcb..c40b42a 100644
--- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java
+++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java
@@ -167,12 +167,19 @@ public class TestTablesCreateDropAlterTruncate extends MetaStoreClientTest {
             .create(client, metaStore.getConf());
 
     // Create partitions for the partitioned table
-    for(int i=0; i < 3; i++) {
+    for(int i=0; i < 2; i++) {
       new PartitionBuilder()
               .inTable(testTables[3])
               .addValue("a" + i)
               .addToTable(client, metaStore.getConf());
     }
+    // Add an external partition too
+    new PartitionBuilder()
+        .inTable(testTables[3])
+        .addValue("a2")
+        .setLocation(metaStore.getWarehouseRoot() + "/external/a2")
+        .addToTable(client, metaStore.getConf());
+
     // Add data files to the partitioned table
     List<Partition> partitions =
         client.listPartitions(testTables[3].getDbName(), testTables[3].getTableName(), (short)-1);
@@ -530,6 +537,8 @@ public class TestTablesCreateDropAlterTruncate extends MetaStoreClientTest {
   @Test
   public void testDropTableDeleteDir() throws Exception {
     Table table = testTables[0];
+    Partition externalPartition = client.getPartition(partitionedTable.getDbName(),
+        partitionedTable.getTableName(), "test_part_col=a2");
 
     client.dropTable(table.getDbName(), table.getTableName(), true, false);
 
@@ -547,6 +556,9 @@ public class TestTablesCreateDropAlterTruncate extends MetaStoreClientTest {
 
     Assert.assertFalse("Table path should be removed",
         metaStore.isPathExists(new Path(partitionedTable.getSd().getLocation())));
+
+    Assert.assertFalse("Extra partition path should be removed",
+        metaStore.isPathExists(new Path(externalPartition.getSd().getLocation())));
   }
 
   @Test


[2/7] hive git commit: HIVE-19928 : Load Data for managed tables should set the owner of loaded files to a configurable user (Deepak Jaiswal, reviewed by Jason Dere)

Posted by se...@apache.org.
HIVE-19928 : Load Data for managed tables should set the owner of loaded files to a configurable user (Deepak Jaiswal, reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6adab1c2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6adab1c2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6adab1c2

Branch: refs/heads/master-txnstats
Commit: 6adab1c2ab18558e1dd6e353e6f4dbb249a3d8e6
Parents: 6a87f7f
Author: Deepak Jaiswal <dj...@apache.org>
Authored: Thu Jun 21 13:28:21 2018 -0700
Committer: Deepak Jaiswal <dj...@apache.org>
Committed: Thu Jun 21 13:28:21 2018 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   2 +
 .../apache/hadoop/hive/ql/exec/MoveTask.java    |   3 +-
 .../apache/hadoop/hive/ql/metadata/Hive.java    | 116 ++++++++++++-------
 .../hive/ql/metadata/TestHiveCopyFiles.java     |  12 +-
 4 files changed, 82 insertions(+), 51 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/6adab1c2/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index e7f5fc0..39e5c00 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1561,6 +1561,8 @@ public class HiveConf extends Configuration {
     HIVE_STRICT_CHECKS_BUCKETING("hive.strict.checks.bucketing", true,
         "Enabling strict bucketing checks disallows the following:\n" +
         "  Load into bucketed tables."),
+    HIVE_LOAD_DATA_OWNER("hive.load.data.owner", "",
+        "Set the owner of files loaded using load data in managed tables."),
 
     @Deprecated
     HIVEMAPREDMODE("hive.mapred.mode", null,
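
As a usage sketch (the owner value "hive" below is just an example), the new knob is set like
any other HiveConf variable; per the description above, files moved into managed tables by
LOAD DATA would then be owned by that user.

    import org.apache.hadoop.hive.conf.HiveConf;

    public class LoadDataOwnerExample {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Equivalent to: set hive.load.data.owner=hive;
        conf.setVar(HiveConf.ConfVars.HIVE_LOAD_DATA_OWNER, "hive");
        System.out.println(conf.getVar(HiveConf.ConfVars.HIVE_LOAD_DATA_OWNER));
      }
    }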

http://git-wip-us.apache.org/repos/asf/hive/blob/6adab1c2/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
index f80a945..19097f5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
@@ -140,7 +140,8 @@ public class MoveTask extends Task<MoveWork> implements Serializable {
         deletePath = createTargetPath(targetPath, tgtFs);
       }
       Hive.clearDestForSubDirSrc(conf, targetPath, sourcePath, false);
-      if (!Hive.moveFile(conf, sourcePath, targetPath, true, false)) {
+      // Set isManaged to false since this is not a load data operation, which is when it is needed.
+      if (!Hive.moveFile(conf, sourcePath, targetPath, true, false, false)) {
         try {
           if (deletePath != null) {
             tgtFs.delete(deletePath, true);

http://git-wip-us.apache.org/repos/asf/hive/blob/6adab1c2/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 2ec131e..eab9f4a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -63,21 +63,13 @@ import javax.jdo.JDODataStoreException;
 
 import com.google.common.collect.ImmutableList;
 import org.apache.calcite.plan.RelOptMaterialization;
-import org.apache.calcite.plan.RelOptRule;
-import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.plan.hep.HepPlanner;
 import org.apache.calcite.plan.hep.HepProgramBuilder;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.RelVisitor;
 import org.apache.calcite.rel.core.Project;
 import org.apache.calcite.rel.core.TableScan;
-import org.apache.calcite.rel.type.RelDataType;
-import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rex.RexBuilder;
-import org.apache.calcite.rex.RexNode;
-import org.apache.calcite.sql.fun.SqlStdOperatorTable;
-import org.apache.calcite.sql.type.SqlTypeName;
-import org.apache.calcite.tools.RelBuilder;
 import org.apache.commons.io.FilenameUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileChecksum;
@@ -86,14 +78,13 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Options;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.common.HiveStatsUtils;
-import org.apache.hadoop.hive.common.JavaUtils;
 import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
-import org.apache.hadoop.hive.common.ValidWriteIdList;
 import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate;
 import org.apache.hadoop.hive.common.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.hive.common.log.InPlaceUpdate;
@@ -180,7 +171,6 @@ import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
 import org.apache.hadoop.hive.ql.lockmgr.LockException;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveAugmentMaterializationRule;
 import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils;
@@ -202,7 +192,6 @@ import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.StringUtils;
-import org.apache.hive.common.util.TxnIdUtils;
 import org.apache.thrift.TException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -1727,16 +1716,9 @@ public class Hive {
       } else {
         newPartPath = oldPartPath;
       }
-      List<Path> newFiles = null;
+      List<Path> newFiles = Collections.synchronizedList(new ArrayList<Path>());
 
       perfLogger.PerfLogBegin("MoveTask", PerfLogger.FILE_MOVES);
-      // If config is set, table is not temporary and partition being inserted exists, capture
-      // the list of files added. For not yet existing partitions (insert overwrite to new partition
-      // or dynamic partition inserts), the add partition event will capture the list of files added.
-      if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && (null != oldPart)) {
-        newFiles = Collections.synchronizedList(new ArrayList<Path>());
-      }
-
 
       // Note: the stats for ACID tables do not have any coordination with either Hive ACID logic
       //       like txn commits, time outs, etc.; nor the lower level sync in metastore pertaining
@@ -1771,6 +1753,8 @@ public class Hive {
         if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
           Utilities.FILE_OP_LOGGER.trace("moving " + loadPath + " to " + destPath);
         }
+
+        boolean isManaged = tbl.getTableType().equals(TableType.MANAGED_TABLE.toString());
         // TODO: why is "&& !isAcidIUDoperation" needed here?
         if (!isTxnTable && ((loadFileType == LoadFileType.REPLACE_ALL) || (oldPart == null && !isAcidIUDoperation))) {
           //for fullAcid tables we don't delete files for commands with OVERWRITE - we create a new
@@ -1779,12 +1763,12 @@ public class Hive {
           boolean needRecycle = !tbl.isTemporary()
                   && ReplChangeManager.isSourceOfReplication(Hive.get().getDatabase(tbl.getDbName()));
           replaceFiles(tbl.getPath(), loadPath, destPath, oldPartPath, getConf(), isSrcLocal,
-              isAutoPurge, newFiles, FileUtils.HIDDEN_FILES_PATH_FILTER, needRecycle);
+              isAutoPurge, newFiles, FileUtils.HIDDEN_FILES_PATH_FILTER, needRecycle, isManaged);
         } else {
           FileSystem fs = tbl.getDataLocation().getFileSystem(conf);
           copyFiles(conf, loadPath, destPath, fs, isSrcLocal, isAcidIUDoperation,
               (loadFileType == LoadFileType.OVERWRITE_EXISTING), newFiles,
-              tbl.getNumBuckets() > 0, isFullAcidTable);
+              tbl.getNumBuckets() > 0, isFullAcidTable, isManaged);
         }
       }
       perfLogger.PerfLogEnd("MoveTask", PerfLogger.FILE_MOVES);
@@ -1792,13 +1776,13 @@ public class Hive {
       alterPartitionSpecInMemory(tbl, partSpec, newTPart.getTPartition(), inheritTableSpecs, newPartPath.toString());
       validatePartition(newTPart);
 
+      // If config is set, table is not temporary and partition being inserted exists, capture
+      // the list of files added. For not yet existing partitions (insert overwrite to new partition
+      // or dynamic partition inserts), the add partition event will capture the list of files added.
       // Generate an insert event only if inserting into an existing partition
       // When inserting into a new partition, the add partition event takes care of insert event
-      if ((null != oldPart) && (null != newFiles)) {
+      if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && (null != oldPart)) {
         fireInsertEvent(tbl, partSpec, (loadFileType == LoadFileType.REPLACE_ALL), newFiles);
-      } else {
-        LOG.debug("No new files were created, and is not a replace, or we're inserting into a "
-                + "partition that does not exist yet. Skipping generating INSERT event.");
       }
 
       // column stats will be inaccurate
@@ -1871,6 +1855,7 @@ public class Hive {
       } else {
         setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart);
       }
+
       perfLogger.PerfLogEnd("MoveTask", PerfLogger.LOAD_PARTITION);
       return newTPart;
     } catch (IOException e) {
@@ -2316,15 +2301,12 @@ private void constructOneLBLocationMap(FileStatus fSta,
     PerfLogger perfLogger = SessionState.getPerfLogger();
     perfLogger.PerfLogBegin("MoveTask", PerfLogger.LOAD_TABLE);
 
-    List<Path> newFiles = null;
+    List<Path> newFiles = Collections.synchronizedList(new ArrayList<Path>());
     Table tbl = getTable(tableName);
     assert tbl.getPath() != null : "null==getPath() for " + tbl.getTableName();
     boolean isTxnTable = AcidUtils.isTransactionalTable(tbl);
     boolean isMmTable = AcidUtils.isInsertOnlyTable(tbl);
     boolean isFullAcidTable = AcidUtils.isFullAcidTable(tbl);
-    if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary()) {
-      newFiles = Collections.synchronizedList(new ArrayList<Path>());
-    }
 
     // Note: this assumes both paths are qualified; which they are, currently.
     if ((isMmTable || isFullAcidTable) && loadPath.equals(tbl.getPath())) {
@@ -2356,19 +2338,21 @@ private void constructOneLBLocationMap(FileStatus fSta,
 
       perfLogger.PerfLogBegin("MoveTask", PerfLogger.FILE_MOVES);
 
+      boolean isManaged = tbl.getTableType().equals(TableType.MANAGED_TABLE.toString());
+
       if (loadFileType == LoadFileType.REPLACE_ALL && !isTxnTable) {
         //for fullAcid we don't want to delete any files even for OVERWRITE see HIVE-14988/HIVE-17361
         boolean isAutopurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge"));
         boolean needRecycle = !tbl.isTemporary()
                 && ReplChangeManager.isSourceOfReplication(Hive.get().getDatabase(tbl.getDbName()));
         replaceFiles(tblPath, loadPath, destPath, tblPath, conf, isSrcLocal, isAutopurge,
-            newFiles, FileUtils.HIDDEN_FILES_PATH_FILTER, needRecycle);
+            newFiles, FileUtils.HIDDEN_FILES_PATH_FILTER, needRecycle, isManaged);
       } else {
         try {
           FileSystem fs = tbl.getDataLocation().getFileSystem(conf);
           copyFiles(conf, loadPath, destPath, fs, isSrcLocal, isAcidIUDoperation,
               loadFileType == LoadFileType.OVERWRITE_EXISTING, newFiles,
-              tbl.getNumBuckets() > 0 ? true : false, isFullAcidTable);
+              tbl.getNumBuckets() > 0, isFullAcidTable, isManaged);
         } catch (IOException e) {
           throw new HiveException("addFiles: filesystem error in check phase", e);
         }
@@ -2406,7 +2390,11 @@ private void constructOneLBLocationMap(FileStatus fSta,
 
     alterTable(tbl, environmentContext);
 
-    fireInsertEvent(tbl, null, (loadFileType == LoadFileType.REPLACE_ALL), newFiles);
+    if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary()) {
+      fireInsertEvent(tbl, null, (loadFileType == LoadFileType.REPLACE_ALL), newFiles);
+    } else {
+      fireInsertEvent(tbl, null, (loadFileType == LoadFileType.REPLACE_ALL), null);
+    }
 
     perfLogger.PerfLogEnd("MoveTask", PerfLogger.LOAD_TABLE);
   }
@@ -3320,7 +3308,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
   private static void copyFiles(final HiveConf conf, final FileSystem destFs,
             FileStatus[] srcs, final FileSystem srcFs, final Path destf,
             final boolean isSrcLocal, boolean isOverwrite,
-            final List<Path> newFiles, boolean acidRename) throws HiveException {
+            final List<Path> newFiles, boolean acidRename, boolean isManaged) throws HiveException {
 
     final HdfsUtils.HadoopFileStatus fullDestStatus;
     try {
@@ -3342,6 +3330,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
     int taskId = 0;
     // Sort the files
     Arrays.sort(srcs);
+    String configuredOwner = HiveConf.getVar(conf, ConfVars.HIVE_LOAD_DATA_OWNER);
     for (FileStatus src : srcs) {
       FileStatus[] files;
       if (src.isDirectory()) {
@@ -3362,7 +3351,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
       Arrays.sort(files);
       for (final FileStatus srcFile : files) {
         final Path srcP = srcFile.getPath();
-        final boolean needToCopy = needToCopy(srcP, destf, srcFs, destFs);
+        final boolean needToCopy = needToCopy(srcP, destf, srcFs, destFs, configuredOwner, isManaged);
 
         final boolean isRenameAllowed = !needToCopy && !isSrcLocal;
 
@@ -3604,7 +3593,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
   //from mv command if the destf is a directory, it replaces the destf instead of moving under
   //the destf. in this case, the replaced destf still preserves the original destf's permission
   public static boolean moveFile(final HiveConf conf, Path srcf, final Path destf, boolean replace,
-                                 boolean isSrcLocal) throws HiveException {
+                                 boolean isSrcLocal, boolean isManaged) throws HiveException {
     final FileSystem srcFs, destFs;
     try {
       destFs = destf.getFileSystem(conf);
@@ -3620,6 +3609,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
     }
 
     HdfsUtils.HadoopFileStatus destStatus = null;
+    String configuredOwner = HiveConf.getVar(conf, ConfVars.HIVE_LOAD_DATA_OWNER);
 
     // If source path is a subdirectory of the destination path (or the other way around):
     //   ex: INSERT OVERWRITE DIRECTORY 'target/warehouse/dest4.out' SELECT src.value WHERE src.key >= 300;
@@ -3653,7 +3643,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
         destFs.copyFromLocalFile(srcf, destf);
         return true;
       } else {
-        if (needToCopy(srcf, destf, srcFs, destFs)) {
+        if (needToCopy(srcf, destf, srcFs, destFs, configuredOwner, isManaged)) {
           //copy if across file system or encryption zones.
           LOG.debug("Copying source " + srcf + " to " + destf + " because HDFS encryption zones are different.");
           return FileUtils.copy(srcf.getFileSystem(conf), srcf, destf.getFileSystem(conf), destf,
@@ -3802,12 +3792,47 @@ private void constructOneLBLocationMap(FileStatus fSta,
    * TODO- consider if need to do this for different file authority.
    * @throws HiveException
    */
-  static protected boolean needToCopy(Path srcf, Path destf, FileSystem srcFs, FileSystem destFs) throws HiveException {
+  static private boolean needToCopy(Path srcf, Path destf, FileSystem srcFs,
+                                      FileSystem destFs, String configuredOwner, boolean isManaged) throws HiveException {
     //Check if different FileSystems
     if (!FileUtils.equalsFileSystem(srcFs, destFs)) {
       return true;
     }
 
+    if (isManaged && !configuredOwner.isEmpty() && srcFs instanceof DistributedFileSystem) {
+      // Need some extra checks
+      // Get the running owner
+      FileStatus srcs;
+
+      try {
+        srcs = srcFs.getFileStatus(srcf);
+        String runningUser = UserGroupInformation.getLoginUser().getUserName();
+        boolean isOwned = FileUtils.isOwnerOfFileHierarchy(srcFs, srcs, configuredOwner, false);
+        if (configuredOwner.equals(runningUser)) {
+          // Check if owner has write permission, else it will have to copy
+          if (!(isOwned &&
+              FileUtils.isActionPermittedForFileHierarchy(
+                  srcFs, srcs, configuredOwner, FsAction.WRITE, false))) {
+            return true;
+          }
+        } else {
+          // If the configured owner does not own the file, throw
+          if (!isOwned) {
+            throw new HiveException("Load Data failed for " + srcf + " as the file is not owned by "
+            + configuredOwner + " and load data is also not ran as " + configuredOwner);
+          } else {
+            return true;
+          }
+        }
+      } catch (IOException e) {
+        throw new HiveException("Could not fetch FileStatus for source file");
+      } catch (HiveException e) {
+        throw new HiveException(e);
+      } catch (Exception e) {
+        throw new HiveException(" Failed in looking up Permissions on file + " + srcf);
+      }
+    }
+
     //Check if different encryption zones
     HadoopShims.HdfsEncryptionShim srcHdfsEncryptionShim = SessionState.get().getHdfsEncryptionShim(srcFs);
     HadoopShims.HdfsEncryptionShim destHdfsEncryptionShim = SessionState.get().getHdfsEncryptionShim(destFs);
@@ -3833,12 +3858,13 @@ private void constructOneLBLocationMap(FileStatus fSta,
    * @param isOverwrite if true, then overwrite if destination file exist, else add a duplicate copy
    * @param newFiles if this is non-null, a list of files that were created as a result of this
    *                 move will be returned.
+   * @param isManaged if table is managed.
    * @throws HiveException
    */
   static protected void copyFiles(HiveConf conf, Path srcf, Path destf, FileSystem fs,
                                   boolean isSrcLocal, boolean isAcidIUD,
                                   boolean isOverwrite, List<Path> newFiles, boolean isBucketed,
-                                  boolean isFullAcidTable) throws HiveException {
+                                  boolean isFullAcidTable, boolean isManaged) throws HiveException {
     try {
       // create the destination if it does not exist
       if (!fs.exists(destf)) {
@@ -3874,7 +3900,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
       // i.e, like 000000_0, 000001_0_copy_1, 000002_0.gz etc.
       // The extension is only maintained for files which are compressed.
       copyFiles(conf, fs, srcs, srcFs, destf, isSrcLocal, isOverwrite,
-              newFiles, isFullAcidTable && !isBucketed);
+              newFiles, isFullAcidTable && !isBucketed, isManaged);
     }
   }
 
@@ -4030,10 +4056,12 @@ private void constructOneLBLocationMap(FileStatus fSta,
    *          If the source directory is LOCAL
    * @param newFiles
    *          Output the list of new files replaced in the destination path
+   * @param isManaged
+   *          If the table is managed.
    */
   protected void replaceFiles(Path tablePath, Path srcf, Path destf, Path oldPath, HiveConf conf,
           boolean isSrcLocal, boolean purge, List<Path> newFiles, PathFilter deletePathFilter,
-          boolean isNeedRecycle) throws HiveException {
+          boolean isNeedRecycle, boolean isManaged) throws HiveException {
     try {
 
       FileSystem destFs = destf.getFileSystem(conf);
@@ -4070,7 +4098,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
       // 2. srcs must be a list of files -- ensured by LoadSemanticAnalyzer
       // in both cases, we move the file under destf
       if (srcs.length == 1 && srcs[0].isDirectory()) {
-        if (!moveFile(conf, srcs[0].getPath(), destf, true, isSrcLocal)) {
+        if (!moveFile(conf, srcs[0].getPath(), destf, true, isSrcLocal, isManaged)) {
           throw new IOException("Error moving: " + srcf + " into: " + destf);
         }
 
@@ -4082,7 +4110,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
         // its either a file or glob
         for (FileStatus src : srcs) {
           Path destFile = new Path(destf, src.getPath().getName());
-          if (!moveFile(conf, src.getPath(), destFile, true, isSrcLocal)) {
+          if (!moveFile(conf, src.getPath(), destFile, true, isSrcLocal, isManaged)) {
             throw new IOException("Error moving: " + srcf + " into: " + destf);
           }
 
@@ -4334,7 +4362,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
     return metaStoreClient;
   }
 
-  private String getUserName() {
+  private static String getUserName() {
     return SessionState.getUserFromAuthenticator();
   }
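
For reviewers, the ownership rules added to needToCopy() can be summarized as: when the configured owner is also the user running the load, files are moved only if that owner owns the whole source tree and has write access to it, otherwise they are copied; when the load runs as a different user, source files not owned by the configured owner are rejected, and owned files are copied rather than moved. The sketch below only restates that control flow for illustration; it reuses the same FileUtils helpers the patch calls, but the class and method names are made up and it is not part of the commit:

    // Illustrative restatement of the ownership branch added to Hive.needToCopy(); not from the patch.
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.permission.FsAction;
    import org.apache.hadoop.hive.common.FileUtils;
    import org.apache.hadoop.security.UserGroupInformation;

    class LoadDataOwnershipSketch {
      /** Returns true when the source must be copied instead of moved; mirrors the patch's branch. */
      static boolean mustCopy(FileSystem srcFs, Path srcf, String configuredOwner) throws Exception {
        FileStatus srcs = srcFs.getFileStatus(srcf);
        String runningUser = UserGroupInformation.getLoginUser().getUserName();
        boolean isOwned = FileUtils.isOwnerOfFileHierarchy(srcFs, srcs, configuredOwner, false);
        if (configuredOwner.equals(runningUser)) {
          // Move only if the configured owner owns the tree and can write it; otherwise copy.
          // (The real method continues with the encryption-zone checks when this returns false.)
          return !(isOwned && FileUtils.isActionPermittedForFileHierarchy(
              srcFs, srcs, configuredOwner, FsAction.WRITE, false));
        }
        if (!isOwned) {
          // The patch throws a HiveException in this case.
          throw new Exception("Load Data failed for " + srcf + ": not owned by " + configuredOwner);
        }
        return true; // Owned by the configured owner but run as a different user: the patch forces a copy.
      }
    }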
 

http://git-wip-us.apache.org/repos/asf/hive/blob/6adab1c2/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveCopyFiles.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveCopyFiles.java b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveCopyFiles.java
index a20a2ae..a0c23b6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveCopyFiles.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveCopyFiles.java
@@ -83,7 +83,7 @@ public class TestHiveCopyFiles {
     FileSystem targetFs = targetPath.getFileSystem(hiveConf);
 
     try {
-      Hive.copyFiles(hiveConf, sourcePath, targetPath, targetFs, isSourceLocal, NO_ACID, false,null, false, false);
+      Hive.copyFiles(hiveConf, sourcePath, targetPath, targetFs, isSourceLocal, NO_ACID, false,null, false, false, false);
     } catch (HiveException e) {
       e.printStackTrace();
       assertTrue("Hive.copyFiles() threw an unexpected exception.", false);
@@ -107,7 +107,7 @@ public class TestHiveCopyFiles {
     FileSystem targetFs = targetPath.getFileSystem(hiveConf);
 
     try {
-      Hive.copyFiles(hiveConf, sourcePath, targetPath, targetFs, isSourceLocal, NO_ACID, false, null, false, false);
+      Hive.copyFiles(hiveConf, sourcePath, targetPath, targetFs, isSourceLocal, NO_ACID, false, null, false, false, false);
     } catch (HiveException e) {
       e.printStackTrace();
       assertTrue("Hive.copyFiles() threw an unexpected exception.", false);
@@ -127,7 +127,7 @@ public class TestHiveCopyFiles {
     sourceFolder.newFile("000001_0.gz");
 
     try {
-      Hive.copyFiles(hiveConf, sourcePath, targetPath, targetFs, isSourceLocal, NO_ACID, false, null, false, false);
+      Hive.copyFiles(hiveConf, sourcePath, targetPath, targetFs, isSourceLocal, NO_ACID, false, null, false, false, false);
     } catch (HiveException e) {
       e.printStackTrace();
       assertTrue("Hive.copyFiles() threw an unexpected exception.", false);
@@ -158,7 +158,7 @@ public class TestHiveCopyFiles {
     Mockito.when(spyTargetFs.getUri()).thenReturn(URI.create("hdfs://" + targetPath.toUri().getPath()));
 
     try {
-      Hive.copyFiles(hiveConf, sourcePath, targetPath, spyTargetFs, isSourceLocal, NO_ACID, false, null, false, false);
+      Hive.copyFiles(hiveConf, sourcePath, targetPath, spyTargetFs, isSourceLocal, NO_ACID, false, null, false, false, false);
     } catch (HiveException e) {
       e.printStackTrace();
       assertTrue("Hive.copyFiles() threw an unexpected exception.", false);
@@ -185,7 +185,7 @@ public class TestHiveCopyFiles {
     Mockito.when(spyTargetFs.getUri()).thenReturn(URI.create("hdfs://" + targetPath.toUri().getPath()));
 
     try {
-      Hive.copyFiles(hiveConf, sourcePath, targetPath, spyTargetFs, isSourceLocal, NO_ACID, false, null, false, false);
+      Hive.copyFiles(hiveConf, sourcePath, targetPath, spyTargetFs, isSourceLocal, NO_ACID, false, null, false, false, false);
     } catch (HiveException e) {
       e.printStackTrace();
       assertTrue("Hive.copyFiles() threw an unexpected exception.", false);
@@ -205,7 +205,7 @@ public class TestHiveCopyFiles {
     sourceFolder.newFile("000001_0.gz");
 
     try {
-      Hive.copyFiles(hiveConf, sourcePath, targetPath, spyTargetFs, isSourceLocal, NO_ACID, false, null, false, false);
+      Hive.copyFiles(hiveConf, sourcePath, targetPath, spyTargetFs, isSourceLocal, NO_ACID, false, null, false, false, false);
     } catch (HiveException e) {
       e.printStackTrace();
       assertTrue("Hive.copyFiles() threw an unexpected exception.", false);


[7/7] hive git commit: HIVE-19416 : merge master into branch (Sergey Shelukhin)

Posted by se...@apache.org.
HIVE-19416 : merge master into branch (Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4743c798
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4743c798
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4743c798

Branch: refs/heads/master-txnstats
Commit: 4743c7984d8a547cab135f9fc0fc550ac32dd61f
Parents: 42a9f3b 6d532e7
Author: sergey <se...@apache.org>
Authored: Fri Jun 22 11:06:12 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Fri Jun 22 11:06:12 2018 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   2 +
 .../hcatalog/pig/AbstractHCatStorerTest.java    |   2 +-
 .../pig/TestHCatLoaderComplexSchema.java        |   3 +
 .../hive/hcatalog/pig/TestHCatStorer.java       |   4 +-
 .../listener/DummyRawStoreFailEvent.java        |   6 +
 .../test/resources/testconfiguration.properties |   1 +
 .../apache/hadoop/hive/ql/exec/MoveTask.java    |   3 +-
 .../apache/hadoop/hive/ql/io/IOConstants.java   |   1 +
 .../ql/io/JsonFileStorageFormatDescriptor.java  |  51 ++
 .../apache/hadoop/hive/ql/metadata/Hive.java    | 104 ++--
 .../rules/HiveFilterProjectTransposeRule.java   |   8 +-
 .../hive/ql/optimizer/physical/Vectorizer.java  |  64 ++-
 ...he.hadoop.hive.ql.io.StorageFormatDescriptor |   1 +
 .../hive/ql/io/TestStorageFormatDescriptor.java |   3 +
 .../hive/ql/metadata/TestHiveCopyFiles.java     |  12 +-
 .../clientpositive/cbo_ppd_non_deterministic.q  |  42 ++
 .../test/queries/clientpositive/json_serde1.q   |   9 +-
 .../vector_parquet_nested_two_level_complex.q   |  67 +++
 .../cbo_ppd_non_deterministic.q.out             | 195 +++++++
 .../results/clientpositive/json_serde1.q.out    |  44 +-
 ...ector_parquet_nested_two_level_complex.q.out | 540 +++++++++++++++++++
 .../results/clientpositive/ppd_udf_col.q.out    |  62 ++-
 .../results/clientpositive/union_offcbo.q.out   |  64 ++-
 .../hadoop/hive/metastore/HiveMetaStore.java    | 111 ++--
 .../hadoop/hive/metastore/ObjectStore.java      |  46 ++
 .../apache/hadoop/hive/metastore/RawStore.java  |  15 +
 .../hive/metastore/cache/CachedStore.java       |   6 +
 .../hadoop/hive/metastore/utils/FileUtils.java  |  14 +
 .../DummyRawStoreControlledCommit.java          |   6 +
 .../DummyRawStoreForJdoConnection.java          |   6 +
 .../client/MetaStoreFactoryForTests.java        |   1 +
 .../TestTablesCreateDropAlterTruncate.java      |  14 +-
 32 files changed, 1337 insertions(+), 170 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/4743c798/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/4743c798/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --cc ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 3918e62,eab9f4a..f9fab96
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@@ -78,8 -78,13 +78,9 @@@ import org.apache.hadoop.fs.FileSystem
  import org.apache.hadoop.fs.Options;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.fs.PathFilter;
+ import org.apache.hadoop.fs.permission.FsAction;
  import org.apache.hadoop.hdfs.DistributedFileSystem;
 -import org.apache.hadoop.hive.common.FileUtils;
 -import org.apache.hadoop.hive.common.HiveStatsUtils;
 -import org.apache.hadoop.hive.common.ObjectPair;
 -import org.apache.hadoop.hive.common.StatsSetupConst;
 -import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
 +import org.apache.hadoop.hive.common.*;
  import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate;
  import org.apache.hadoop.hive.common.classification.InterfaceStability.Unstable;
  import org.apache.hadoop.hive.common.log.InPlaceUpdate;
@@@ -1815,15 -1775,14 +1811,15 @@@ public class Hive 
        Partition newTPart = oldPart != null ? oldPart : new Partition(tbl, partSpec, newPartPath);
        alterPartitionSpecInMemory(tbl, partSpec, newTPart.getTPartition(), inheritTableSpecs, newPartPath.toString());
        validatePartition(newTPart);
 +      setTableSnapshotForTransactionalPartition(conf, newTPart);
  
+       // If config is set, table is not temporary and partition being inserted exists, capture
+       // the list of files added. For not yet existing partitions (insert overwrite to new partition
+       // or dynamic partition inserts), the add partition event will capture the list of files added.
        // Generate an insert event only if inserting into an existing partition
        // When inserting into a new partition, the add partition event takes care of insert event
-       if ((null != oldPart) && (null != newFiles)) {
+       if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && (null != oldPart)) {
          fireInsertEvent(tbl, partSpec, (loadFileType == LoadFileType.REPLACE_ALL), newFiles);
-       } else {
-         LOG.debug("No new files were created, and is not a replace, or we're inserting into a "
-                 + "partition that does not exist yet. Skipping generating INSERT event.");
        }
  
        // column stats will be inaccurate

http://git-wip-us.apache.org/repos/asf/hive/blob/4743c798/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/4743c798/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/4743c798/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/4743c798/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/4743c798/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/4743c798/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
----------------------------------------------------------------------