Posted to commits@hive.apache.org by sa...@apache.org on 2019/04/16 06:50:15 UTC

[hive] branch branch-3.1 updated (2b77d69 -> 8290316)

This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a change to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hive.git.


    from 2b77d69  HIVE-21600: GenTezUtils.removeSemiJoinOperator may throw out of bounds exception for TS with multiple children (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
     new 83e39ae  HIVE-20593 : Load Data for partitioned ACID tables fails with bucketId out of range: -1 (Deepak Jaiswal, reviewed by Eugene Koifman)
     new 8290316  HIVE-21564: Load data into a bucketed table is ignoring partitions specs and loads data into default partition (Sankar Hariappan, reviewed by Jesus Camacho Rodriguez)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 ...230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0 | Bin 0 -> 501 bytes
 ...230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0 | Bin 0 -> 465 bytes
 .../hadoop/hive/ql/parse/LoadSemanticAnalyzer.java |  62 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     |  21 +-
 .../queries/clientpositive/load_data_using_job.q   |  18 +-
 .../load_static_ptn_into_bucketed_table.q          |  42 +
 .../clientpositive/llap/load_data_using_job.q.out  | 110 ++-
 .../load_static_ptn_into_bucketed_table.q.out      | 857 +++++++++++++++++++++
 8 files changed, 1097 insertions(+), 13 deletions(-)
 create mode 100644 data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0
 create mode 100644 data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0
 create mode 100644 ql/src/test/queries/clientpositive/load_static_ptn_into_bucketed_table.q
 create mode 100644 ql/src/test/results/clientpositive/load_static_ptn_into_bucketed_table.q.out


[hive] 02/02: HIVE-21564: Load data into a bucketed table is ignoring partitions specs and loads data into default partition (Sankar Hariappan, reviewed by Jesus Camacho Rodriguez)

Posted by sa...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hive.git

commit 8290316cd1af701e75adf1e357c6b3fe8409f68f
Author: Sankar Hariappan <sa...@apache.org>
AuthorDate: Tue Apr 16 11:52:20 2019 +0530

    HIVE-21564: Load data into a bucketed table is ignoring partitions specs and loads data into default partition (Sankar Hariappan, reviewed by Jesus Camacho Rodriguez)
    
    Signed-off-by: Sankar Hariappan <sa...@apache.org>
---
 .../hadoop/hive/ql/parse/LoadSemanticAnalyzer.java |  57 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     |  21 +-
 .../load_static_ptn_into_bucketed_table.q          |  42 +
 .../load_static_ptn_into_bucketed_table.q.out      | 857 +++++++++++++++++++++
 4 files changed, 970 insertions(+), 7 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
index ee12f64..73a688b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
@@ -471,11 +471,6 @@ public class LoadSemanticAnalyzer extends SemanticAnalyzer {
     String tempTblName = table.getTableName() + tempTblNameSuffix;
     tempTableObj.setTableName(tempTblName);
 
-    // Move all the partition columns at the end of table columns
-    tempTableObj.setFields(table.getAllCols());
-    // wipe out partition columns
-    tempTableObj.setPartCols(new ArrayList<>());
-
     // Reset table params
     tempTableObj.setParameters(new HashMap<>());
 
@@ -490,12 +485,62 @@ public class LoadSemanticAnalyzer extends SemanticAnalyzer {
       }
     }
 
+    // Make the columns list for the temp table (input data file).
+    // Move all the partition columns to the end of the table columns.
+    ArrayList<FieldSchema> colList = new ArrayList<FieldSchema>();
+    colList.addAll(table.getCols());
+
+    // inpPartSpec is a mapping from partition column name to its value.
+    Map<String, String> inpPartSpec = null;
+
+    // Partition spec was already validated by the caller when creating the TableSpec object.
+    // So, we need not validate inpPartSpec here.
+    List<FieldSchema> parts = table.getPartCols();
+    if (tableTree.getChildCount() >= 2) {
+      ASTNode partSpecNode = (ASTNode) tableTree.getChild(1);
+      inpPartSpec = new HashMap<>(partSpecNode.getChildCount());
+
+      for (int i = 0; i < partSpecNode.getChildCount(); ++i) {
+        ASTNode partSpecValNode = (ASTNode) partSpecNode.getChild(i);
+        String partVal = null;
+        String partColName = unescapeIdentifier(partSpecValNode.getChild(0).getText().toLowerCase());
+
+        if (partSpecValNode.getChildCount() >= 2) { // in the form of T partition (ds="2010-03-03")
+          // Not stripping quotes here, as the value is used as-is when framing the PARTITION
+          // clause in the INSERT query.
+          partVal = partSpecValNode.getChild(1).getText();
+        }
+        inpPartSpec.put(partColName, partVal);
+      }
+
+      // Add only dynamic partition columns to the temp table (input data file).
+      // For static partitions, values would be obtained from partition(key=value...) clause.
+      for (FieldSchema fs : parts) {
+        String partKey = fs.getName();
+
+        // If a partition value is not there, then it is a dynamic partition key.
+        if (inpPartSpec.get(partKey) == null) {
+          colList.add(fs);
+        }
+      }
+    } else {
+      // No static partitions are specified, hence all partition keys are dynamic and need to be
+      // part of the temp table (input data file).
+      colList.addAll(parts);
+    }
+
+    // Set columns list for temp table.
+    tempTableObj.setFields(colList);
+
+    // Wipe out partition columns
+    tempTableObj.setPartCols(new ArrayList<>());
+
     // Step 2 : create the Insert query
     StringBuilder rewrittenQueryStr = new StringBuilder();
 
     rewrittenQueryStr.append("insert into table ");
     rewrittenQueryStr.append(getFullTableNameForSQL((ASTNode)(tableTree.getChild(0))));
-    addPartitionColsToInsert(table.getPartCols(), rewrittenQueryStr);
+    addPartitionColsToInsert(table.getPartCols(), inpPartSpec, rewrittenQueryStr);
     rewrittenQueryStr.append(" select * from ");
     rewrittenQueryStr.append(tempTblName);
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index f8a4e11..3eddace 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -14924,6 +14924,20 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
    * INSERT INTO T PARTITION(partCol1,partCol2...) SELECT col1, ... partCol1,partCol2...
    */
   protected void addPartitionColsToInsert(List<FieldSchema> partCols, StringBuilder rewrittenQueryStr) {
+    addPartitionColsToInsert(partCols, null, rewrittenQueryStr);
+  }
+
+  /**
+   * Append the list of partition columns to the INSERT statement. If the user specified a partition
+   * spec, use it to get the value for each partition column; else use dynamic partition mode with no value.
+   * Static partition mode:
+   * INSERT INTO T PARTITION(partCol1=val1,partCol2...) SELECT col1, ... partCol1,partCol2...
+   * Dynamic partition mode:
+   * INSERT INTO T PARTITION(partCol1,partCol2...) SELECT col1, ... partCol1,partCol2...
+   */
+  protected void addPartitionColsToInsert(List<FieldSchema> partCols,
+                                          Map<String, String> partSpec,
+                                          StringBuilder rewrittenQueryStr) {
     // If the table is partitioned we have to put the partition() clause in
     if (partCols != null && partCols.size() > 0) {
       rewrittenQueryStr.append(" partition (");
@@ -14933,8 +14947,13 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
           first = false;
         else
           rewrittenQueryStr.append(", ");
-        //would be nice if there was a way to determine if quotes are needed
+
+        // Would be nice if there was a way to determine if quotes are needed
         rewrittenQueryStr.append(HiveUtils.unparseIdentifier(fschema.getName(), this.conf));
+        String partVal = (partSpec != null) ? partSpec.get(fschema.getName()) : null;
+        if (partVal != null) {
+          rewrittenQueryStr.append("=").append(partVal);
+        }
       }
       rewrittenQueryStr.append(")");
     }
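
The new overload is easiest to understand from its output. Below is a stand-alone sketch of the
clause generation (identifier quoting via HiveUtils.unparseIdentifier is deliberately elided; this
is a simplification under those assumptions, not the real method):

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class PartitionClauseSketch {
      static String partitionClause(List<String> partCols, Map<String, String> partSpec) {
        if (partCols == null || partCols.isEmpty()) {
          return ""; // unpartitioned table: no PARTITION clause at all
        }
        StringBuilder sb = new StringBuilder(" partition (");
        boolean first = true;
        for (String col : partCols) {
          if (!first) {
            sb.append(", ");
          }
          first = false;
          sb.append(col);
          String partVal = (partSpec != null) ? partSpec.get(col) : null;
          if (partVal != null) {
            sb.append("=").append(partVal); // static key keeps its value
          }                                 // dynamic key is left bare
        }
        return sb.append(")").toString();
      }

      public static void main(String[] args) {
        Map<String, String> spec = new HashMap<>();
        spec.put("hr", "20");
        // Mixed spec: prints " partition (hr=20, ds)"
        System.out.println(partitionClause(Arrays.asList("hr", "ds"), spec));
        // Null spec: prints " partition (hr, ds)", i.e. pure dynamic partition mode
        System.out.println(partitionClause(Arrays.asList("hr", "ds"), null));
      }
    }

A null spec reproduces the pre-patch behavior, which is why the old two-argument signature above
can simply delegate to the new three-argument one.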
diff --git a/ql/src/test/queries/clientpositive/load_static_ptn_into_bucketed_table.q b/ql/src/test/queries/clientpositive/load_static_ptn_into_bucketed_table.q
new file mode 100644
index 0000000..6b6d160
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/load_static_ptn_into_bucketed_table.q
@@ -0,0 +1,42 @@
+set hive.stats.column.autogather=false;
+set hive.strict.checks.bucketing=true;
+
+set hive.explain.user=false;
+
+-- SORT_QUERY_RESULTS
+
+-- Single key partition
+-- Load with full partition spec
+CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE;
+explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds='2008-04-08');
+select * from src_bucket_tbl where ds='2008-04-08';
+
+drop table src_bucket_tbl;
+
+-- Multi key partition
+-- Load with both static and dynamic partition spec where the dynamic partition value is not in the file and the default partition is expected to be used.
+CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (hr int, ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE;
+explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds, hr=10);
+load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds, hr=10);
+select * from src_bucket_tbl where hr=10;
+
+drop table src_bucket_tbl;
+
+-- Multi key partition
+-- Load with both static and dynamic partition spec where the dynamic partition value is present in the file.
+CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (hr int, ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE;
+explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE src_bucket_tbl partition(hr=20, ds);
+load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE src_bucket_tbl partition(hr=20, ds);
+select * from src_bucket_tbl where hr=20 and ds='2008-04-08';
+
+drop table src_bucket_tbl;
+
+-- Multi key partition
+-- Load with both static and dynamic partition spec
+CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (hr int, ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE;
+explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(hr=30, ds='2010-05-07');
+load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(hr=30, ds='2010-05-07');
+select * from src_bucket_tbl where hr=30 and ds='2010-05-07';
+
+drop table src_bucket_tbl;
\ No newline at end of file
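
Under the rewrite described above, each of these LOAD DATA statements is compiled into an INSERT
over a staging table. For the third case, for instance, the generated query is roughly (the
staging-table name suffix is visible in the q.out below):

    insert into table src_bucket_tbl partition (hr=20, ds)
      select * from src_bucket_tbl__temp_table_for_load_data__

so hr is pinned to 20 while ds is resolved dynamically from the file's trailing column.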
diff --git a/ql/src/test/results/clientpositive/load_static_ptn_into_bucketed_table.q.out b/ql/src/test/results/clientpositive/load_static_ptn_into_bucketed_table.q.out
new file mode 100644
index 0000000..d646077
--- /dev/null
+++ b/ql/src/test/results/clientpositive/load_static_ptn_into_bucketed_table.q.out
@@ -0,0 +1,857 @@
+PREHOOK: query: CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_bucket_tbl
+POSTHOOK: query: CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_bucket_tbl
+PREHOOK: query: explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds='2008-04-08')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+PREHOOK: Output: default@src_bucket_tbl@ds=2008-04-08
+POSTHOOK: query: explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds='2008-04-08')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+POSTHOOK: Output: default@src_bucket_tbl@ds=2008-04-08
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src_bucket_tbl__temp_table_for_load_data__
+            Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                sort order: 
+                Map-reduce partition columns: _col0 (type: int)
+                Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: int), _col1 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int), VALUE._col1 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.src_bucket_tbl
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 2008-04-08
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.src_bucket_tbl
+
+  Stage: Stage-2
+    Stats Work
+      Basic Stats Work:
+
+PREHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds='2008-04-08')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+PREHOOK: Output: default@src_bucket_tbl@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds='2008-04-08')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+POSTHOOK: Output: default@src_bucket_tbl@ds=2008-04-08
+POSTHOOK: Lineage: src_bucket_tbl PARTITION(ds=2008-04-08).key SIMPLE [(src_bucket_tbl__temp_table_for_load_data__)src_bucket_tbl__temp_table_for_load_data__.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: src_bucket_tbl PARTITION(ds=2008-04-08).value SIMPLE [(src_bucket_tbl__temp_table_for_load_data__)src_bucket_tbl__temp_table_for_load_data__.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from src_bucket_tbl where ds='2008-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl
+PREHOOK: Input: default@src_bucket_tbl@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src_bucket_tbl where ds='2008-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl
+POSTHOOK: Input: default@src_bucket_tbl@ds=2008-04-08
+#### A masked pattern was here ####
+0	val_0	2008-04-08
+0	val_0	2008-04-08
+0	val_0	2008-04-08
+103	val_103	2008-04-08
+103	val_103	2008-04-08
+11	val_11	2008-04-08
+114	val_114	2008-04-08
+118	val_118	2008-04-08
+118	val_118	2008-04-08
+125	val_125	2008-04-08
+125	val_125	2008-04-08
+129	val_129	2008-04-08
+129	val_129	2008-04-08
+136	val_136	2008-04-08
+143	val_143	2008-04-08
+15	val_15	2008-04-08
+15	val_15	2008-04-08
+150	val_150	2008-04-08
+158	val_158	2008-04-08
+165	val_165	2008-04-08
+165	val_165	2008-04-08
+169	val_169	2008-04-08
+169	val_169	2008-04-08
+169	val_169	2008-04-08
+169	val_169	2008-04-08
+172	val_172	2008-04-08
+172	val_172	2008-04-08
+176	val_176	2008-04-08
+176	val_176	2008-04-08
+183	val_183	2008-04-08
+187	val_187	2008-04-08
+187	val_187	2008-04-08
+187	val_187	2008-04-08
+19	val_19	2008-04-08
+190	val_190	2008-04-08
+194	val_194	2008-04-08
+202	val_202	2008-04-08
+213	val_213	2008-04-08
+213	val_213	2008-04-08
+217	val_217	2008-04-08
+217	val_217	2008-04-08
+224	val_224	2008-04-08
+224	val_224	2008-04-08
+228	val_228	2008-04-08
+235	val_235	2008-04-08
+239	val_239	2008-04-08
+239	val_239	2008-04-08
+242	val_242	2008-04-08
+242	val_242	2008-04-08
+257	val_257	2008-04-08
+26	val_26	2008-04-08
+26	val_26	2008-04-08
+260	val_260	2008-04-08
+275	val_275	2008-04-08
+282	val_282	2008-04-08
+282	val_282	2008-04-08
+286	val_286	2008-04-08
+305	val_305	2008-04-08
+309	val_309	2008-04-08
+309	val_309	2008-04-08
+316	val_316	2008-04-08
+316	val_316	2008-04-08
+316	val_316	2008-04-08
+323	val_323	2008-04-08
+327	val_327	2008-04-08
+327	val_327	2008-04-08
+327	val_327	2008-04-08
+33	val_33	2008-04-08
+338	val_338	2008-04-08
+341	val_341	2008-04-08
+345	val_345	2008-04-08
+356	val_356	2008-04-08
+367	val_367	2008-04-08
+367	val_367	2008-04-08
+37	val_37	2008-04-08
+37	val_37	2008-04-08
+374	val_374	2008-04-08
+378	val_378	2008-04-08
+389	val_389	2008-04-08
+392	val_392	2008-04-08
+396	val_396	2008-04-08
+396	val_396	2008-04-08
+396	val_396	2008-04-08
+4	val_4	2008-04-08
+400	val_400	2008-04-08
+404	val_404	2008-04-08
+404	val_404	2008-04-08
+411	val_411	2008-04-08
+419	val_419	2008-04-08
+437	val_437	2008-04-08
+44	val_44	2008-04-08
+444	val_444	2008-04-08
+448	val_448	2008-04-08
+455	val_455	2008-04-08
+459	val_459	2008-04-08
+459	val_459	2008-04-08
+462	val_462	2008-04-08
+462	val_462	2008-04-08
+466	val_466	2008-04-08
+466	val_466	2008-04-08
+466	val_466	2008-04-08
+477	val_477	2008-04-08
+480	val_480	2008-04-08
+480	val_480	2008-04-08
+480	val_480	2008-04-08
+484	val_484	2008-04-08
+491	val_491	2008-04-08
+495	val_495	2008-04-08
+51	val_51	2008-04-08
+51	val_51	2008-04-08
+66	val_66	2008-04-08
+77	val_77	2008-04-08
+8	val_8	2008-04-08
+80	val_80	2008-04-08
+84	val_84	2008-04-08
+84	val_84	2008-04-08
+95	val_95	2008-04-08
+95	val_95	2008-04-08
+PREHOOK: query: drop table src_bucket_tbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@src_bucket_tbl
+PREHOOK: Output: default@src_bucket_tbl
+POSTHOOK: query: drop table src_bucket_tbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@src_bucket_tbl
+POSTHOOK: Output: default@src_bucket_tbl
+PREHOOK: query: CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (hr int, ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_bucket_tbl
+POSTHOOK: query: CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (hr int, ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_bucket_tbl
+PREHOOK: query: explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds, hr=10)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+PREHOOK: Output: default@src_bucket_tbl@hr=10
+POSTHOOK: query: explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds, hr=10)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src_bucket_tbl__temp_table_for_load_data__
+            Statistics: Num rows: 35 Data size: 13020 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: string), ds (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 35 Data size: 13020 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col2 (type: string), _bucket_number (type: string)
+                sort order: ++
+                Map-reduce partition columns: _col2 (type: string)
+                value expressions: _col0 (type: int), _col1 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._bucket_number (type: string)
+          outputColumnNames: _col0, _col1, _col2, _bucket_number
+          File Output Operator
+            compressed: false
+            Dp Sort State: PARTITION_BUCKET_SORTED
+            Statistics: Num rows: 35 Data size: 13020 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.src_bucket_tbl
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 
+            hr 10
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.src_bucket_tbl
+
+  Stage: Stage-2
+    Stats Work
+      Basic Stats Work:
+
+PREHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds, hr=10)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+PREHOOK: Output: default@src_bucket_tbl@hr=10
+POSTHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds, hr=10)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+POSTHOOK: Output: default@src_bucket_tbl@hr=10/ds=__HIVE_DEFAULT_PARTITION__
+POSTHOOK: Lineage: src_bucket_tbl PARTITION(hr=10,ds=__HIVE_DEFAULT_PARTITION__).key SIMPLE [(src_bucket_tbl__temp_table_for_load_data__)src_bucket_tbl__temp_table_for_load_data__.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: src_bucket_tbl PARTITION(hr=10,ds=__HIVE_DEFAULT_PARTITION__).value SIMPLE [(src_bucket_tbl__temp_table_for_load_data__)src_bucket_tbl__temp_table_for_load_data__.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from src_bucket_tbl where hr=10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl
+PREHOOK: Input: default@src_bucket_tbl@hr=10/ds=__HIVE_DEFAULT_PARTITION__
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src_bucket_tbl where hr=10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl
+POSTHOOK: Input: default@src_bucket_tbl@hr=10/ds=__HIVE_DEFAULT_PARTITION__
+#### A masked pattern was here ####
+0	val_0	10	__HIVE_DEFAULT_PARTITION__
+0	val_0	10	__HIVE_DEFAULT_PARTITION__
+0	val_0	10	__HIVE_DEFAULT_PARTITION__
+103	val_103	10	__HIVE_DEFAULT_PARTITION__
+103	val_103	10	__HIVE_DEFAULT_PARTITION__
+11	val_11	10	__HIVE_DEFAULT_PARTITION__
+114	val_114	10	__HIVE_DEFAULT_PARTITION__
+118	val_118	10	__HIVE_DEFAULT_PARTITION__
+118	val_118	10	__HIVE_DEFAULT_PARTITION__
+125	val_125	10	__HIVE_DEFAULT_PARTITION__
+125	val_125	10	__HIVE_DEFAULT_PARTITION__
+129	val_129	10	__HIVE_DEFAULT_PARTITION__
+129	val_129	10	__HIVE_DEFAULT_PARTITION__
+136	val_136	10	__HIVE_DEFAULT_PARTITION__
+143	val_143	10	__HIVE_DEFAULT_PARTITION__
+15	val_15	10	__HIVE_DEFAULT_PARTITION__
+15	val_15	10	__HIVE_DEFAULT_PARTITION__
+150	val_150	10	__HIVE_DEFAULT_PARTITION__
+158	val_158	10	__HIVE_DEFAULT_PARTITION__
+165	val_165	10	__HIVE_DEFAULT_PARTITION__
+165	val_165	10	__HIVE_DEFAULT_PARTITION__
+169	val_169	10	__HIVE_DEFAULT_PARTITION__
+169	val_169	10	__HIVE_DEFAULT_PARTITION__
+169	val_169	10	__HIVE_DEFAULT_PARTITION__
+169	val_169	10	__HIVE_DEFAULT_PARTITION__
+172	val_172	10	__HIVE_DEFAULT_PARTITION__
+172	val_172	10	__HIVE_DEFAULT_PARTITION__
+176	val_176	10	__HIVE_DEFAULT_PARTITION__
+176	val_176	10	__HIVE_DEFAULT_PARTITION__
+183	val_183	10	__HIVE_DEFAULT_PARTITION__
+187	val_187	10	__HIVE_DEFAULT_PARTITION__
+187	val_187	10	__HIVE_DEFAULT_PARTITION__
+187	val_187	10	__HIVE_DEFAULT_PARTITION__
+19	val_19	10	__HIVE_DEFAULT_PARTITION__
+190	val_190	10	__HIVE_DEFAULT_PARTITION__
+194	val_194	10	__HIVE_DEFAULT_PARTITION__
+202	val_202	10	__HIVE_DEFAULT_PARTITION__
+213	val_213	10	__HIVE_DEFAULT_PARTITION__
+213	val_213	10	__HIVE_DEFAULT_PARTITION__
+217	val_217	10	__HIVE_DEFAULT_PARTITION__
+217	val_217	10	__HIVE_DEFAULT_PARTITION__
+224	val_224	10	__HIVE_DEFAULT_PARTITION__
+224	val_224	10	__HIVE_DEFAULT_PARTITION__
+228	val_228	10	__HIVE_DEFAULT_PARTITION__
+235	val_235	10	__HIVE_DEFAULT_PARTITION__
+239	val_239	10	__HIVE_DEFAULT_PARTITION__
+239	val_239	10	__HIVE_DEFAULT_PARTITION__
+242	val_242	10	__HIVE_DEFAULT_PARTITION__
+242	val_242	10	__HIVE_DEFAULT_PARTITION__
+257	val_257	10	__HIVE_DEFAULT_PARTITION__
+26	val_26	10	__HIVE_DEFAULT_PARTITION__
+26	val_26	10	__HIVE_DEFAULT_PARTITION__
+260	val_260	10	__HIVE_DEFAULT_PARTITION__
+275	val_275	10	__HIVE_DEFAULT_PARTITION__
+282	val_282	10	__HIVE_DEFAULT_PARTITION__
+282	val_282	10	__HIVE_DEFAULT_PARTITION__
+286	val_286	10	__HIVE_DEFAULT_PARTITION__
+305	val_305	10	__HIVE_DEFAULT_PARTITION__
+309	val_309	10	__HIVE_DEFAULT_PARTITION__
+309	val_309	10	__HIVE_DEFAULT_PARTITION__
+316	val_316	10	__HIVE_DEFAULT_PARTITION__
+316	val_316	10	__HIVE_DEFAULT_PARTITION__
+316	val_316	10	__HIVE_DEFAULT_PARTITION__
+323	val_323	10	__HIVE_DEFAULT_PARTITION__
+327	val_327	10	__HIVE_DEFAULT_PARTITION__
+327	val_327	10	__HIVE_DEFAULT_PARTITION__
+327	val_327	10	__HIVE_DEFAULT_PARTITION__
+33	val_33	10	__HIVE_DEFAULT_PARTITION__
+338	val_338	10	__HIVE_DEFAULT_PARTITION__
+341	val_341	10	__HIVE_DEFAULT_PARTITION__
+345	val_345	10	__HIVE_DEFAULT_PARTITION__
+356	val_356	10	__HIVE_DEFAULT_PARTITION__
+367	val_367	10	__HIVE_DEFAULT_PARTITION__
+367	val_367	10	__HIVE_DEFAULT_PARTITION__
+37	val_37	10	__HIVE_DEFAULT_PARTITION__
+37	val_37	10	__HIVE_DEFAULT_PARTITION__
+374	val_374	10	__HIVE_DEFAULT_PARTITION__
+378	val_378	10	__HIVE_DEFAULT_PARTITION__
+389	val_389	10	__HIVE_DEFAULT_PARTITION__
+392	val_392	10	__HIVE_DEFAULT_PARTITION__
+396	val_396	10	__HIVE_DEFAULT_PARTITION__
+396	val_396	10	__HIVE_DEFAULT_PARTITION__
+396	val_396	10	__HIVE_DEFAULT_PARTITION__
+4	val_4	10	__HIVE_DEFAULT_PARTITION__
+400	val_400	10	__HIVE_DEFAULT_PARTITION__
+404	val_404	10	__HIVE_DEFAULT_PARTITION__
+404	val_404	10	__HIVE_DEFAULT_PARTITION__
+411	val_411	10	__HIVE_DEFAULT_PARTITION__
+419	val_419	10	__HIVE_DEFAULT_PARTITION__
+437	val_437	10	__HIVE_DEFAULT_PARTITION__
+44	val_44	10	__HIVE_DEFAULT_PARTITION__
+444	val_444	10	__HIVE_DEFAULT_PARTITION__
+448	val_448	10	__HIVE_DEFAULT_PARTITION__
+455	val_455	10	__HIVE_DEFAULT_PARTITION__
+459	val_459	10	__HIVE_DEFAULT_PARTITION__
+459	val_459	10	__HIVE_DEFAULT_PARTITION__
+462	val_462	10	__HIVE_DEFAULT_PARTITION__
+462	val_462	10	__HIVE_DEFAULT_PARTITION__
+466	val_466	10	__HIVE_DEFAULT_PARTITION__
+466	val_466	10	__HIVE_DEFAULT_PARTITION__
+466	val_466	10	__HIVE_DEFAULT_PARTITION__
+477	val_477	10	__HIVE_DEFAULT_PARTITION__
+480	val_480	10	__HIVE_DEFAULT_PARTITION__
+480	val_480	10	__HIVE_DEFAULT_PARTITION__
+480	val_480	10	__HIVE_DEFAULT_PARTITION__
+484	val_484	10	__HIVE_DEFAULT_PARTITION__
+491	val_491	10	__HIVE_DEFAULT_PARTITION__
+495	val_495	10	__HIVE_DEFAULT_PARTITION__
+51	val_51	10	__HIVE_DEFAULT_PARTITION__
+51	val_51	10	__HIVE_DEFAULT_PARTITION__
+66	val_66	10	__HIVE_DEFAULT_PARTITION__
+77	val_77	10	__HIVE_DEFAULT_PARTITION__
+8	val_8	10	__HIVE_DEFAULT_PARTITION__
+80	val_80	10	__HIVE_DEFAULT_PARTITION__
+84	val_84	10	__HIVE_DEFAULT_PARTITION__
+84	val_84	10	__HIVE_DEFAULT_PARTITION__
+95	val_95	10	__HIVE_DEFAULT_PARTITION__
+95	val_95	10	__HIVE_DEFAULT_PARTITION__
+PREHOOK: query: drop table src_bucket_tbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@src_bucket_tbl
+PREHOOK: Output: default@src_bucket_tbl
+POSTHOOK: query: drop table src_bucket_tbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@src_bucket_tbl
+POSTHOOK: Output: default@src_bucket_tbl
+PREHOOK: query: CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (hr int, ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_bucket_tbl
+POSTHOOK: query: CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (hr int, ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_bucket_tbl
+PREHOOK: query: explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE src_bucket_tbl partition(hr=20, ds)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+PREHOOK: Output: default@src_bucket_tbl@hr=20
+POSTHOOK: query: explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE src_bucket_tbl partition(hr=20, ds)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src_bucket_tbl__temp_table_for_load_data__
+            Statistics: Num rows: 68 Data size: 24552 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: string), ds (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 68 Data size: 24552 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col2 (type: string), _bucket_number (type: string)
+                sort order: ++
+                Map-reduce partition columns: _col2 (type: string)
+                value expressions: _col0 (type: int), _col1 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._bucket_number (type: string)
+          outputColumnNames: _col0, _col1, _col2, _bucket_number
+          File Output Operator
+            compressed: false
+            Dp Sort State: PARTITION_BUCKET_SORTED
+            Statistics: Num rows: 68 Data size: 24552 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.src_bucket_tbl
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 
+            hr 20
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.src_bucket_tbl
+
+  Stage: Stage-2
+    Stats Work
+      Basic Stats Work:
+
+PREHOOK: query: load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE src_bucket_tbl partition(hr=20, ds)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+PREHOOK: Output: default@src_bucket_tbl@hr=20
+POSTHOOK: query: load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE src_bucket_tbl partition(hr=20, ds)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+POSTHOOK: Output: default@src_bucket_tbl@hr=20/ds=2008-04-08
+POSTHOOK: Lineage: src_bucket_tbl PARTITION(hr=20,ds=2008-04-08).key SIMPLE [(src_bucket_tbl__temp_table_for_load_data__)src_bucket_tbl__temp_table_for_load_data__.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: src_bucket_tbl PARTITION(hr=20,ds=2008-04-08).value SIMPLE [(src_bucket_tbl__temp_table_for_load_data__)src_bucket_tbl__temp_table_for_load_data__.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from src_bucket_tbl where hr=20 and ds='2008-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl
+PREHOOK: Input: default@src_bucket_tbl@hr=20/ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src_bucket_tbl where hr=20 and ds='2008-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl
+POSTHOOK: Input: default@src_bucket_tbl@hr=20/ds=2008-04-08
+#### A masked pattern was here ####
+0	val_0	20	2008-04-08
+0	val_0	20	2008-04-08
+0	val_0	20	2008-04-08
+103	val_103	20	2008-04-08
+103	val_103	20	2008-04-08
+11	val_11	20	2008-04-08
+114	val_114	20	2008-04-08
+118	val_118	20	2008-04-08
+118	val_118	20	2008-04-08
+125	val_125	20	2008-04-08
+125	val_125	20	2008-04-08
+129	val_129	20	2008-04-08
+129	val_129	20	2008-04-08
+136	val_136	20	2008-04-08
+143	val_143	20	2008-04-08
+15	val_15	20	2008-04-08
+15	val_15	20	2008-04-08
+150	val_150	20	2008-04-08
+158	val_158	20	2008-04-08
+165	val_165	20	2008-04-08
+165	val_165	20	2008-04-08
+169	val_169	20	2008-04-08
+169	val_169	20	2008-04-08
+169	val_169	20	2008-04-08
+169	val_169	20	2008-04-08
+172	val_172	20	2008-04-08
+172	val_172	20	2008-04-08
+176	val_176	20	2008-04-08
+176	val_176	20	2008-04-08
+183	val_183	20	2008-04-08
+187	val_187	20	2008-04-08
+187	val_187	20	2008-04-08
+187	val_187	20	2008-04-08
+19	val_19	20	2008-04-08
+190	val_190	20	2008-04-08
+194	val_194	20	2008-04-08
+202	val_202	20	2008-04-08
+213	val_213	20	2008-04-08
+213	val_213	20	2008-04-08
+217	val_217	20	2008-04-08
+217	val_217	20	2008-04-08
+224	val_224	20	2008-04-08
+224	val_224	20	2008-04-08
+228	val_228	20	2008-04-08
+235	val_235	20	2008-04-08
+239	val_239	20	2008-04-08
+239	val_239	20	2008-04-08
+242	val_242	20	2008-04-08
+242	val_242	20	2008-04-08
+257	val_257	20	2008-04-08
+26	val_26	20	2008-04-08
+26	val_26	20	2008-04-08
+260	val_260	20	2008-04-08
+275	val_275	20	2008-04-08
+282	val_282	20	2008-04-08
+282	val_282	20	2008-04-08
+286	val_286	20	2008-04-08
+305	val_305	20	2008-04-08
+309	val_309	20	2008-04-08
+309	val_309	20	2008-04-08
+316	val_316	20	2008-04-08
+316	val_316	20	2008-04-08
+316	val_316	20	2008-04-08
+323	val_323	20	2008-04-08
+327	val_327	20	2008-04-08
+327	val_327	20	2008-04-08
+327	val_327	20	2008-04-08
+33	val_33	20	2008-04-08
+338	val_338	20	2008-04-08
+341	val_341	20	2008-04-08
+345	val_345	20	2008-04-08
+356	val_356	20	2008-04-08
+367	val_367	20	2008-04-08
+367	val_367	20	2008-04-08
+37	val_37	20	2008-04-08
+37	val_37	20	2008-04-08
+374	val_374	20	2008-04-08
+378	val_378	20	2008-04-08
+389	val_389	20	2008-04-08
+392	val_392	20	2008-04-08
+396	val_396	20	2008-04-08
+396	val_396	20	2008-04-08
+396	val_396	20	2008-04-08
+4	val_4	20	2008-04-08
+400	val_400	20	2008-04-08
+404	val_404	20	2008-04-08
+404	val_404	20	2008-04-08
+411	val_411	20	2008-04-08
+419	val_419	20	2008-04-08
+437	val_437	20	2008-04-08
+44	val_44	20	2008-04-08
+444	val_444	20	2008-04-08
+448	val_448	20	2008-04-08
+455	val_455	20	2008-04-08
+459	val_459	20	2008-04-08
+459	val_459	20	2008-04-08
+462	val_462	20	2008-04-08
+462	val_462	20	2008-04-08
+466	val_466	20	2008-04-08
+466	val_466	20	2008-04-08
+466	val_466	20	2008-04-08
+477	val_477	20	2008-04-08
+480	val_480	20	2008-04-08
+480	val_480	20	2008-04-08
+480	val_480	20	2008-04-08
+484	val_484	20	2008-04-08
+491	val_491	20	2008-04-08
+495	val_495	20	2008-04-08
+51	val_51	20	2008-04-08
+51	val_51	20	2008-04-08
+66	val_66	20	2008-04-08
+77	val_77	20	2008-04-08
+8	val_8	20	2008-04-08
+80	val_80	20	2008-04-08
+84	val_84	20	2008-04-08
+84	val_84	20	2008-04-08
+95	val_95	20	2008-04-08
+95	val_95	20	2008-04-08
+PREHOOK: query: drop table src_bucket_tbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@src_bucket_tbl
+PREHOOK: Output: default@src_bucket_tbl
+POSTHOOK: query: drop table src_bucket_tbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@src_bucket_tbl
+POSTHOOK: Output: default@src_bucket_tbl
+PREHOOK: query: CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (hr int, ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_bucket_tbl
+POSTHOOK: query: CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (hr int, ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_bucket_tbl
+PREHOOK: query: explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(hr=30, ds='2010-05-07')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+PREHOOK: Output: default@src_bucket_tbl@hr=30/ds=2010-05-07
+POSTHOOK: query: explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(hr=30, ds='2010-05-07')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+POSTHOOK: Output: default@src_bucket_tbl@hr=30/ds=2010-05-07
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src_bucket_tbl__temp_table_for_load_data__
+            Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                sort order: 
+                Map-reduce partition columns: _col0 (type: int)
+                Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: int), _col1 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int), VALUE._col1 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.src_bucket_tbl
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 2010-05-07
+            hr 30
+          replace: false
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.src_bucket_tbl
+
+  Stage: Stage-2
+    Stats Work
+      Basic Stats Work:
+
+PREHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(hr=30, ds='2010-05-07')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+PREHOOK: Output: default@src_bucket_tbl@hr=30/ds=2010-05-07
+POSTHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(hr=30, ds='2010-05-07')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+POSTHOOK: Output: default@src_bucket_tbl@hr=30/ds=2010-05-07
+POSTHOOK: Lineage: src_bucket_tbl PARTITION(hr=30,ds=2010-05-07).key SIMPLE [(src_bucket_tbl__temp_table_for_load_data__)src_bucket_tbl__temp_table_for_load_data__.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: src_bucket_tbl PARTITION(hr=30,ds=2010-05-07).value SIMPLE [(src_bucket_tbl__temp_table_for_load_data__)src_bucket_tbl__temp_table_for_load_data__.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from src_bucket_tbl where hr=30 and ds='2010-05-07'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl
+PREHOOK: Input: default@src_bucket_tbl@hr=30/ds=2010-05-07
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src_bucket_tbl where hr=30 and ds='2010-05-07'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl
+POSTHOOK: Input: default@src_bucket_tbl@hr=30/ds=2010-05-07
+#### A masked pattern was here ####
+0	val_0	30	2010-05-07
+0	val_0	30	2010-05-07
+0	val_0	30	2010-05-07
+103	val_103	30	2010-05-07
+103	val_103	30	2010-05-07
+11	val_11	30	2010-05-07
+114	val_114	30	2010-05-07
+118	val_118	30	2010-05-07
+118	val_118	30	2010-05-07
+125	val_125	30	2010-05-07
+125	val_125	30	2010-05-07
+129	val_129	30	2010-05-07
+129	val_129	30	2010-05-07
+136	val_136	30	2010-05-07
+143	val_143	30	2010-05-07
+15	val_15	30	2010-05-07
+15	val_15	30	2010-05-07
+150	val_150	30	2010-05-07
+158	val_158	30	2010-05-07
+165	val_165	30	2010-05-07
+165	val_165	30	2010-05-07
+169	val_169	30	2010-05-07
+169	val_169	30	2010-05-07
+169	val_169	30	2010-05-07
+169	val_169	30	2010-05-07
+172	val_172	30	2010-05-07
+172	val_172	30	2010-05-07
+176	val_176	30	2010-05-07
+176	val_176	30	2010-05-07
+183	val_183	30	2010-05-07
+187	val_187	30	2010-05-07
+187	val_187	30	2010-05-07
+187	val_187	30	2010-05-07
+19	val_19	30	2010-05-07
+190	val_190	30	2010-05-07
+194	val_194	30	2010-05-07
+202	val_202	30	2010-05-07
+213	val_213	30	2010-05-07
+213	val_213	30	2010-05-07
+217	val_217	30	2010-05-07
+217	val_217	30	2010-05-07
+224	val_224	30	2010-05-07
+224	val_224	30	2010-05-07
+228	val_228	30	2010-05-07
+235	val_235	30	2010-05-07
+239	val_239	30	2010-05-07
+239	val_239	30	2010-05-07
+242	val_242	30	2010-05-07
+242	val_242	30	2010-05-07
+257	val_257	30	2010-05-07
+26	val_26	30	2010-05-07
+26	val_26	30	2010-05-07
+260	val_260	30	2010-05-07
+275	val_275	30	2010-05-07
+282	val_282	30	2010-05-07
+282	val_282	30	2010-05-07
+286	val_286	30	2010-05-07
+305	val_305	30	2010-05-07
+309	val_309	30	2010-05-07
+309	val_309	30	2010-05-07
+316	val_316	30	2010-05-07
+316	val_316	30	2010-05-07
+316	val_316	30	2010-05-07
+323	val_323	30	2010-05-07
+327	val_327	30	2010-05-07
+327	val_327	30	2010-05-07
+327	val_327	30	2010-05-07
+33	val_33	30	2010-05-07
+338	val_338	30	2010-05-07
+341	val_341	30	2010-05-07
+345	val_345	30	2010-05-07
+356	val_356	30	2010-05-07
+367	val_367	30	2010-05-07
+367	val_367	30	2010-05-07
+37	val_37	30	2010-05-07
+37	val_37	30	2010-05-07
+374	val_374	30	2010-05-07
+378	val_378	30	2010-05-07
+389	val_389	30	2010-05-07
+392	val_392	30	2010-05-07
+396	val_396	30	2010-05-07
+396	val_396	30	2010-05-07
+396	val_396	30	2010-05-07
+4	val_4	30	2010-05-07
+400	val_400	30	2010-05-07
+404	val_404	30	2010-05-07
+404	val_404	30	2010-05-07
+411	val_411	30	2010-05-07
+419	val_419	30	2010-05-07
+437	val_437	30	2010-05-07
+44	val_44	30	2010-05-07
+444	val_444	30	2010-05-07
+448	val_448	30	2010-05-07
+455	val_455	30	2010-05-07
+459	val_459	30	2010-05-07
+459	val_459	30	2010-05-07
+462	val_462	30	2010-05-07
+462	val_462	30	2010-05-07
+466	val_466	30	2010-05-07
+466	val_466	30	2010-05-07
+466	val_466	30	2010-05-07
+477	val_477	30	2010-05-07
+480	val_480	30	2010-05-07
+480	val_480	30	2010-05-07
+480	val_480	30	2010-05-07
+484	val_484	30	2010-05-07
+491	val_491	30	2010-05-07
+495	val_495	30	2010-05-07
+51	val_51	30	2010-05-07
+51	val_51	30	2010-05-07
+66	val_66	30	2010-05-07
+77	val_77	30	2010-05-07
+8	val_8	30	2010-05-07
+80	val_80	30	2010-05-07
+84	val_84	30	2010-05-07
+84	val_84	30	2010-05-07
+95	val_95	30	2010-05-07
+95	val_95	30	2010-05-07
+PREHOOK: query: drop table src_bucket_tbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@src_bucket_tbl
+PREHOOK: Output: default@src_bucket_tbl
+POSTHOOK: query: drop table src_bucket_tbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@src_bucket_tbl
+POSTHOOK: Output: default@src_bucket_tbl


[hive] 01/02: HIVE-20593 : Load Data for partitioned ACID tables fails with bucketId out of range: -1 (Deepak Jaiswal, reviewed by Eugene Koifman)

Posted by sa...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hive.git

commit 83e39aeac870105a62decd97b48ea7b63c7a9043
Author: Deepak Jaiswal <dj...@apache.org>
AuthorDate: Tue Sep 25 23:26:17 2018 -0700

    HIVE-20593 : Load Data for partitioned ACID tables fails with bucketId out of range: -1 (Deepak Jaiswal, reviewed by Eugene Koifman)
---
 ...230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0 | Bin 0 -> 501 bytes
 ...230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0 | Bin 0 -> 465 bytes
 .../hadoop/hive/ql/parse/LoadSemanticAnalyzer.java |   7 +-
 .../queries/clientpositive/load_data_using_job.q   |  18 +++-
 .../clientpositive/llap/load_data_using_job.q.out  | 110 ++++++++++++++++++++-
 5 files changed, 128 insertions(+), 7 deletions(-)

diff --git a/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0 b/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0
new file mode 100644
index 0000000..020bdcc
Binary files /dev/null and b/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0 differ
diff --git a/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0 b/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0
new file mode 100644
index 0000000..8c2604d
Binary files /dev/null and b/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0 differ
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
index cbacd05..ee12f64 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
@@ -26,11 +26,13 @@ import java.io.Serializable;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.ArrayList;
-import java.util.HashSet;
+
 
 import org.antlr.runtime.tree.Tree;
 import org.apache.commons.lang.StringUtils;
@@ -474,6 +476,9 @@ public class LoadSemanticAnalyzer extends SemanticAnalyzer {
     // wipe out partition columns
     tempTableObj.setPartCols(new ArrayList<>());
 
+    // Reset table params
+    tempTableObj.setParameters(new HashMap<>());
+
     // Set data location and input format, it must be text
     tempTableObj.setDataLocation(new Path(fromURI));
     if (inputFormatClassName != null && serDeClassName != null) {
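
The staging table appears to be derived from a copy of the target table's metadata, so without
this reset it would inherit the target's table properties. A minimal sketch of the overall recipe
across the two patches (hypothetical stand-in types; the real code mutates
org.apache.hadoop.hive.ql.metadata.Table):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class StagingTableSketch {
      String name;
      List<String> fields = new ArrayList<>();
      List<String> partCols = new ArrayList<>();
      Map<String, String> parameters = new HashMap<>();

      static StagingTableSketch forLoadData(String targetName, List<String> cols,
                                            Map<String, String> targetParams) {
        StagingTableSketch t = new StagingTableSketch();
        t.name = targetName + "__temp_table_for_load_data__"; // suffix seen in the q.out files
        t.fields.addAll(cols);          // columns expected in the data file
        t.partCols = new ArrayList<>(); // wipe partition columns
        // HIVE-20593: start from empty parameters instead of inheriting targetParams,
        // presumably so ACID properties such as 'transactional'='true' do not leak
        // into the staging table.
        t.parameters = new HashMap<>();
        return t;
      }

      public static void main(String[] args) {
        StagingTableSketch t = forLoadData("orc_test_txn",
            Arrays.asList("id", "name", "dept", "year"),
            Collections.singletonMap("transactional", "true"));
        System.out.println(t.name + " params=" + t.parameters); // params={}
      }
    }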
diff --git a/ql/src/test/queries/clientpositive/load_data_using_job.q b/ql/src/test/queries/clientpositive/load_data_using_job.q
index b760d9b..970a752 100644
--- a/ql/src/test/queries/clientpositive/load_data_using_job.q
+++ b/ql/src/test/queries/clientpositive/load_data_using_job.q
@@ -91,4 +91,20 @@ load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt
 INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
 SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
 select * from srcbucket_mapjoin_n8;
-drop table srcbucket_mapjoin_n8;
\ No newline at end of file
+drop table srcbucket_mapjoin_n8;
+
+-- Load into ACID table using ORC files
+set hive.mapred.mode=nonstrict;
+set hive.optimize.ppd=true;
+set hive.optimize.index.filter=true;
+set hive.tez.bucket.pruning=true;
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true');
+explain load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn;
+load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn;
+
+select * from orc_test_txn;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out b/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out
index 765ffdf..8a82467 100644
--- a/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out
+++ b/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out
@@ -977,16 +977,16 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcbucket_mapjoin_n8__temp_table_for_load_data__
-                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: int), value (type: string)
                     outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: int)
                       sort order: +
                       Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -996,10 +996,10 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -3018,3 +3018,103 @@ POSTHOOK: query: drop table srcbucket_mapjoin_n8
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@srcbucket_mapjoin_n8
 POSTHOOK: Output: default@srcbucket_mapjoin_n8
+PREHOOK: query: CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_test_txn
+POSTHOOK: query: CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_test_txn
+#### A masked pattern was here ####
+PREHOOK: type: QUERY
+#### A masked pattern was here ####
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orc_test_txn__temp_table_for_load_data__
+                  Statistics: Num rows: 24 Data size: 9024 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: id (type: int), name (type: string), dept (type: string), year (type: int)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 24 Data size: 9024 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 24 Data size: 9024 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                          name: default.orc_test_txn
+                      Write Type: INSERT
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            year 
+          replace: false
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.orc_test_txn
+          Write Type: INSERT
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+
+#### A masked pattern was here ####
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_test_txn__temp_table_for_load_data__
+PREHOOK: Output: default@orc_test_txn
+#### A masked pattern was here ####
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_test_txn__temp_table_for_load_data__
+POSTHOOK: Output: default@orc_test_txn@year=2016
+POSTHOOK: Output: default@orc_test_txn@year=2017
+POSTHOOK: Output: default@orc_test_txn@year=2018
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2016).dept SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:dept, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2016).id SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2016).name SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2017).dept SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:dept, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2017).id SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2017).name SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2018).dept SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:dept, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2018).id SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2018).name SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:name, type:string, comment:null), ]
+PREHOOK: query: select * from orc_test_txn
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_test_txn
+PREHOOK: Input: default@orc_test_txn@year=2016
+PREHOOK: Input: default@orc_test_txn@year=2017
+PREHOOK: Input: default@orc_test_txn@year=2018
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_test_txn
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_test_txn
+POSTHOOK: Input: default@orc_test_txn@year=2016
+POSTHOOK: Input: default@orc_test_txn@year=2017
+POSTHOOK: Input: default@orc_test_txn@year=2018
+#### A masked pattern was here ####
+9	Harris	CSE	2017
+8	Henry	CSE	2016
+10	Haley	CSE	2018