You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sa...@apache.org on 2019/04/16 06:50:15 UTC
[hive] branch branch-3.1 updated (2b77d69 -> 8290316)
This is an automated email from the ASF dual-hosted git repository.
sankarh pushed a change to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hive.git.
from 2b77d69 HIVE-21600: GenTezUtils.removeSemiJoinOperator may throw out of bounds exception for TS with multiple children (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
new 83e39ae HIVE-20593 : Load Data for partitioned ACID tables fails with bucketId out of range: -1 (Deepak Jaiswal, reviewed by Eugene Koifman)
new 8290316 HIVE-21564: Load data into a bucketed table is ignoring partitions specs and loads data into default partition (Sankar Hariappan, reviewed by Jesus Camacho Rodriguez)
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
...230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0 | Bin 0 -> 501 bytes
...230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0 | Bin 0 -> 465 bytes
.../hadoop/hive/ql/parse/LoadSemanticAnalyzer.java | 62 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 21 +-
.../queries/clientpositive/load_data_using_job.q | 18 +-
.../load_static_ptn_into_bucketed_table.q | 42 +
.../clientpositive/llap/load_data_using_job.q.out | 110 ++-
.../load_static_ptn_into_bucketed_table.q.out | 857 +++++++++++++++++++++
8 files changed, 1097 insertions(+), 13 deletions(-)
create mode 100644 data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0
create mode 100644 data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0
create mode 100644 ql/src/test/queries/clientpositive/load_static_ptn_into_bucketed_table.q
create mode 100644 ql/src/test/results/clientpositive/load_static_ptn_into_bucketed_table.q.out
[hive] 02/02: HIVE-21564: Load data into a bucketed table is
ignoring partitions specs and loads data into default partition (Sankar
Hariappan, reviewed by Jesus Camacho Rodriguez)
Posted by sa...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
sankarh pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hive.git
commit 8290316cd1af701e75adf1e357c6b3fe8409f68f
Author: Sankar Hariappan <sa...@apache.org>
AuthorDate: Tue Apr 16 11:52:20 2019 +0530
HIVE-21564: Load data into a bucketed table is ignoring partitions specs and loads data into default partition (Sankar Hariappan, reviewed by Jesus Camacho Rodriguez)
Signed-off-by: Sankar Hariappan <sa...@apache.org>
---
.../hadoop/hive/ql/parse/LoadSemanticAnalyzer.java | 57 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 21 +-
.../load_static_ptn_into_bucketed_table.q | 42 +
.../load_static_ptn_into_bucketed_table.q.out | 857 +++++++++++++++++++++
4 files changed, 970 insertions(+), 7 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
index ee12f64..73a688b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
@@ -471,11 +471,6 @@ public class LoadSemanticAnalyzer extends SemanticAnalyzer {
String tempTblName = table.getTableName() + tempTblNameSuffix;
tempTableObj.setTableName(tempTblName);
- // Move all the partition columns at the end of table columns
- tempTableObj.setFields(table.getAllCols());
- // wipe out partition columns
- tempTableObj.setPartCols(new ArrayList<>());
-
// Reset table params
tempTableObj.setParameters(new HashMap<>());
@@ -490,12 +485,62 @@ public class LoadSemanticAnalyzer extends SemanticAnalyzer {
}
}
+ // Make the columns list for the temp table (input data file).
+ // Move all the partition columns at the end of table columns.
+ ArrayList<FieldSchema> colList = new ArrayList<FieldSchema>();
+ colList.addAll(table.getCols());
+
+ // inpPartSpec is a mapping from partition column name to its value.
+ Map<String, String> inpPartSpec = null;
+
+ // Partition spec was already validated by the caller when creating the TableSpec object.
+ // So, need not validate inpPartSpec here.
+ List<FieldSchema> parts = table.getPartCols();
+ if (tableTree.getChildCount() >= 2) {
+ ASTNode partSpecNode = (ASTNode) tableTree.getChild(1);
+ inpPartSpec = new HashMap<>(partSpecNode.getChildCount());
+
+ for (int i = 0; i < partSpecNode.getChildCount(); ++i) {
+ ASTNode partSpecValNode = (ASTNode) partSpecNode.getChild(i);
+ String partVal = null;
+ String partColName = unescapeIdentifier(partSpecValNode.getChild(0).getText().toLowerCase());
+
+ if (partSpecValNode.getChildCount() >= 2) { // in the form of T partition (ds="2010-03-03")
+ // Not stripping quotes here as we need to use it as it is while framing PARTITION clause
+ // in INSERT query.
+ partVal = partSpecValNode.getChild(1).getText();
+ }
+ inpPartSpec.put(partColName, partVal);
+ }
+
+ // Add only dynamic partition columns to the temp table (input data file).
+ // For static partitions, values would be obtained from partition(key=value...) clause.
+ for (FieldSchema fs : parts) {
+ String partKey = fs.getName();
+
+ // If a partition value is not there, then it is dynamic partition key.
+ if (inpPartSpec.get(partKey) == null) {
+ colList.add(fs);
+ }
+ }
+ } else {
+ // No static partitions specified and hence all are dynamic partition keys and need to be part
+ // of temp table (input data file).
+ colList.addAll(parts);
+ }
+
+ // Set columns list for temp table.
+ tempTableObj.setFields(colList);
+
+ // Wipe out partition columns
+ tempTableObj.setPartCols(new ArrayList<>());
+
// Step 2 : create the Insert query
StringBuilder rewrittenQueryStr = new StringBuilder();
rewrittenQueryStr.append("insert into table ");
rewrittenQueryStr.append(getFullTableNameForSQL((ASTNode)(tableTree.getChild(0))));
- addPartitionColsToInsert(table.getPartCols(), rewrittenQueryStr);
+ addPartitionColsToInsert(table.getPartCols(), inpPartSpec, rewrittenQueryStr);
rewrittenQueryStr.append(" select * from ");
rewrittenQueryStr.append(tempTblName);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index f8a4e11..3eddace 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -14924,6 +14924,20 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
* INSERT INTO T PARTITION(partCol1,partCol2...) SELECT col1, ... partCol1,partCol2...
*/
protected void addPartitionColsToInsert(List<FieldSchema> partCols, StringBuilder rewrittenQueryStr) {
+ addPartitionColsToInsert(partCols, null, rewrittenQueryStr);
+ }
+
+ /**
+ * Append list of partition columns to Insert statement. If user specified partition spec, then
+ * use it to get/set the value for partition column else use dynamic partition mode with no value.
+ * Static partition mode:
+ * INSERT INTO T PARTITION(partCol1=val1,partCol2...) SELECT col1, ... partCol1,partCol2...
+ * Dynamic partition mode:
+ * INSERT INTO T PARTITION(partCol1,partCol2...) SELECT col1, ... partCol1,partCol2...
+ */
+ protected void addPartitionColsToInsert(List<FieldSchema> partCols,
+ Map<String, String> partSpec,
+ StringBuilder rewrittenQueryStr) {
// If the table is partitioned we have to put the partition() clause in
if (partCols != null && partCols.size() > 0) {
rewrittenQueryStr.append(" partition (");
@@ -14933,8 +14947,13 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
first = false;
else
rewrittenQueryStr.append(", ");
- //would be nice if there was a way to determine if quotes are needed
+
+ // Would be nice if there were a way to determine if quotes are needed
rewrittenQueryStr.append(HiveUtils.unparseIdentifier(fschema.getName(), this.conf));
+ String partVal = (partSpec != null) ? partSpec.get(fschema.getName()) : null;
+ if (partVal != null) {
+ rewrittenQueryStr.append("=").append(partVal);
+ }
}
rewrittenQueryStr.append(")");
}
diff --git a/ql/src/test/queries/clientpositive/load_static_ptn_into_bucketed_table.q b/ql/src/test/queries/clientpositive/load_static_ptn_into_bucketed_table.q
new file mode 100644
index 0000000..6b6d160
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/load_static_ptn_into_bucketed_table.q
@@ -0,0 +1,42 @@
+set hive.stats.column.autogather=false;
+set hive.strict.checks.bucketing=true;
+
+set hive.explain.user=false;
+
+-- SORT_QUERY_RESULTS
+
+-- Single key partition
+-- Load with full partition spec
+CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE;
+explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds='2008-04-08');
+load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds='2008-04-08');
+select * from src_bucket_tbl where ds='2008-04-08';
+
+drop table src_bucket_tbl;
+
+-- Multi key partition
+-- Load with both static and dynamic partition spec where dynamic partition value is not in file and expected to use default partition.
+CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (hr int, ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE;
+explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds, hr=10);
+load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds, hr=10);
+select * from src_bucket_tbl where hr=10;
+
+drop table src_bucket_tbl;
+
+-- Multi key partition
+-- Load with both static and dynamic partition spec where dynamic partition value present in file.
+CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (hr int, ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE;
+explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE src_bucket_tbl partition(hr=20, ds);
+load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE src_bucket_tbl partition(hr=20, ds);
+select * from src_bucket_tbl where hr=20 and ds='2008-04-08';
+
+drop table src_bucket_tbl;
+
+-- Multi key partition
+-- Load with both static and dynamic partition spec
+CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (hr int, ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE;
+explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(hr=30, ds='2010-05-07');
+load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(hr=30, ds='2010-05-07');
+select * from src_bucket_tbl where hr=30 and ds='2010-05-07';
+
+drop table src_bucket_tbl;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/load_static_ptn_into_bucketed_table.q.out b/ql/src/test/results/clientpositive/load_static_ptn_into_bucketed_table.q.out
new file mode 100644
index 0000000..d646077
--- /dev/null
+++ b/ql/src/test/results/clientpositive/load_static_ptn_into_bucketed_table.q.out
@@ -0,0 +1,857 @@
+PREHOOK: query: CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_bucket_tbl
+POSTHOOK: query: CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_bucket_tbl
+PREHOOK: query: explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds='2008-04-08')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+PREHOOK: Output: default@src_bucket_tbl@ds=2008-04-08
+POSTHOOK: query: explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds='2008-04-08')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+POSTHOOK: Output: default@src_bucket_tbl@ds=2008-04-08
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src_bucket_tbl__temp_table_for_load_data__
+ Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src_bucket_tbl
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 2008-04-08
+ replace: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src_bucket_tbl
+
+ Stage: Stage-2
+ Stats Work
+ Basic Stats Work:
+
+PREHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds='2008-04-08')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+PREHOOK: Output: default@src_bucket_tbl@ds=2008-04-08
+POSTHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds='2008-04-08')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+POSTHOOK: Output: default@src_bucket_tbl@ds=2008-04-08
+POSTHOOK: Lineage: src_bucket_tbl PARTITION(ds=2008-04-08).key SIMPLE [(src_bucket_tbl__temp_table_for_load_data__)src_bucket_tbl__temp_table_for_load_data__.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: src_bucket_tbl PARTITION(ds=2008-04-08).value SIMPLE [(src_bucket_tbl__temp_table_for_load_data__)src_bucket_tbl__temp_table_for_load_data__.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from src_bucket_tbl where ds='2008-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl
+PREHOOK: Input: default@src_bucket_tbl@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src_bucket_tbl where ds='2008-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl
+POSTHOOK: Input: default@src_bucket_tbl@ds=2008-04-08
+#### A masked pattern was here ####
+0 val_0 2008-04-08
+0 val_0 2008-04-08
+0 val_0 2008-04-08
+103 val_103 2008-04-08
+103 val_103 2008-04-08
+11 val_11 2008-04-08
+114 val_114 2008-04-08
+118 val_118 2008-04-08
+118 val_118 2008-04-08
+125 val_125 2008-04-08
+125 val_125 2008-04-08
+129 val_129 2008-04-08
+129 val_129 2008-04-08
+136 val_136 2008-04-08
+143 val_143 2008-04-08
+15 val_15 2008-04-08
+15 val_15 2008-04-08
+150 val_150 2008-04-08
+158 val_158 2008-04-08
+165 val_165 2008-04-08
+165 val_165 2008-04-08
+169 val_169 2008-04-08
+169 val_169 2008-04-08
+169 val_169 2008-04-08
+169 val_169 2008-04-08
+172 val_172 2008-04-08
+172 val_172 2008-04-08
+176 val_176 2008-04-08
+176 val_176 2008-04-08
+183 val_183 2008-04-08
+187 val_187 2008-04-08
+187 val_187 2008-04-08
+187 val_187 2008-04-08
+19 val_19 2008-04-08
+190 val_190 2008-04-08
+194 val_194 2008-04-08
+202 val_202 2008-04-08
+213 val_213 2008-04-08
+213 val_213 2008-04-08
+217 val_217 2008-04-08
+217 val_217 2008-04-08
+224 val_224 2008-04-08
+224 val_224 2008-04-08
+228 val_228 2008-04-08
+235 val_235 2008-04-08
+239 val_239 2008-04-08
+239 val_239 2008-04-08
+242 val_242 2008-04-08
+242 val_242 2008-04-08
+257 val_257 2008-04-08
+26 val_26 2008-04-08
+26 val_26 2008-04-08
+260 val_260 2008-04-08
+275 val_275 2008-04-08
+282 val_282 2008-04-08
+282 val_282 2008-04-08
+286 val_286 2008-04-08
+305 val_305 2008-04-08
+309 val_309 2008-04-08
+309 val_309 2008-04-08
+316 val_316 2008-04-08
+316 val_316 2008-04-08
+316 val_316 2008-04-08
+323 val_323 2008-04-08
+327 val_327 2008-04-08
+327 val_327 2008-04-08
+327 val_327 2008-04-08
+33 val_33 2008-04-08
+338 val_338 2008-04-08
+341 val_341 2008-04-08
+345 val_345 2008-04-08
+356 val_356 2008-04-08
+367 val_367 2008-04-08
+367 val_367 2008-04-08
+37 val_37 2008-04-08
+37 val_37 2008-04-08
+374 val_374 2008-04-08
+378 val_378 2008-04-08
+389 val_389 2008-04-08
+392 val_392 2008-04-08
+396 val_396 2008-04-08
+396 val_396 2008-04-08
+396 val_396 2008-04-08
+4 val_4 2008-04-08
+400 val_400 2008-04-08
+404 val_404 2008-04-08
+404 val_404 2008-04-08
+411 val_411 2008-04-08
+419 val_419 2008-04-08
+437 val_437 2008-04-08
+44 val_44 2008-04-08
+444 val_444 2008-04-08
+448 val_448 2008-04-08
+455 val_455 2008-04-08
+459 val_459 2008-04-08
+459 val_459 2008-04-08
+462 val_462 2008-04-08
+462 val_462 2008-04-08
+466 val_466 2008-04-08
+466 val_466 2008-04-08
+466 val_466 2008-04-08
+477 val_477 2008-04-08
+480 val_480 2008-04-08
+480 val_480 2008-04-08
+480 val_480 2008-04-08
+484 val_484 2008-04-08
+491 val_491 2008-04-08
+495 val_495 2008-04-08
+51 val_51 2008-04-08
+51 val_51 2008-04-08
+66 val_66 2008-04-08
+77 val_77 2008-04-08
+8 val_8 2008-04-08
+80 val_80 2008-04-08
+84 val_84 2008-04-08
+84 val_84 2008-04-08
+95 val_95 2008-04-08
+95 val_95 2008-04-08
+PREHOOK: query: drop table src_bucket_tbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@src_bucket_tbl
+PREHOOK: Output: default@src_bucket_tbl
+POSTHOOK: query: drop table src_bucket_tbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@src_bucket_tbl
+POSTHOOK: Output: default@src_bucket_tbl
+PREHOOK: query: CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (hr int, ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_bucket_tbl
+POSTHOOK: query: CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (hr int, ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_bucket_tbl
+PREHOOK: query: explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds, hr=10)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+PREHOOK: Output: default@src_bucket_tbl@hr=10
+POSTHOOK: query: explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds, hr=10)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src_bucket_tbl__temp_table_for_load_data__
+ Statistics: Num rows: 35 Data size: 13020 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string), ds (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 35 Data size: 13020 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: string), _bucket_number (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col2 (type: string)
+ value expressions: _col0 (type: int), _col1 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._bucket_number (type: string)
+ outputColumnNames: _col0, _col1, _col2, _bucket_number
+ File Output Operator
+ compressed: false
+ Dp Sort State: PARTITION_BUCKET_SORTED
+ Statistics: Num rows: 35 Data size: 13020 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src_bucket_tbl
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds
+ hr 10
+ replace: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src_bucket_tbl
+
+ Stage: Stage-2
+ Stats Work
+ Basic Stats Work:
+
+PREHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds, hr=10)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+PREHOOK: Output: default@src_bucket_tbl@hr=10
+POSTHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(ds, hr=10)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+POSTHOOK: Output: default@src_bucket_tbl@hr=10/ds=__HIVE_DEFAULT_PARTITION__
+POSTHOOK: Lineage: src_bucket_tbl PARTITION(hr=10,ds=__HIVE_DEFAULT_PARTITION__).key SIMPLE [(src_bucket_tbl__temp_table_for_load_data__)src_bucket_tbl__temp_table_for_load_data__.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: src_bucket_tbl PARTITION(hr=10,ds=__HIVE_DEFAULT_PARTITION__).value SIMPLE [(src_bucket_tbl__temp_table_for_load_data__)src_bucket_tbl__temp_table_for_load_data__.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from src_bucket_tbl where hr=10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl
+PREHOOK: Input: default@src_bucket_tbl@hr=10/ds=__HIVE_DEFAULT_PARTITION__
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src_bucket_tbl where hr=10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl
+POSTHOOK: Input: default@src_bucket_tbl@hr=10/ds=__HIVE_DEFAULT_PARTITION__
+#### A masked pattern was here ####
+0 val_0 10 __HIVE_DEFAULT_PARTITION__
+0 val_0 10 __HIVE_DEFAULT_PARTITION__
+0 val_0 10 __HIVE_DEFAULT_PARTITION__
+103 val_103 10 __HIVE_DEFAULT_PARTITION__
+103 val_103 10 __HIVE_DEFAULT_PARTITION__
+11 val_11 10 __HIVE_DEFAULT_PARTITION__
+114 val_114 10 __HIVE_DEFAULT_PARTITION__
+118 val_118 10 __HIVE_DEFAULT_PARTITION__
+118 val_118 10 __HIVE_DEFAULT_PARTITION__
+125 val_125 10 __HIVE_DEFAULT_PARTITION__
+125 val_125 10 __HIVE_DEFAULT_PARTITION__
+129 val_129 10 __HIVE_DEFAULT_PARTITION__
+129 val_129 10 __HIVE_DEFAULT_PARTITION__
+136 val_136 10 __HIVE_DEFAULT_PARTITION__
+143 val_143 10 __HIVE_DEFAULT_PARTITION__
+15 val_15 10 __HIVE_DEFAULT_PARTITION__
+15 val_15 10 __HIVE_DEFAULT_PARTITION__
+150 val_150 10 __HIVE_DEFAULT_PARTITION__
+158 val_158 10 __HIVE_DEFAULT_PARTITION__
+165 val_165 10 __HIVE_DEFAULT_PARTITION__
+165 val_165 10 __HIVE_DEFAULT_PARTITION__
+169 val_169 10 __HIVE_DEFAULT_PARTITION__
+169 val_169 10 __HIVE_DEFAULT_PARTITION__
+169 val_169 10 __HIVE_DEFAULT_PARTITION__
+169 val_169 10 __HIVE_DEFAULT_PARTITION__
+172 val_172 10 __HIVE_DEFAULT_PARTITION__
+172 val_172 10 __HIVE_DEFAULT_PARTITION__
+176 val_176 10 __HIVE_DEFAULT_PARTITION__
+176 val_176 10 __HIVE_DEFAULT_PARTITION__
+183 val_183 10 __HIVE_DEFAULT_PARTITION__
+187 val_187 10 __HIVE_DEFAULT_PARTITION__
+187 val_187 10 __HIVE_DEFAULT_PARTITION__
+187 val_187 10 __HIVE_DEFAULT_PARTITION__
+19 val_19 10 __HIVE_DEFAULT_PARTITION__
+190 val_190 10 __HIVE_DEFAULT_PARTITION__
+194 val_194 10 __HIVE_DEFAULT_PARTITION__
+202 val_202 10 __HIVE_DEFAULT_PARTITION__
+213 val_213 10 __HIVE_DEFAULT_PARTITION__
+213 val_213 10 __HIVE_DEFAULT_PARTITION__
+217 val_217 10 __HIVE_DEFAULT_PARTITION__
+217 val_217 10 __HIVE_DEFAULT_PARTITION__
+224 val_224 10 __HIVE_DEFAULT_PARTITION__
+224 val_224 10 __HIVE_DEFAULT_PARTITION__
+228 val_228 10 __HIVE_DEFAULT_PARTITION__
+235 val_235 10 __HIVE_DEFAULT_PARTITION__
+239 val_239 10 __HIVE_DEFAULT_PARTITION__
+239 val_239 10 __HIVE_DEFAULT_PARTITION__
+242 val_242 10 __HIVE_DEFAULT_PARTITION__
+242 val_242 10 __HIVE_DEFAULT_PARTITION__
+257 val_257 10 __HIVE_DEFAULT_PARTITION__
+26 val_26 10 __HIVE_DEFAULT_PARTITION__
+26 val_26 10 __HIVE_DEFAULT_PARTITION__
+260 val_260 10 __HIVE_DEFAULT_PARTITION__
+275 val_275 10 __HIVE_DEFAULT_PARTITION__
+282 val_282 10 __HIVE_DEFAULT_PARTITION__
+282 val_282 10 __HIVE_DEFAULT_PARTITION__
+286 val_286 10 __HIVE_DEFAULT_PARTITION__
+305 val_305 10 __HIVE_DEFAULT_PARTITION__
+309 val_309 10 __HIVE_DEFAULT_PARTITION__
+309 val_309 10 __HIVE_DEFAULT_PARTITION__
+316 val_316 10 __HIVE_DEFAULT_PARTITION__
+316 val_316 10 __HIVE_DEFAULT_PARTITION__
+316 val_316 10 __HIVE_DEFAULT_PARTITION__
+323 val_323 10 __HIVE_DEFAULT_PARTITION__
+327 val_327 10 __HIVE_DEFAULT_PARTITION__
+327 val_327 10 __HIVE_DEFAULT_PARTITION__
+327 val_327 10 __HIVE_DEFAULT_PARTITION__
+33 val_33 10 __HIVE_DEFAULT_PARTITION__
+338 val_338 10 __HIVE_DEFAULT_PARTITION__
+341 val_341 10 __HIVE_DEFAULT_PARTITION__
+345 val_345 10 __HIVE_DEFAULT_PARTITION__
+356 val_356 10 __HIVE_DEFAULT_PARTITION__
+367 val_367 10 __HIVE_DEFAULT_PARTITION__
+367 val_367 10 __HIVE_DEFAULT_PARTITION__
+37 val_37 10 __HIVE_DEFAULT_PARTITION__
+37 val_37 10 __HIVE_DEFAULT_PARTITION__
+374 val_374 10 __HIVE_DEFAULT_PARTITION__
+378 val_378 10 __HIVE_DEFAULT_PARTITION__
+389 val_389 10 __HIVE_DEFAULT_PARTITION__
+392 val_392 10 __HIVE_DEFAULT_PARTITION__
+396 val_396 10 __HIVE_DEFAULT_PARTITION__
+396 val_396 10 __HIVE_DEFAULT_PARTITION__
+396 val_396 10 __HIVE_DEFAULT_PARTITION__
+4 val_4 10 __HIVE_DEFAULT_PARTITION__
+400 val_400 10 __HIVE_DEFAULT_PARTITION__
+404 val_404 10 __HIVE_DEFAULT_PARTITION__
+404 val_404 10 __HIVE_DEFAULT_PARTITION__
+411 val_411 10 __HIVE_DEFAULT_PARTITION__
+419 val_419 10 __HIVE_DEFAULT_PARTITION__
+437 val_437 10 __HIVE_DEFAULT_PARTITION__
+44 val_44 10 __HIVE_DEFAULT_PARTITION__
+444 val_444 10 __HIVE_DEFAULT_PARTITION__
+448 val_448 10 __HIVE_DEFAULT_PARTITION__
+455 val_455 10 __HIVE_DEFAULT_PARTITION__
+459 val_459 10 __HIVE_DEFAULT_PARTITION__
+459 val_459 10 __HIVE_DEFAULT_PARTITION__
+462 val_462 10 __HIVE_DEFAULT_PARTITION__
+462 val_462 10 __HIVE_DEFAULT_PARTITION__
+466 val_466 10 __HIVE_DEFAULT_PARTITION__
+466 val_466 10 __HIVE_DEFAULT_PARTITION__
+466 val_466 10 __HIVE_DEFAULT_PARTITION__
+477 val_477 10 __HIVE_DEFAULT_PARTITION__
+480 val_480 10 __HIVE_DEFAULT_PARTITION__
+480 val_480 10 __HIVE_DEFAULT_PARTITION__
+480 val_480 10 __HIVE_DEFAULT_PARTITION__
+484 val_484 10 __HIVE_DEFAULT_PARTITION__
+491 val_491 10 __HIVE_DEFAULT_PARTITION__
+495 val_495 10 __HIVE_DEFAULT_PARTITION__
+51 val_51 10 __HIVE_DEFAULT_PARTITION__
+51 val_51 10 __HIVE_DEFAULT_PARTITION__
+66 val_66 10 __HIVE_DEFAULT_PARTITION__
+77 val_77 10 __HIVE_DEFAULT_PARTITION__
+8 val_8 10 __HIVE_DEFAULT_PARTITION__
+80 val_80 10 __HIVE_DEFAULT_PARTITION__
+84 val_84 10 __HIVE_DEFAULT_PARTITION__
+84 val_84 10 __HIVE_DEFAULT_PARTITION__
+95 val_95 10 __HIVE_DEFAULT_PARTITION__
+95 val_95 10 __HIVE_DEFAULT_PARTITION__
+PREHOOK: query: drop table src_bucket_tbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@src_bucket_tbl
+PREHOOK: Output: default@src_bucket_tbl
+POSTHOOK: query: drop table src_bucket_tbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@src_bucket_tbl
+POSTHOOK: Output: default@src_bucket_tbl
+PREHOOK: query: CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (hr int, ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_bucket_tbl
+POSTHOOK: query: CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (hr int, ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_bucket_tbl
+PREHOOK: query: explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE src_bucket_tbl partition(hr=20, ds)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+PREHOOK: Output: default@src_bucket_tbl@hr=20
+POSTHOOK: query: explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE src_bucket_tbl partition(hr=20, ds)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src_bucket_tbl__temp_table_for_load_data__
+ Statistics: Num rows: 68 Data size: 24552 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string), ds (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 68 Data size: 24552 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: string), _bucket_number (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col2 (type: string)
+ value expressions: _col0 (type: int), _col1 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._bucket_number (type: string)
+ outputColumnNames: _col0, _col1, _col2, _bucket_number
+ File Output Operator
+ compressed: false
+ Dp Sort State: PARTITION_BUCKET_SORTED
+ Statistics: Num rows: 68 Data size: 24552 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src_bucket_tbl
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds
+ hr 20
+ replace: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src_bucket_tbl
+
+ Stage: Stage-2
+ Stats Work
+ Basic Stats Work:
+
+PREHOOK: query: load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE src_bucket_tbl partition(hr=20, ds)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+PREHOOK: Output: default@src_bucket_tbl@hr=20
+POSTHOOK: query: load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE src_bucket_tbl partition(hr=20, ds)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+POSTHOOK: Output: default@src_bucket_tbl@hr=20/ds=2008-04-08
+POSTHOOK: Lineage: src_bucket_tbl PARTITION(hr=20,ds=2008-04-08).key SIMPLE [(src_bucket_tbl__temp_table_for_load_data__)src_bucket_tbl__temp_table_for_load_data__.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: src_bucket_tbl PARTITION(hr=20,ds=2008-04-08).value SIMPLE [(src_bucket_tbl__temp_table_for_load_data__)src_bucket_tbl__temp_table_for_load_data__.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from src_bucket_tbl where hr=20 and ds='2008-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl
+PREHOOK: Input: default@src_bucket_tbl@hr=20/ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src_bucket_tbl where hr=20 and ds='2008-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl
+POSTHOOK: Input: default@src_bucket_tbl@hr=20/ds=2008-04-08
+#### A masked pattern was here ####
+0 val_0 20 2008-04-08
+0 val_0 20 2008-04-08
+0 val_0 20 2008-04-08
+103 val_103 20 2008-04-08
+103 val_103 20 2008-04-08
+11 val_11 20 2008-04-08
+114 val_114 20 2008-04-08
+118 val_118 20 2008-04-08
+118 val_118 20 2008-04-08
+125 val_125 20 2008-04-08
+125 val_125 20 2008-04-08
+129 val_129 20 2008-04-08
+129 val_129 20 2008-04-08
+136 val_136 20 2008-04-08
+143 val_143 20 2008-04-08
+15 val_15 20 2008-04-08
+15 val_15 20 2008-04-08
+150 val_150 20 2008-04-08
+158 val_158 20 2008-04-08
+165 val_165 20 2008-04-08
+165 val_165 20 2008-04-08
+169 val_169 20 2008-04-08
+169 val_169 20 2008-04-08
+169 val_169 20 2008-04-08
+169 val_169 20 2008-04-08
+172 val_172 20 2008-04-08
+172 val_172 20 2008-04-08
+176 val_176 20 2008-04-08
+176 val_176 20 2008-04-08
+183 val_183 20 2008-04-08
+187 val_187 20 2008-04-08
+187 val_187 20 2008-04-08
+187 val_187 20 2008-04-08
+19 val_19 20 2008-04-08
+190 val_190 20 2008-04-08
+194 val_194 20 2008-04-08
+202 val_202 20 2008-04-08
+213 val_213 20 2008-04-08
+213 val_213 20 2008-04-08
+217 val_217 20 2008-04-08
+217 val_217 20 2008-04-08
+224 val_224 20 2008-04-08
+224 val_224 20 2008-04-08
+228 val_228 20 2008-04-08
+235 val_235 20 2008-04-08
+239 val_239 20 2008-04-08
+239 val_239 20 2008-04-08
+242 val_242 20 2008-04-08
+242 val_242 20 2008-04-08
+257 val_257 20 2008-04-08
+26 val_26 20 2008-04-08
+26 val_26 20 2008-04-08
+260 val_260 20 2008-04-08
+275 val_275 20 2008-04-08
+282 val_282 20 2008-04-08
+282 val_282 20 2008-04-08
+286 val_286 20 2008-04-08
+305 val_305 20 2008-04-08
+309 val_309 20 2008-04-08
+309 val_309 20 2008-04-08
+316 val_316 20 2008-04-08
+316 val_316 20 2008-04-08
+316 val_316 20 2008-04-08
+323 val_323 20 2008-04-08
+327 val_327 20 2008-04-08
+327 val_327 20 2008-04-08
+327 val_327 20 2008-04-08
+33 val_33 20 2008-04-08
+338 val_338 20 2008-04-08
+341 val_341 20 2008-04-08
+345 val_345 20 2008-04-08
+356 val_356 20 2008-04-08
+367 val_367 20 2008-04-08
+367 val_367 20 2008-04-08
+37 val_37 20 2008-04-08
+37 val_37 20 2008-04-08
+374 val_374 20 2008-04-08
+378 val_378 20 2008-04-08
+389 val_389 20 2008-04-08
+392 val_392 20 2008-04-08
+396 val_396 20 2008-04-08
+396 val_396 20 2008-04-08
+396 val_396 20 2008-04-08
+4 val_4 20 2008-04-08
+400 val_400 20 2008-04-08
+404 val_404 20 2008-04-08
+404 val_404 20 2008-04-08
+411 val_411 20 2008-04-08
+419 val_419 20 2008-04-08
+437 val_437 20 2008-04-08
+44 val_44 20 2008-04-08
+444 val_444 20 2008-04-08
+448 val_448 20 2008-04-08
+455 val_455 20 2008-04-08
+459 val_459 20 2008-04-08
+459 val_459 20 2008-04-08
+462 val_462 20 2008-04-08
+462 val_462 20 2008-04-08
+466 val_466 20 2008-04-08
+466 val_466 20 2008-04-08
+466 val_466 20 2008-04-08
+477 val_477 20 2008-04-08
+480 val_480 20 2008-04-08
+480 val_480 20 2008-04-08
+480 val_480 20 2008-04-08
+484 val_484 20 2008-04-08
+491 val_491 20 2008-04-08
+495 val_495 20 2008-04-08
+51 val_51 20 2008-04-08
+51 val_51 20 2008-04-08
+66 val_66 20 2008-04-08
+77 val_77 20 2008-04-08
+8 val_8 20 2008-04-08
+80 val_80 20 2008-04-08
+84 val_84 20 2008-04-08
+84 val_84 20 2008-04-08
+95 val_95 20 2008-04-08
+95 val_95 20 2008-04-08
+PREHOOK: query: drop table src_bucket_tbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@src_bucket_tbl
+PREHOOK: Output: default@src_bucket_tbl
+POSTHOOK: query: drop table src_bucket_tbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@src_bucket_tbl
+POSTHOOK: Output: default@src_bucket_tbl
+PREHOOK: query: CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (hr int, ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_bucket_tbl
+POSTHOOK: query: CREATE TABLE src_bucket_tbl(key int, value string) partitioned by (hr int, ds string) clustered by (key) into 1 buckets STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_bucket_tbl
+PREHOOK: query: explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(hr=30, ds='2010-05-07')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+PREHOOK: Output: default@src_bucket_tbl@hr=30/ds=2010-05-07
+POSTHOOK: query: explain load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(hr=30, ds='2010-05-07')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+POSTHOOK: Output: default@src_bucket_tbl@hr=30/ds=2010-05-07
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src_bucket_tbl__temp_table_for_load_data__
+ Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src_bucket_tbl
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 2010-05-07
+ hr 30
+ replace: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.src_bucket_tbl
+
+ Stage: Stage-2
+ Stats Work
+ Basic Stats Work:
+
+PREHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(hr=30, ds='2010-05-07')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+PREHOOK: Output: default@src_bucket_tbl@hr=30/ds=2010-05-07
+POSTHOOK: query: load data local inpath '../../data/files/bmj/000000_0' INTO TABLE src_bucket_tbl partition(hr=30, ds='2010-05-07')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl__temp_table_for_load_data__
+POSTHOOK: Output: default@src_bucket_tbl@hr=30/ds=2010-05-07
+POSTHOOK: Lineage: src_bucket_tbl PARTITION(hr=30,ds=2010-05-07).key SIMPLE [(src_bucket_tbl__temp_table_for_load_data__)src_bucket_tbl__temp_table_for_load_data__.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: src_bucket_tbl PARTITION(hr=30,ds=2010-05-07).value SIMPLE [(src_bucket_tbl__temp_table_for_load_data__)src_bucket_tbl__temp_table_for_load_data__.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select * from src_bucket_tbl where hr=30 and ds='2010-05-07'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_bucket_tbl
+PREHOOK: Input: default@src_bucket_tbl@hr=30/ds=2010-05-07
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src_bucket_tbl where hr=30 and ds='2010-05-07'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_bucket_tbl
+POSTHOOK: Input: default@src_bucket_tbl@hr=30/ds=2010-05-07
+#### A masked pattern was here ####
+0 val_0 30 2010-05-07
+0 val_0 30 2010-05-07
+0 val_0 30 2010-05-07
+103 val_103 30 2010-05-07
+103 val_103 30 2010-05-07
+11 val_11 30 2010-05-07
+114 val_114 30 2010-05-07
+118 val_118 30 2010-05-07
+118 val_118 30 2010-05-07
+125 val_125 30 2010-05-07
+125 val_125 30 2010-05-07
+129 val_129 30 2010-05-07
+129 val_129 30 2010-05-07
+136 val_136 30 2010-05-07
+143 val_143 30 2010-05-07
+15 val_15 30 2010-05-07
+15 val_15 30 2010-05-07
+150 val_150 30 2010-05-07
+158 val_158 30 2010-05-07
+165 val_165 30 2010-05-07
+165 val_165 30 2010-05-07
+169 val_169 30 2010-05-07
+169 val_169 30 2010-05-07
+169 val_169 30 2010-05-07
+169 val_169 30 2010-05-07
+172 val_172 30 2010-05-07
+172 val_172 30 2010-05-07
+176 val_176 30 2010-05-07
+176 val_176 30 2010-05-07
+183 val_183 30 2010-05-07
+187 val_187 30 2010-05-07
+187 val_187 30 2010-05-07
+187 val_187 30 2010-05-07
+19 val_19 30 2010-05-07
+190 val_190 30 2010-05-07
+194 val_194 30 2010-05-07
+202 val_202 30 2010-05-07
+213 val_213 30 2010-05-07
+213 val_213 30 2010-05-07
+217 val_217 30 2010-05-07
+217 val_217 30 2010-05-07
+224 val_224 30 2010-05-07
+224 val_224 30 2010-05-07
+228 val_228 30 2010-05-07
+235 val_235 30 2010-05-07
+239 val_239 30 2010-05-07
+239 val_239 30 2010-05-07
+242 val_242 30 2010-05-07
+242 val_242 30 2010-05-07
+257 val_257 30 2010-05-07
+26 val_26 30 2010-05-07
+26 val_26 30 2010-05-07
+260 val_260 30 2010-05-07
+275 val_275 30 2010-05-07
+282 val_282 30 2010-05-07
+282 val_282 30 2010-05-07
+286 val_286 30 2010-05-07
+305 val_305 30 2010-05-07
+309 val_309 30 2010-05-07
+309 val_309 30 2010-05-07
+316 val_316 30 2010-05-07
+316 val_316 30 2010-05-07
+316 val_316 30 2010-05-07
+323 val_323 30 2010-05-07
+327 val_327 30 2010-05-07
+327 val_327 30 2010-05-07
+327 val_327 30 2010-05-07
+33 val_33 30 2010-05-07
+338 val_338 30 2010-05-07
+341 val_341 30 2010-05-07
+345 val_345 30 2010-05-07
+356 val_356 30 2010-05-07
+367 val_367 30 2010-05-07
+367 val_367 30 2010-05-07
+37 val_37 30 2010-05-07
+37 val_37 30 2010-05-07
+374 val_374 30 2010-05-07
+378 val_378 30 2010-05-07
+389 val_389 30 2010-05-07
+392 val_392 30 2010-05-07
+396 val_396 30 2010-05-07
+396 val_396 30 2010-05-07
+396 val_396 30 2010-05-07
+4 val_4 30 2010-05-07
+400 val_400 30 2010-05-07
+404 val_404 30 2010-05-07
+404 val_404 30 2010-05-07
+411 val_411 30 2010-05-07
+419 val_419 30 2010-05-07
+437 val_437 30 2010-05-07
+44 val_44 30 2010-05-07
+444 val_444 30 2010-05-07
+448 val_448 30 2010-05-07
+455 val_455 30 2010-05-07
+459 val_459 30 2010-05-07
+459 val_459 30 2010-05-07
+462 val_462 30 2010-05-07
+462 val_462 30 2010-05-07
+466 val_466 30 2010-05-07
+466 val_466 30 2010-05-07
+466 val_466 30 2010-05-07
+477 val_477 30 2010-05-07
+480 val_480 30 2010-05-07
+480 val_480 30 2010-05-07
+480 val_480 30 2010-05-07
+484 val_484 30 2010-05-07
+491 val_491 30 2010-05-07
+495 val_495 30 2010-05-07
+51 val_51 30 2010-05-07
+51 val_51 30 2010-05-07
+66 val_66 30 2010-05-07
+77 val_77 30 2010-05-07
+8 val_8 30 2010-05-07
+80 val_80 30 2010-05-07
+84 val_84 30 2010-05-07
+84 val_84 30 2010-05-07
+95 val_95 30 2010-05-07
+95 val_95 30 2010-05-07
+PREHOOK: query: drop table src_bucket_tbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@src_bucket_tbl
+PREHOOK: Output: default@src_bucket_tbl
+POSTHOOK: query: drop table src_bucket_tbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@src_bucket_tbl
+POSTHOOK: Output: default@src_bucket_tbl
[hive] 01/02: HIVE-20593 : Load Data for partitioned ACID tables
fails with bucketId out of range: -1 (Deepak Jaiswal,
reviewed by Eugene Koifman)
Posted by sa...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
sankarh pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hive.git
commit 83e39aeac870105a62decd97b48ea7b63c7a9043
Author: Deepak Jaiswal <dj...@apache.org>
AuthorDate: Tue Sep 25 23:26:17 2018 -0700
HIVE-20593 : Load Data for partitioned ACID tables fails with bucketId out of range: -1 (Deepak Jaiswal, reviewed by Eugene Koifman)
---
...230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0 | Bin 0 -> 501 bytes
...230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0 | Bin 0 -> 465 bytes
.../hadoop/hive/ql/parse/LoadSemanticAnalyzer.java | 7 +-
.../queries/clientpositive/load_data_using_job.q | 18 +++-
.../clientpositive/llap/load_data_using_job.q.out | 110 ++++++++++++++++++++-
5 files changed, 128 insertions(+), 7 deletions(-)
diff --git a/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0 b/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0
new file mode 100644
index 0000000..020bdcc
Binary files /dev/null and b/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_0_0 differ
diff --git a/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0 b/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0
new file mode 100644
index 0000000..8c2604d
Binary files /dev/null and b/data/files/load_data_job_acid/20180918230307-b382b8c7-271c-4025-be64-4a68f4db32e5_1_0 differ
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
index cbacd05..ee12f64 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
@@ -26,11 +26,13 @@ import java.io.Serializable;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.ArrayList;
-import java.util.HashSet;
+
import org.antlr.runtime.tree.Tree;
import org.apache.commons.lang.StringUtils;
@@ -474,6 +476,9 @@ public class LoadSemanticAnalyzer extends SemanticAnalyzer {
// wipe out partition columns
tempTableObj.setPartCols(new ArrayList<>());
+ // Reset table params
+ tempTableObj.setParameters(new HashMap<>());
+
// Set data location and input format, it must be text
tempTableObj.setDataLocation(new Path(fromURI));
if (inputFormatClassName != null && serDeClassName != null) {
diff --git a/ql/src/test/queries/clientpositive/load_data_using_job.q b/ql/src/test/queries/clientpositive/load_data_using_job.q
index b760d9b..970a752 100644
--- a/ql/src/test/queries/clientpositive/load_data_using_job.q
+++ b/ql/src/test/queries/clientpositive/load_data_using_job.q
@@ -91,4 +91,20 @@ load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt
INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
select * from srcbucket_mapjoin_n8;
-drop table srcbucket_mapjoin_n8;
\ No newline at end of file
+drop table srcbucket_mapjoin_n8;
+
+-- Load into ACID table using ORC files
+set hive.mapred.mode=nonstrict;
+set hive.optimize.ppd=true;
+set hive.optimize.index.filter=true;
+set hive.tez.bucket.pruning=true;
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true');
+explain load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn;
+load data local inpath '../../data/files/load_data_job_acid' into table orc_test_txn;
+
+select * from orc_test_txn;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out b/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out
index 765ffdf..8a82467 100644
--- a/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out
+++ b/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out
@@ -977,16 +977,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcbucket_mapjoin_n8__temp_table_for_load_data__
- Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -996,10 +996,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 8648 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -3018,3 +3018,103 @@ POSTHOOK: query: drop table srcbucket_mapjoin_n8
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@srcbucket_mapjoin_n8
POSTHOOK: Output: default@srcbucket_mapjoin_n8
+PREHOOK: query: CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_test_txn
+POSTHOOK: query: CREATE TABLE orc_test_txn (`id` integer, name string, dept string) PARTITIONED BY (year integer) STORED AS ORC TBLPROPERTIES('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_test_txn
+#### A masked pattern was here ####
+PREHOOK: type: QUERY
+#### A masked pattern was here ####
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: orc_test_txn__temp_table_for_load_data__
+ Statistics: Num rows: 24 Data size: 9024 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: int), name (type: string), dept (type: string), year (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 24 Data size: 9024 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 24 Data size: 9024 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orc_test_txn
+ Write Type: INSERT
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ year
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.orc_test_txn
+ Write Type: INSERT
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+
+#### A masked pattern was here ####
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_test_txn__temp_table_for_load_data__
+PREHOOK: Output: default@orc_test_txn
+#### A masked pattern was here ####
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_test_txn__temp_table_for_load_data__
+POSTHOOK: Output: default@orc_test_txn@year=2016
+POSTHOOK: Output: default@orc_test_txn@year=2017
+POSTHOOK: Output: default@orc_test_txn@year=2018
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2016).dept SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:dept, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2016).id SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2016).name SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2017).dept SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:dept, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2017).id SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2017).name SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2018).dept SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:dept, type:string, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2018).id SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: orc_test_txn PARTITION(year=2018).name SIMPLE [(orc_test_txn__temp_table_for_load_data__)orc_test_txn__temp_table_for_load_data__.FieldSchema(name:name, type:string, comment:null), ]
+PREHOOK: query: select * from orc_test_txn
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_test_txn
+PREHOOK: Input: default@orc_test_txn@year=2016
+PREHOOK: Input: default@orc_test_txn@year=2017
+PREHOOK: Input: default@orc_test_txn@year=2018
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_test_txn
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_test_txn
+POSTHOOK: Input: default@orc_test_txn@year=2016
+POSTHOOK: Input: default@orc_test_txn@year=2017
+POSTHOOK: Input: default@orc_test_txn@year=2018
+#### A masked pattern was here ####
+9 Harris CSE 2017
+8 Henry CSE 2016
+10 Haley CSE 2018