You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vg...@apache.org on 2018/05/15 22:40:41 UTC
[21/50] [abbrv] hive git commit: HIVE-19453 : Extend Load Data statement to take Input file format and Serde as parameters (Deepak Jaiswal, reviewed by Jason Dere)
HIVE-19453 : Extend Load Data statement to take Input file format and Serde as parameters (Deepak Jaiswal, reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/32e29cc6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/32e29cc6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/32e29cc6
Branch: refs/heads/branch-3.0.0
Commit: 32e29cc63c41d722a4b2f8ffae4b9c3a660b8db4
Parents: b331338
Author: Deepak Jaiswal <dj...@apache.org>
Authored: Wed May 9 11:06:34 2018 -0700
Committer: Deepak Jaiswal <dj...@apache.org>
Committed: Fri May 11 10:55:14 2018 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/parse/HiveParser.g | 12 +++++--
.../hive/ql/parse/LoadSemanticAnalyzer.java | 33 ++++++++++++++++++--
.../clientpositive/load_data_using_job.q | 8 +++--
.../llap/load_data_using_job.q.out | 8 +++++
4 files changed, 54 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/32e29cc6/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index a837d67..3712a53 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -422,6 +422,7 @@ TOK_ADD_TRIGGER;
TOK_REPLACE;
TOK_LIKERP;
TOK_UNMANAGED;
+TOK_INPUTFORMAT;
}
@@ -835,8 +836,8 @@ execStatement
loadStatement
@init { pushMsg("load statement", state); }
@after { popMsg(state); }
- : KW_LOAD KW_DATA (islocal=KW_LOCAL)? KW_INPATH (path=StringLiteral) (isoverwrite=KW_OVERWRITE)? KW_INTO KW_TABLE (tab=tableOrPartition)
- -> ^(TOK_LOAD $path $tab $islocal? $isoverwrite?)
+ : KW_LOAD KW_DATA (islocal=KW_LOCAL)? KW_INPATH (path=StringLiteral) (isoverwrite=KW_OVERWRITE)? KW_INTO KW_TABLE (tab=tableOrPartition) inputFileFormat?
+ -> ^(TOK_LOAD $path $tab $islocal? $isoverwrite? inputFileFormat?)
;
replicationClause
@@ -1489,6 +1490,13 @@ fileFormat
| genericSpec=identifier -> ^(TOK_FILEFORMAT_GENERIC $genericSpec)
;
+inputFileFormat
+@init { pushMsg("Load Data input file format specification", state); }
+@after { popMsg(state); }
+ : KW_INPUTFORMAT inFmt=StringLiteral KW_SERDE serdeCls=StringLiteral
+ -> ^(TOK_INPUTFORMAT $inFmt $serdeCls)
+ ;
+
tabTypeExpr
@init { pushMsg("specifying table types", state); }
@after { popMsg(state); }
http://git-wip-us.apache.org/repos/asf/hive/blob/32e29cc6/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
index 2b88ea6..866f43d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
@@ -79,6 +79,8 @@ public class LoadSemanticAnalyzer extends SemanticAnalyzer {
// AST specific data
private Tree fromTree, tableTree;
private boolean isLocal = false, isOverWrite = false;
+ private String inputFormatClassName = null;
+ private String serDeClassName = null;
public LoadSemanticAnalyzer(QueryState queryState) throws SemanticException {
super(queryState);
@@ -257,12 +259,30 @@ public class LoadSemanticAnalyzer extends SemanticAnalyzer {
fromTree = ast.getChild(0);
tableTree = ast.getChild(1);
- if (ast.getChildCount() == 4) {
+ boolean inputInfo = false;
+ // Check the last node
+ ASTNode child = (ASTNode)ast.getChild(ast.getChildCount() - 1);
+ if (child.getToken().getType() == HiveParser.TOK_INPUTFORMAT) {
+ if (child.getChildCount() != 2) {
+ throw new SemanticException("FileFormat should contain both input format and Serde");
+ }
+ try {
+ inputFormatClassName = stripQuotes(child.getChild(0).getText());
+ serDeClassName = stripQuotes(child.getChild(1).getText());
+ inputInfo = true;
+ } catch (Exception e) {
+ throw new SemanticException("FileFormat inputFormatClassName or serDeClassName is incorrect");
+ }
+ }
+
+ if ((!inputInfo && ast.getChildCount() == 4) ||
+ (inputInfo && ast.getChildCount() == 5)) {
isLocal = true;
isOverWrite = true;
}
- if (ast.getChildCount() == 3) {
+ if ((!inputInfo && ast.getChildCount() == 3) ||
+ (inputInfo && ast.getChildCount() == 4)) {
if (ast.getChild(2).getText().toLowerCase().equals("local")) {
isLocal = true;
} else {
@@ -450,7 +470,14 @@ public class LoadSemanticAnalyzer extends SemanticAnalyzer {
// Set data location and input format, it must be text
tempTableObj.setDataLocation(new Path(fromURI));
- tempTableObj.setInputFormatClass(TextInputFormat.class);
+ if (inputFormatClassName != null && serDeClassName != null) {
+ try {
+ tempTableObj.setInputFormatClass(inputFormatClassName);
+ tempTableObj.setSerializationLib(serDeClassName);
+ } catch (HiveException e) {
+ throw new SemanticException("Load Data: Failed to set inputFormat or SerDe");
+ }
+ }
// Step 2 : create the Insert query
StringBuilder rewrittenQueryStr = new StringBuilder();
http://git-wip-us.apache.org/repos/asf/hive/blob/32e29cc6/ql/src/test/queries/clientpositive/load_data_using_job.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/load_data_using_job.q b/ql/src/test/queries/clientpositive/load_data_using_job.q
index 3928f1f..3659b6e 100644
--- a/ql/src/test/queries/clientpositive/load_data_using_job.q
+++ b/ql/src/test/queries/clientpositive/load_data_using_job.q
@@ -84,7 +84,11 @@ drop table srcbucket_mapjoin;
-- Load into ORC table using text files
CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) STORED AS ORC;
-explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin;
-load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin;
+explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin
+INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
+SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
+load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin
+INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
+SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
select * from srcbucket_mapjoin;
drop table srcbucket_mapjoin;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/32e29cc6/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out b/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out
index 116630c..c3b70a3 100644
--- a/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out
+++ b/ql/src/test/results/clientpositive/llap/load_data_using_job.q.out
@@ -2776,8 +2776,12 @@ POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@srcbucket_mapjoin
PREHOOK: query: explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin
+INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
+SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
PREHOOK: type: QUERY
POSTHOOK: query: explain load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin
+INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
+SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -2830,10 +2834,14 @@ STAGE PLANS:
Basic Stats Work:
PREHOOK: query: load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin
+INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
+SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcbucket_mapjoin__TEMP_TABLE_FOR_LOAD_DATA__
PREHOOK: Output: default@srcbucket_mapjoin
POSTHOOK: query: load data local inpath '../../data/files/load_data_job/load_data_1_partition.txt' INTO TABLE srcbucket_mapjoin
+INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
+SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcbucket_mapjoin__TEMP_TABLE_FOR_LOAD_DATA__
POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08