You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2015/09/09 09:08:36 UTC

[06/50] [abbrv] hive git commit: HIVE-11658: Load data file format validation does not work with directories (Prasanth Jayachandran reviewed by Gunther Hagleitner)

HIVE-11658: Load data file format validation does not work with directories (Prasanth Jayachandran reviewed by Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9670a2b3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9670a2b3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9670a2b3

Branch: refs/heads/beeline-cli
Commit: 9670a2b3c35dfc3b9f61481b7ea8fcefbb01571c
Parents: b247cac
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Thu Aug 27 11:43:25 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Thu Aug 27 11:43:25 2015 -0500

----------------------------------------------------------------------
 .../hive/ql/parse/LoadSemanticAnalyzer.java     | 38 +++++++++++---------
 .../queries/clientnegative/load_orc_negative3.q |  6 ++++
 .../test/queries/clientpositive/load_orc_part.q |  4 +++
 .../clientnegative/load_orc_negative3.q.out     | 25 +++++++++++++
 .../results/clientpositive/load_orc_part.q.out  | 18 ++++++++++
 5 files changed, 75 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
index 85fa9c9..9d2702f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
@@ -128,9 +128,11 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
     return new URI(fromScheme, fromAuthority, path, null, null);
   }
 
-  private void applyConstraints(URI fromURI, URI toURI, Tree ast,
+  private FileStatus[] applyConstraintsAndGetFiles(URI fromURI, URI toURI, Tree ast,
       boolean isLocal) throws SemanticException {
 
+    FileStatus[] srcs = null;
+
     // local mode implies that scheme should be "file"
     // we can change this going forward
     if (isLocal && !fromURI.getScheme().equals("file")) {
@@ -139,7 +141,7 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
     }
 
     try {
-      FileStatus[] srcs = matchFilesOrDir(FileSystem.get(fromURI, conf), new Path(fromURI));
+      srcs = matchFilesOrDir(FileSystem.get(fromURI, conf), new Path(fromURI));
       if (srcs == null || srcs.length == 0) {
         throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast,
             "No files matching path " + fromURI));
@@ -168,6 +170,8 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
           + "\"hive.metastore.warehouse.dir\" do not conflict.";
       throw new SemanticException(ErrorMsg.ILLEGAL_PATH.getMsg(ast, reason));
     }
+
+    return srcs;
   }
 
   @Override
@@ -227,11 +231,11 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
     }
 
     // make sure the arguments make sense
-    applyConstraints(fromURI, toURI, fromTree, isLocal);
+    FileStatus[] files = applyConstraintsAndGetFiles(fromURI, toURI, fromTree, isLocal);
 
     // for managed tables, make sure the file formats match
     if (TableType.MANAGED_TABLE.equals(ts.tableHandle.getTableType())) {
-      ensureFileFormatsMatch(ts, fromURI);
+      ensureFileFormatsMatch(ts, files);
     }
     inputs.add(toReadEntity(new Path(fromURI)));
     Task<? extends Serializable> rTask = null;
@@ -325,7 +329,7 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
     }
   }
 
-  private void ensureFileFormatsMatch(TableSpec ts, URI fromURI) throws SemanticException {
+  private void ensureFileFormatsMatch(TableSpec ts, FileStatus[] fileStatuses) throws SemanticException {
     final Class<? extends InputFormat> destInputFormat;
     try {
       if (ts.getPartSpec() == null || ts.getPartSpec().isEmpty()) {
@@ -340,17 +344,19 @@ public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
     // Other file formats should do similar check to make sure file formats match
     // when doing LOAD DATA .. INTO TABLE
     if (OrcInputFormat.class.equals(destInputFormat)) {
-      Path inputFilePath = new Path(fromURI);
-      try {
-        FileSystem fs = FileSystem.get(fromURI, conf);
-        // just creating orc reader is going to do sanity checks to make sure its valid ORC file
-        OrcFile.createReader(fs, inputFilePath);
-      } catch (FileFormatException e) {
-        throw new SemanticException(ErrorMsg.INVALID_FILE_FORMAT_IN_LOAD.getMsg("Destination" +
-            " table is stored as ORC but the file being loaded is not a valid ORC file."));
-      } catch (IOException e) {
-        throw new SemanticException("Unable to load data to destination table." +
-            " Error: " + e.getMessage());
+      for (FileStatus fileStatus : fileStatuses) {
+        try {
+          Path filePath = fileStatus.getPath();
+          FileSystem fs = FileSystem.get(filePath.toUri(), conf);
+          // just creating orc reader is going to do sanity checks to make sure it's a valid ORC file
+          OrcFile.createReader(fs, filePath);
+        } catch (FileFormatException e) {
+          throw new SemanticException(ErrorMsg.INVALID_FILE_FORMAT_IN_LOAD.getMsg("Destination" +
+              " table is stored as ORC but the file being loaded is not a valid ORC file."));
+        } catch (IOException e) {
+          throw new SemanticException("Unable to load data to destination table." +
+              " Error: " + e.getMessage());
+        }
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/queries/clientnegative/load_orc_negative3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/load_orc_negative3.q b/ql/src/test/queries/clientnegative/load_orc_negative3.q
new file mode 100644
index 0000000..9a4116e
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/load_orc_negative3.q
@@ -0,0 +1,6 @@
+create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp);
+load data local inpath '../../data/files/kv1.txt' into table text_test;
+
+set hive.default.fileformat=ORC;
+create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp);
+load data inpath '${hiveconf:hive.metastore.warehouse.dir}/text_test/' into table orc_test;

http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/queries/clientpositive/load_orc_part.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/load_orc_part.q b/ql/src/test/queries/clientpositive/load_orc_part.q
index 0927ea4..2ff884d 100644
--- a/ql/src/test/queries/clientpositive/load_orc_part.q
+++ b/ql/src/test/queries/clientpositive/load_orc_part.q
@@ -9,6 +9,10 @@ load data inpath '${hiveconf:hive.metastore.warehouse.dir}/orc_staging/orc_split
 load data local inpath '../../data/files/orc_split_elim.orc' into table orc_test partition (ds='10');
 dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=10/;
 
+load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging;
+load data inpath '${hiveconf:hive.metastore.warehouse.dir}/orc_staging/' overwrite into table orc_test partition (ds='10');
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_test/ds=10/;
+
 alter table orc_test add partition(ds='11');
 alter table orc_test partition(ds='11') set fileformat textfile;
 load data local inpath '../../data/files/kv1.txt' into table orc_test partition(ds='11');

http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/results/clientnegative/load_orc_negative3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/load_orc_negative3.q.out b/ql/src/test/results/clientnegative/load_orc_negative3.q.out
new file mode 100644
index 0000000..77fb50e
--- /dev/null
+++ b/ql/src/test/results/clientnegative/load_orc_negative3.q.out
@@ -0,0 +1,25 @@
+PREHOOK: query: create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@text_test
+POSTHOOK: query: create table text_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@text_test
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table text_test
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@text_test
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table text_test
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@text_test
+PREHOOK: query: create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_test
+POSTHOOK: query: create table orc_test (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_test
+FAILED: SemanticException [Error 30019]: The file that you are trying to load does not match the file format of the destination table. Destination table is stored as ORC but the file being loaded is not a valid ORC file.

http://git-wip-us.apache.org/repos/asf/hive/blob/9670a2b3/ql/src/test/results/clientpositive/load_orc_part.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/load_orc_part.q.out b/ql/src/test/results/clientpositive/load_orc_part.q.out
index 34ca493..2e02c2e 100644
--- a/ql/src/test/results/clientpositive/load_orc_part.q.out
+++ b/ql/src/test/results/clientpositive/load_orc_part.q.out
@@ -42,6 +42,24 @@ POSTHOOK: type: LOAD
 POSTHOOK: Output: default@orc_test@ds=10
 Found 2 items
 #### A masked pattern was here ####
+PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@orc_staging
+POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' overwrite into table orc_staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@orc_staging
+#### A masked pattern was here ####
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@orc_test@ds=10
+#### A masked pattern was here ####
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@orc_test@ds=10
+Found 1 items
+#### A masked pattern was here ####
 PREHOOK: query: alter table orc_test add partition(ds='11')
 PREHOOK: type: ALTERTABLE_ADDPARTS
 PREHOOK: Output: default@orc_test