You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2016/03/17 23:47:54 UTC

[50/51] [abbrv] hive git commit: HIVE-13285: Orc concatenation may drop old files from moving to final path (Prasanth Jayachandran reviewed by Gopal V)

HIVE-13285: Orc concatenation may drop old files from moving to final path (Prasanth Jayachandran reviewed by Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a17122f4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a17122f4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a17122f4

Branch: refs/heads/llap
Commit: a17122f45daf391454a73895bb651d97f6e6fdc8
Parents: 0b57450
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Wed Mar 16 14:01:52 2016 -0700
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Wed Mar 16 14:01:52 2016 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |  1 +
 .../hive/ql/exec/AbstractFileMergeOperator.java | 23 ++++---
 .../hive/ql/exec/OrcFileMergeOperator.java      | 14 ++--
 .../clientpositive/orc_merge_incompat3.q        | 14 ++++
 .../clientpositive/orc_merge_incompat3.q.out    | 70 ++++++++++++++++++++
 .../tez/orc_merge_incompat3.q.out               | 70 ++++++++++++++++++++
 6 files changed, 176 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/a17122f4/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index f991d49..39ba628 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -176,6 +176,7 @@ minitez.query.files.shared=acid_globallimit.q,\
   orc_merge11.q,\
   orc_merge_incompat1.q,\
   orc_merge_incompat2.q,\
+  orc_merge_incompat3.q,\
   orc_vectorization_ppd.q,\
   parallel.q,\
   ptf.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/a17122f4/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
index f99bf11..154a78b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
@@ -208,18 +208,23 @@ public abstract class AbstractFileMergeOperator<T extends FileMergeDesc>
   @Override
   public void closeOp(boolean abort) throws HiveException {
     try {
-      if (!exception) {
-        FileStatus fss = fs.getFileStatus(outPath);
-        if (!fs.rename(outPath, finalPath)) {
-          throw new IOException(
-              "Unable to rename " + outPath + " to " + finalPath);
+      if (!abort) {
+        // if outPath does not exist, then it means all paths within combine split are skipped as
+        // they are incompatible for merge (for example: files without stripe stats).
+        // Those files will be added to incompatFileSet
+        if (fs.exists(outPath)) {
+          FileStatus fss = fs.getFileStatus(outPath);
+          if (!fs.rename(outPath, finalPath)) {
+            throw new IOException(
+                "Unable to rename " + outPath + " to " + finalPath);
+          }
+          LOG.info("renamed path " + outPath + " to " + finalPath + " . File" +
+              " size is "
+              + fss.getLen());
         }
-        LOG.info("renamed path " + outPath + " to " + finalPath + " . File" +
-            " size is "
-            + fss.getLen());
 
         // move any incompatible files to final path
-        if (!incompatFileSet.isEmpty()) {
+        if (incompatFileSet != null && !incompatFileSet.isEmpty()) {
           for (Path incompatFile : incompatFileSet) {
             Path destDir = finalPath.getParent();
             try {

http://git-wip-us.apache.org/repos/asf/hive/blob/a17122f4/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
index 445cf3d..e554ab1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OrcFileMergeOperator.java
@@ -231,22 +231,22 @@ public class OrcFileMergeOperator extends
 
   @Override
   public void closeOp(boolean abort) throws HiveException {
-    // close writer
-    if (outWriter == null) {
-      return;
-    }
-
     try {
       if (fdis != null) {
         fdis.close();
         fdis = null;
       }
 
-      outWriter.close();
-      outWriter = null;
+      if (outWriter != null) {
+        outWriter.close();
+        outWriter = null;
+      }
     } catch (Exception e) {
       throw new HiveException("Unable to close OrcFileMergeOperator", e);
     }
+
+    // Unless closing the streams above failed, always delegate to the parent operator, even when
+    // no writer was opened, so that incompatible files are still moved to the destination path
     super.closeOp(abort);
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/a17122f4/ql/src/test/queries/clientpositive/orc_merge_incompat3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/orc_merge_incompat3.q b/ql/src/test/queries/clientpositive/orc_merge_incompat3.q
new file mode 100644
index 0000000..d6be111
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/orc_merge_incompat3.q
@@ -0,0 +1,14 @@
+create table concat_incompat like alltypesorc;
+
+load data local inpath '../../data/files/alltypesorc' into table concat_incompat;
+load data local inpath '../../data/files/alltypesorc' into table concat_incompat;
+load data local inpath '../../data/files/alltypesorc' into table concat_incompat;
+load data local inpath '../../data/files/alltypesorc' into table concat_incompat;
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/concat_incompat/;
+select count(*) from concat_incompat;
+
+ALTER TABLE concat_incompat CONCATENATE;
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/concat_incompat/;
+select count(*) from concat_incompat;

http://git-wip-us.apache.org/repos/asf/hive/blob/a17122f4/ql/src/test/results/clientpositive/orc_merge_incompat3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_merge_incompat3.q.out b/ql/src/test/results/clientpositive/orc_merge_incompat3.q.out
new file mode 100644
index 0000000..e34492b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/orc_merge_incompat3.q.out
@@ -0,0 +1,70 @@
+PREHOOK: query: create table concat_incompat like alltypesorc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@concat_incompat
+POSTHOOK: query: create table concat_incompat like alltypesorc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@concat_incompat
+PREHOOK: query: load data local inpath '../../data/files/alltypesorc' into table concat_incompat
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@concat_incompat
+POSTHOOK: query: load data local inpath '../../data/files/alltypesorc' into table concat_incompat
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@concat_incompat
+PREHOOK: query: load data local inpath '../../data/files/alltypesorc' into table concat_incompat
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@concat_incompat
+POSTHOOK: query: load data local inpath '../../data/files/alltypesorc' into table concat_incompat
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@concat_incompat
+PREHOOK: query: load data local inpath '../../data/files/alltypesorc' into table concat_incompat
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@concat_incompat
+POSTHOOK: query: load data local inpath '../../data/files/alltypesorc' into table concat_incompat
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@concat_incompat
+PREHOOK: query: load data local inpath '../../data/files/alltypesorc' into table concat_incompat
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@concat_incompat
+POSTHOOK: query: load data local inpath '../../data/files/alltypesorc' into table concat_incompat
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@concat_incompat
+Found 4 items
+#### A masked pattern was here ####
+PREHOOK: query: select count(*) from concat_incompat
+PREHOOK: type: QUERY
+PREHOOK: Input: default@concat_incompat
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from concat_incompat
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@concat_incompat
+#### A masked pattern was here ####
+49152
+PREHOOK: query: ALTER TABLE concat_incompat CONCATENATE
+PREHOOK: type: ALTER_TABLE_MERGE
+PREHOOK: Input: default@concat_incompat
+PREHOOK: Output: default@concat_incompat
+POSTHOOK: query: ALTER TABLE concat_incompat CONCATENATE
+POSTHOOK: type: ALTER_TABLE_MERGE
+POSTHOOK: Input: default@concat_incompat
+POSTHOOK: Output: default@concat_incompat
+Found 4 items
+#### A masked pattern was here ####
+PREHOOK: query: select count(*) from concat_incompat
+PREHOOK: type: QUERY
+PREHOOK: Input: default@concat_incompat
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from concat_incompat
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@concat_incompat
+#### A masked pattern was here ####
+49152

http://git-wip-us.apache.org/repos/asf/hive/blob/a17122f4/ql/src/test/results/clientpositive/tez/orc_merge_incompat3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/orc_merge_incompat3.q.out b/ql/src/test/results/clientpositive/tez/orc_merge_incompat3.q.out
new file mode 100644
index 0000000..e34492b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/orc_merge_incompat3.q.out
@@ -0,0 +1,70 @@
+PREHOOK: query: create table concat_incompat like alltypesorc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@concat_incompat
+POSTHOOK: query: create table concat_incompat like alltypesorc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@concat_incompat
+PREHOOK: query: load data local inpath '../../data/files/alltypesorc' into table concat_incompat
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@concat_incompat
+POSTHOOK: query: load data local inpath '../../data/files/alltypesorc' into table concat_incompat
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@concat_incompat
+PREHOOK: query: load data local inpath '../../data/files/alltypesorc' into table concat_incompat
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@concat_incompat
+POSTHOOK: query: load data local inpath '../../data/files/alltypesorc' into table concat_incompat
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@concat_incompat
+PREHOOK: query: load data local inpath '../../data/files/alltypesorc' into table concat_incompat
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@concat_incompat
+POSTHOOK: query: load data local inpath '../../data/files/alltypesorc' into table concat_incompat
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@concat_incompat
+PREHOOK: query: load data local inpath '../../data/files/alltypesorc' into table concat_incompat
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@concat_incompat
+POSTHOOK: query: load data local inpath '../../data/files/alltypesorc' into table concat_incompat
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@concat_incompat
+Found 4 items
+#### A masked pattern was here ####
+PREHOOK: query: select count(*) from concat_incompat
+PREHOOK: type: QUERY
+PREHOOK: Input: default@concat_incompat
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from concat_incompat
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@concat_incompat
+#### A masked pattern was here ####
+49152
+PREHOOK: query: ALTER TABLE concat_incompat CONCATENATE
+PREHOOK: type: ALTER_TABLE_MERGE
+PREHOOK: Input: default@concat_incompat
+PREHOOK: Output: default@concat_incompat
+POSTHOOK: query: ALTER TABLE concat_incompat CONCATENATE
+POSTHOOK: type: ALTER_TABLE_MERGE
+POSTHOOK: Input: default@concat_incompat
+POSTHOOK: Output: default@concat_incompat
+Found 4 items
+#### A masked pattern was here ####
+PREHOOK: query: select count(*) from concat_incompat
+PREHOOK: type: QUERY
+PREHOOK: Input: default@concat_incompat
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from concat_incompat
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@concat_incompat
+#### A masked pattern was here ####
+49152