You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by da...@apache.org on 2015/08/10 23:33:01 UTC

hive git commit: HIVE-11438: Joining an ACID table with a non-ACID table fails with MR on 1.0.0 (Daniel Dai, reviewed by Jason Dere)

Repository: hive
Updated Branches:
  refs/heads/branch-1.0 06f10fe41 -> b71f6aaa9


HIVE-11438: Joining an ACID table with a non-ACID table fails with MR on 1.0.0 (Daniel Dai, reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b71f6aaa
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b71f6aaa
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b71f6aaa

Branch: refs/heads/branch-1.0
Commit: b71f6aaa9a7d669130618c9adce1b820570b50fb
Parents: 06f10fe
Author: Daniel Dai <da...@hortonworks.com>
Authored: Mon Aug 10 14:30:56 2015 -0700
Committer: Daniel Dai <da...@hortonworks.com>
Committed: Mon Aug 10 14:31:39 2015 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |  1 +
 .../apache/hadoop/hive/ql/exec/Utilities.java   | 10 +++-
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   |  2 +-
 .../queries/clientpositive/join_acid_non_acid.q | 24 ++++++++
 .../clientpositive/join_acid_non_acid.q.out     | 58 ++++++++++++++++++++
 5 files changed, 93 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b71f6aaa/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 3690e5c..18289f7 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -25,6 +25,7 @@ minimr.query.files=auto_sortmerge_join_16.q,\
   infer_bucket_sort_reducers_power_two.q,\
   input16_cc.q,\
   join1.q,\
+  join_acid_non_acid.q,\
   leftsemijoin_mr.q,\
   list_bucket_dml_10.q,\
   load_fs2.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/b71f6aaa/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 913288f..b2db584 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -382,7 +382,15 @@ public final class Utilities {
           in = new ByteArrayInputStream(planBytes);
           in = new InflaterInputStream(in);
         } else {
-          in = new FileInputStream(localPath.toUri().getPath());
+          try {
+            in = new FileInputStream(localPath.toUri().getPath());
+          } catch (FileNotFoundException fnf) {
+          }
+          // On the frontend, localPath does not exist; in that case
+          // fall back to opening the plan file at its HDFS path.
+          if (in == null) {
+            in = path.getFileSystem(conf).open(path);
+          }
         }
 
         if(MAP_PLAN_NAME.equals(name)){

http://git-wip-us.apache.org/repos/asf/hive/blob/b71f6aaa/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 200daa5..be0c947 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -132,7 +132,7 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
   @Override
   public boolean shouldSkipCombine(Path path,
                                    Configuration conf) throws IOException {
-    return (conf.get(AcidUtils.CONF_ACID_KEY) != null) || AcidUtils.isAcid(path, conf);
+    return (conf.getBoolean(AcidUtils.CONF_ACID_KEY, false)) || AcidUtils.isAcid(path, conf);
   }
 
   private static class OrcRecordReader

http://git-wip-us.apache.org/repos/asf/hive/blob/b71f6aaa/ql/src/test/queries/clientpositive/join_acid_non_acid.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/join_acid_non_acid.q b/ql/src/test/queries/clientpositive/join_acid_non_acid.q
new file mode 100644
index 0000000..43d768f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/join_acid_non_acid.q
@@ -0,0 +1,24 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+CREATE TABLE orc_update_table (k1 INT, f1 STRING, op_code STRING)
+CLUSTERED BY (k1) INTO 2 BUCKETS
+STORED AS ORC TBLPROPERTIES("transactional"="true");
+
+INSERT INTO TABLE orc_update_table VALUES (1, 'a', 'I');
+
+CREATE TABLE orc_table (k1 INT, f1 STRING)
+CLUSTERED BY (k1) SORTED BY (k1) INTO 2 BUCKETS
+STORED AS ORC;
+
+INSERT OVERWRITE TABLE orc_table VALUES (1, 'x');
+
+set hive.cbo.enable=true;
+SET hive.execution.engine=mr;
+SET hive.auto.convert.join=false;
+SET hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+SET hive.conf.validation=false;
+SET hive.doing.acid=false;
+
+SELECT t1.*, t2.* FROM orc_table t1
+JOIN orc_update_table t2 ON t1.k1=t2.k1 ORDER BY t1.k1;

http://git-wip-us.apache.org/repos/asf/hive/blob/b71f6aaa/ql/src/test/results/clientpositive/join_acid_non_acid.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join_acid_non_acid.q.out b/ql/src/test/results/clientpositive/join_acid_non_acid.q.out
new file mode 100644
index 0000000..4905351
--- /dev/null
+++ b/ql/src/test/results/clientpositive/join_acid_non_acid.q.out
@@ -0,0 +1,58 @@
+PREHOOK: query: CREATE TABLE orc_update_table (k1 INT, f1 STRING, op_code STRING)
+CLUSTERED BY (k1) INTO 2 BUCKETS
+STORED AS ORC TBLPROPERTIES("transactional"="true")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_update_table
+POSTHOOK: query: CREATE TABLE orc_update_table (k1 INT, f1 STRING, op_code STRING)
+CLUSTERED BY (k1) INTO 2 BUCKETS
+STORED AS ORC TBLPROPERTIES("transactional"="true")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_update_table
+PREHOOK: query: INSERT INTO TABLE orc_update_table VALUES (1, 'a', 'I')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@orc_update_table
+POSTHOOK: query: INSERT INTO TABLE orc_update_table VALUES (1, 'a', 'I')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@orc_update_table
+POSTHOOK: Lineage: orc_update_table.f1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: orc_update_table.k1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: orc_update_table.op_code SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: CREATE TABLE orc_table (k1 INT, f1 STRING)
+CLUSTERED BY (k1) SORTED BY (k1) INTO 2 BUCKETS
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_table
+POSTHOOK: query: CREATE TABLE orc_table (k1 INT, f1 STRING)
+CLUSTERED BY (k1) SORTED BY (k1) INTO 2 BUCKETS
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_table
+PREHOOK: query: INSERT OVERWRITE TABLE orc_table VALUES (1, 'x')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@orc_table
+POSTHOOK: query: INSERT OVERWRITE TABLE orc_table VALUES (1, 'x')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@orc_table
+POSTHOOK: Lineage: orc_table.f1 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: orc_table.k1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT t1.*, t2.* FROM orc_table t1
+JOIN orc_update_table t2 ON t1.k1=t2.k1 ORDER BY t1.k1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_table
+PREHOOK: Input: default@orc_update_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t1.*, t2.* FROM orc_table t1
+JOIN orc_update_table t2 ON t1.k1=t2.k1 ORDER BY t1.k1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_table
+POSTHOOK: Input: default@orc_update_table
+#### A masked pattern was here ####
+1	x	1	a	I