You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by da...@apache.org on 2015/08/10 23:33:01 UTC
hive git commit: HIVE-11438: Joining an ACID table with a non-ACID table
fails with MR on 1.0.0 (Daniel Dai reviewed by Jason Dere)
Repository: hive
Updated Branches:
refs/heads/branch-1.0 06f10fe41 -> b71f6aaa9
HIVE-11438: Joining an ACID table with a non-ACID table fails with MR on 1.0.0 (Daniel Dai reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b71f6aaa
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b71f6aaa
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b71f6aaa
Branch: refs/heads/branch-1.0
Commit: b71f6aaa9a7d669130618c9adce1b820570b50fb
Parents: 06f10fe
Author: Daniel Dai <da...@hortonworks.com>
Authored: Mon Aug 10 14:30:56 2015 -0700
Committer: Daniel Dai <da...@hortonworks.com>
Committed: Mon Aug 10 14:31:39 2015 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../apache/hadoop/hive/ql/exec/Utilities.java | 10 +++-
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 2 +-
.../queries/clientpositive/join_acid_non_acid.q | 24 ++++++++
.../clientpositive/join_acid_non_acid.q.out | 58 ++++++++++++++++++++
5 files changed, 93 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/b71f6aaa/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 3690e5c..18289f7 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -25,6 +25,7 @@ minimr.query.files=auto_sortmerge_join_16.q,\
infer_bucket_sort_reducers_power_two.q,\
input16_cc.q,\
join1.q,\
+ join_acid_non_acid.q,\
leftsemijoin_mr.q,\
list_bucket_dml_10.q,\
load_fs2.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/b71f6aaa/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 913288f..b2db584 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -382,7 +382,15 @@ public final class Utilities {
in = new ByteArrayInputStream(planBytes);
in = new InflaterInputStream(in);
} else {
- in = new FileInputStream(localPath.toUri().getPath());
+ try {
+ in = new FileInputStream(localPath.toUri().getPath());
+ } catch (FileNotFoundException fnf) {
+ }
+ // On the frontend localPath does not exist, so the open above fails;
+ // in that case fall back to fetching it from HDFS.
+ if (in == null) {
+ in = path.getFileSystem(conf).open(path);
+ }
}
if(MAP_PLAN_NAME.equals(name)){
http://git-wip-us.apache.org/repos/asf/hive/blob/b71f6aaa/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 200daa5..be0c947 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -132,7 +132,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
@Override
public boolean shouldSkipCombine(Path path,
Configuration conf) throws IOException {
- return (conf.get(AcidUtils.CONF_ACID_KEY) != null) || AcidUtils.isAcid(path, conf);
+ return (conf.getBoolean(AcidUtils.CONF_ACID_KEY, false)) || AcidUtils.isAcid(path, conf);
}
private static class OrcRecordReader
http://git-wip-us.apache.org/repos/asf/hive/blob/b71f6aaa/ql/src/test/queries/clientpositive/join_acid_non_acid.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/join_acid_non_acid.q b/ql/src/test/queries/clientpositive/join_acid_non_acid.q
new file mode 100644
index 0000000..43d768f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/join_acid_non_acid.q
@@ -0,0 +1,24 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+CREATE TABLE orc_update_table (k1 INT, f1 STRING, op_code STRING)
+CLUSTERED BY (k1) INTO 2 BUCKETS
+STORED AS ORC TBLPROPERTIES("transactional"="true");
+
+INSERT INTO TABLE orc_update_table VALUES (1, 'a', 'I');
+
+CREATE TABLE orc_table (k1 INT, f1 STRING)
+CLUSTERED BY (k1) SORTED BY (k1) INTO 2 BUCKETS
+STORED AS ORC;
+
+INSERT OVERWRITE TABLE orc_table VALUES (1, 'x');
+
+set hive.cbo.enable=true;
+SET hive.execution.engine=mr;
+SET hive.auto.convert.join=false;
+SET hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+SET hive.conf.validation=false;
+SET hive.doing.acid=false;
+
+SELECT t1.*, t2.* FROM orc_table t1
+JOIN orc_update_table t2 ON t1.k1=t2.k1 ORDER BY t1.k1;
http://git-wip-us.apache.org/repos/asf/hive/blob/b71f6aaa/ql/src/test/results/clientpositive/join_acid_non_acid.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join_acid_non_acid.q.out b/ql/src/test/results/clientpositive/join_acid_non_acid.q.out
new file mode 100644
index 0000000..4905351
--- /dev/null
+++ b/ql/src/test/results/clientpositive/join_acid_non_acid.q.out
@@ -0,0 +1,58 @@
+PREHOOK: query: CREATE TABLE orc_update_table (k1 INT, f1 STRING, op_code STRING)
+CLUSTERED BY (k1) INTO 2 BUCKETS
+STORED AS ORC TBLPROPERTIES("transactional"="true")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_update_table
+POSTHOOK: query: CREATE TABLE orc_update_table (k1 INT, f1 STRING, op_code STRING)
+CLUSTERED BY (k1) INTO 2 BUCKETS
+STORED AS ORC TBLPROPERTIES("transactional"="true")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_update_table
+PREHOOK: query: INSERT INTO TABLE orc_update_table VALUES (1, 'a', 'I')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@orc_update_table
+POSTHOOK: query: INSERT INTO TABLE orc_update_table VALUES (1, 'a', 'I')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@orc_update_table
+POSTHOOK: Lineage: orc_update_table.f1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: orc_update_table.k1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: orc_update_table.op_code SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: CREATE TABLE orc_table (k1 INT, f1 STRING)
+CLUSTERED BY (k1) SORTED BY (k1) INTO 2 BUCKETS
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_table
+POSTHOOK: query: CREATE TABLE orc_table (k1 INT, f1 STRING)
+CLUSTERED BY (k1) SORTED BY (k1) INTO 2 BUCKETS
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_table
+PREHOOK: query: INSERT OVERWRITE TABLE orc_table VALUES (1, 'x')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@orc_table
+POSTHOOK: query: INSERT OVERWRITE TABLE orc_table VALUES (1, 'x')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@orc_table
+POSTHOOK: Lineage: orc_table.f1 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: orc_table.k1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT t1.*, t2.* FROM orc_table t1
+JOIN orc_update_table t2 ON t1.k1=t2.k1 ORDER BY t1.k1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_table
+PREHOOK: Input: default@orc_update_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t1.*, t2.* FROM orc_table t1
+JOIN orc_update_table t2 ON t1.k1=t2.k1 ORDER BY t1.k1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_table
+POSTHOOK: Input: default@orc_update_table
+#### A masked pattern was here ####
+1 x 1 a I