Posted to commits@hive.apache.org by yc...@apache.org on 2016/12/19 16:41:22 UTC

hive git commit: HIVE-15437: avro tables join fails when - tbl join tbl_postfix (Yongzhi Chen, reviewed by Chaoyu Tang)

Repository: hive
Updated Branches:
  refs/heads/master 29cce163d -> 24577b6a3


HIVE-15437: avro tables join fails when - tbl join tbl_postfix (Yongzhi Chen, reviewed by Chaoyu Tang)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/24577b6a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/24577b6a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/24577b6a

Branch: refs/heads/master
Commit: 24577b6a34da7b955a0b94e508a4b6fedad856ff
Parents: 29cce16
Author: Yongzhi Chen <yc...@apache.org>
Authored: Thu Dec 15 11:00:02 2016 -0500
Committer: Yongzhi Chen <yc...@apache.org>
Committed: Mon Dec 19 11:26:03 2016 -0500

----------------------------------------------------------------------
 data/files/table1.avsc                          | 25 ++++++
 data/files/table1_1.avsc                        | 19 +++++
 .../ql/io/avro/AvroGenericRecordReader.java     |  7 +-
 .../test/queries/clientpositive/avrotblsjoin.q  | 28 +++++++
 .../results/clientpositive/avrotblsjoin.q.out   | 82 ++++++++++++++++++++
 5 files changed, 158 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
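
The Java change below targets AvroGenericRecordReader.pathIsInPartition(), which previously decided whether an input split belongs to a partition with a plain String.startsWith() test. Under that test a split from a sibling directory whose name merely begins with the partition path (table1 vs. table1_1) also matches, so the reader can end up applying the wrong table's Avro schema, which is what breaks the join described in the subject. The standalone sketch below, using hypothetical warehouse paths and no Hive classes, illustrates the pitfall and the component-aware check the patch switches to:

// Illustrative only: bare prefix matching (the removed logic) versus a
// path-component-aware check (the idea behind FileUtils.isPathWithinSubtree).
// The paths are hypothetical.
public class PrefixMatchPitfall {

  // Analogous to the old pathIsInPartition(): a bare startsWith() test.
  static boolean naiveMatch(String split, String partitionPath) {
    return split.startsWith(partitionPath);
  }

  // Component-aware: the matched prefix must end exactly at a path
  // separator, or cover the whole split path.
  static boolean componentAwareMatch(String split, String partitionPath) {
    return split.startsWith(partitionPath)
        && (split.length() == partitionPath.length()
            || split.charAt(partitionPath.length()) == '/');
  }

  public static void main(String[] args) {
    String partition    = "/warehouse/table1";
    String ownSplit     = "/warehouse/table1/part-00000.avro";
    String siblingSplit = "/warehouse/table1_1/part-00000.avro"; // the "tbl_postfix" table

    System.out.println(naiveMatch(ownSplit, partition));              // true
    System.out.println(naiveMatch(siblingSplit, partition));          // true  (the bug)
    System.out.println(componentAwareMatch(ownSplit, partition));     // true
    System.out.println(componentAwareMatch(siblingSplit, partition)); // false (desired)
  }
}

The patch itself delegates the component-aware check to FileUtils.isPathWithinSubtree rather than open-coding it; see the AvroGenericRecordReader.java hunk further down.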


http://git-wip-us.apache.org/repos/asf/hive/blob/24577b6a/data/files/table1.avsc
----------------------------------------------------------------------
diff --git a/data/files/table1.avsc b/data/files/table1.avsc
new file mode 100644
index 0000000..2c96ad2
--- /dev/null
+++ b/data/files/table1.avsc
@@ -0,0 +1,25 @@
+{
+  "type" : "record",
+  "name" : "table1",
+  "doc" : "Sqoop import of table1",
+  "fields" : [ {
+    "name" : "col1",
+    "type" : [ "null", "string" ],
+    "default" : null,
+    "columnName" : "col1",
+    "sqlType" : "12"
+  }, {
+    "name" : "col2",
+    "type" : [ "null", "long" ],
+    "default" : null,
+    "columnName" : "col2",
+    "sqlType" : "13"
+  }, {
+    "name" : "col3",
+    "type" : [ "null", "string" ],
+    "default" : null,
+    "columnName" : "col3",
+    "sqlType" : "12"
+  } ],
+  "tableName" : "table1"
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/24577b6a/data/files/table1_1.avsc
----------------------------------------------------------------------
diff --git a/data/files/table1_1.avsc b/data/files/table1_1.avsc
new file mode 100644
index 0000000..1a7e518
--- /dev/null
+++ b/data/files/table1_1.avsc
@@ -0,0 +1,19 @@
+{
+  "type" : "record",
+  "name" : "table1_1",
+  "doc" : "Sqoop import of table1_1",
+  "fields" : [ {
+    "name" : "col1",
+    "type" : [ "null", "long" ],
+    "default" : null,
+    "columnName" : "col1",
+    "sqlType" : "13"
+  }, {
+    "name" : "col2",
+    "type" : [ "null", "string" ],
+    "default" : null,
+    "columnName" : "col2",
+    "sqlType" : "12"
+  }],
+  "tableName" : "table1_1"
+}
\ No newline at end of file
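
For reference, both schema files added above can be inspected with the standard Avro Java API. The short sketch below is not part of the patch; it assumes the Avro library is on the classpath and that the files are read from data/files/ under the Hive source tree:

import java.io.File;
import org.apache.avro.Schema;

public class PrintAvroSchemas {
  public static void main(String[] args) throws Exception {
    for (String name : new String[] {"data/files/table1.avsc", "data/files/table1_1.avsc"}) {
      Schema schema = new Schema.Parser().parse(new File(name));
      System.out.println(schema.getName());
      for (Schema.Field field : schema.getFields()) {
        // Every column is a ["null", <type>] union with a null default,
        // i.e. a nullable column, as the Sqoop-generated schemas declare them.
        System.out.println("  " + field.name() + " : " + field.schema());
      }
    }
  }
}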

http://git-wip-us.apache.org/repos/asf/hive/blob/24577b6a/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
index 4fccfc1..68138c8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
@@ -32,6 +32,7 @@ import org.apache.avro.mapred.FsInput;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
@@ -146,10 +147,10 @@ public class AvroGenericRecordReader implements
   private boolean pathIsInPartition(Path split, Path partitionPath) {
     boolean schemeless = split.toUri().getScheme() == null;
     if (schemeless) {
-      String schemelessPartitionPath = partitionPath.toUri().getPath();
-      return split.toString().startsWith(schemelessPartitionPath);
+      Path pathNoSchema = Path.getPathWithoutSchemeAndAuthority(partitionPath);
+      return FileUtils.isPathWithinSubtree(split,pathNoSchema);
     } else {
-      return split.toString().startsWith(partitionPath.toString());
+      return FileUtils.isPathWithinSubtree(split,partitionPath);
     }
   }
 

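For completeness, the replacement helpers can be exercised directly. The small driver below is not part of the patch; it mirrors the schemeless branch of pathIsInPartition(), assumes hadoop-common and hive-common (which provides org.apache.hadoop.hive.common.FileUtils) are on the classpath, and uses hypothetical warehouse paths:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;

public class PathIsInPartitionDemo {
  public static void main(String[] args) {
    Path partitionPath = new Path("hdfs://nn:8020/warehouse/table1");
    Path ownSplit      = new Path("/warehouse/table1/part-00000.avro");   // schemeless split
    Path siblingSplit  = new Path("/warehouse/table1_1/part-00000.avro"); // belongs to the other table

    // Mirror the schemeless branch: strip scheme and authority from the
    // partition path before the subtree test.
    Path pathNoSchema = Path.getPathWithoutSchemeAndAuthority(partitionPath);

    System.out.println(FileUtils.isPathWithinSubtree(ownSplit, pathNoSchema));     // true
    System.out.println(FileUtils.isPathWithinSubtree(siblingSplit, pathNoSchema)); // false
  }
}

Because FileUtils.isPathWithinSubtree compares whole path components, /warehouse/table1_1 is no longer treated as lying under /warehouse/table1, which is exactly the tbl vs. tbl_postfix case from the subject.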
http://git-wip-us.apache.org/repos/asf/hive/blob/24577b6a/ql/src/test/queries/clientpositive/avrotblsjoin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avrotblsjoin.q b/ql/src/test/queries/clientpositive/avrotblsjoin.q
new file mode 100644
index 0000000..8c1f084
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/avrotblsjoin.q
@@ -0,0 +1,28 @@
+drop table if exists table1;
+drop table if exists table1_1;
+
+dfs -cp ${system:hive.root}data/files/table1.avsc ${system:test.tmp.dir}/;
+dfs -cp ${system:hive.root}data/files/table1_1.avsc ${system:test.tmp.dir}/;
+
+create table table1
+   ROW FORMAT SERDE
+     'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+   STORED AS INPUTFORMAT
+     'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+   OUTPUTFORMAT
+     'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+   TBLPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/table1.avsc');
+create table table1_1
+   ROW FORMAT SERDE
+     'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+   STORED AS INPUTFORMAT
+     'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+   OUTPUTFORMAT
+     'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+   TBLPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/table1_1.avsc');
+insert into table1 values ("1", "2", "3");
+insert into table1_1 values (1, "2");
+set hive.auto.convert.join=false;
+set hive.strict.checks.type.safety=false;
+set hive.mapred.mode=nonstrict;
+select table1.col1, table1_1.* from table1 join table1_1 on table1.col1=table1_1.col1 where table1_1.col1="1";

http://git-wip-us.apache.org/repos/asf/hive/blob/24577b6a/ql/src/test/results/clientpositive/avrotblsjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/avrotblsjoin.q.out b/ql/src/test/results/clientpositive/avrotblsjoin.q.out
new file mode 100644
index 0000000..d0170a3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/avrotblsjoin.q.out
@@ -0,0 +1,82 @@
+PREHOOK: query: drop table if exists table1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists table1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists table1_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists table1_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table table1
+   ROW FORMAT SERDE
+     'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+   STORED AS INPUTFORMAT
+     'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+   OUTPUTFORMAT
+     'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table1
+POSTHOOK: query: create table table1
+   ROW FORMAT SERDE
+     'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+   STORED AS INPUTFORMAT
+     'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+   OUTPUTFORMAT
+     'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table1
+PREHOOK: query: create table table1_1
+   ROW FORMAT SERDE
+     'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+   STORED AS INPUTFORMAT
+     'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+   OUTPUTFORMAT
+     'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table1_1
+POSTHOOK: query: create table table1_1
+   ROW FORMAT SERDE
+     'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+   STORED AS INPUTFORMAT
+     'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+   OUTPUTFORMAT
+     'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table1_1
+PREHOOK: query: insert into table1 values ("1", "2", "3")
+PREHOOK: type: QUERY
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table1 values ("1", "2", "3")
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.col1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1.col2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: table1.col3 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: insert into table1_1 values (1, "2")
+PREHOOK: type: QUERY
+PREHOOK: Output: default@table1_1
+POSTHOOK: query: insert into table1_1 values (1, "2")
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@table1_1
+POSTHOOK: Lineage: table1_1.col1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1_1.col2 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+WARNING: Comparing a bigint and a string may result in a loss of precision.
+Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: select table1.col1, table1_1.* from table1 join table1_1 on table1.col1=table1_1.col1 where table1_1.col1="1"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+PREHOOK: Input: default@table1_1
+#### A masked pattern was here ####
+POSTHOOK: query: select table1.col1, table1_1.* from table1 join table1_1 on table1.col1=table1_1.col1 where table1_1.col1="1"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+POSTHOOK: Input: default@table1_1
+#### A masked pattern was here ####
+1	1	2