You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2015/08/16 02:27:58 UTC

hive git commit: HIVE-11285 : ObjectInspector for partition columns in FetchOperator in SMBJoin causes exception (Pengcheng Xiong via Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/branch-1.0 ec2ae2c73 -> 8e08432f0


HIVE-11285 : ObjectInspector for partition columns in FetchOperator in SMBJoin causes exception (Pengcheng Xiong via Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8e08432f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8e08432f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8e08432f

Branch: refs/heads/branch-1.0
Commit: 8e08432f081079d407a69ebaa76dff3b113d8ac6
Parents: ec2ae2c
Author: Pengcheng Xiong <px...@hortonworks.com>
Authored: Thu Jul 16 02:09:00 2015 +0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Sat Aug 15 17:27:42 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/FetchOperator.java      |   5 +-
 .../clientpositive/smb_join_partition_key.q     |  35 +++++
 .../clientpositive/smb_join_partition_key.q.out | 128 +++++++++++++++++++
 3 files changed, 166 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/8e08432f/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
index 8422782..00efe4e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
@@ -293,8 +293,9 @@ public class FetchOperator implements Serializable {
     for (int i = 0; i < partKeys.length; i++) {
       String key = partKeys[i];
       partNames.add(key);    
-      ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
-          TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));
+      ObjectInspector oi = PrimitiveObjectInspectorFactory
+              .getPrimitiveWritableObjectInspector(TypeInfoFactory
+                      .getPrimitiveTypeInfo(partKeyTypes[i]));
       partObjectInspectors.add(oi);
     }
     StructObjectInspector partObjectInspector = ObjectInspectorFactory

http://git-wip-us.apache.org/repos/asf/hive/blob/8e08432f/ql/src/test/queries/clientpositive/smb_join_partition_key.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/smb_join_partition_key.q b/ql/src/test/queries/clientpositive/smb_join_partition_key.q
new file mode 100644
index 0000000..49e2d2f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/smb_join_partition_key.q
@@ -0,0 +1,35 @@
+SET hive.execution.engine=mr; 
+SET hive.enforce.sortmergebucketmapjoin=false; 
+SET hive.auto.convert.sortmerge.join=true; 
+SET hive.optimize.bucketmapjoin = true; 
+SET hive.optimize.bucketmapjoin.sortedmerge = true; 
+SET hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+SET hive.exec.dynamic.partition.mode=nonstrict;
+
+CREATE TABLE data_table (key INT, value STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'; 
+
+insert into table data_table values(1, 'one');
+insert into table data_table values(2, 'two');
+
+CREATE TABLE smb_table (key INT, value STRING) CLUSTERED BY (key) 
+SORTED BY (key) INTO 1 BUCKETS STORED AS ORC;
+
+CREATE TABLE smb_table_part (key INT, value STRING) PARTITIONED BY (p1 DECIMAL) 
+CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS 
+STORED AS ORC;
+
+INSERT OVERWRITE TABLE smb_table SELECT * FROM data_table; 
+
+INSERT OVERWRITE TABLE smb_table_part PARTITION (p1) SELECT key, value, 100 as p1 FROM data_table;
+
+SELECT s1.key, s2.p1 FROM smb_table s1 INNER JOIN smb_table_part s2 ON s1.key = s2.key ORDER BY s1.key;
+
+drop table smb_table_part;
+
+CREATE TABLE smb_table_part (key INT, value STRING) PARTITIONED BY (p1 double) 
+CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS 
+STORED AS ORC;
+
+INSERT OVERWRITE TABLE smb_table_part PARTITION (p1) SELECT key, value, 100 as p1 FROM data_table;
+
+SELECT s1.key, s2.p1 FROM smb_table s1 INNER JOIN smb_table_part s2 ON s1.key = s2.key ORDER BY s1.key;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/8e08432f/ql/src/test/results/clientpositive/smb_join_partition_key.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/smb_join_partition_key.q.out b/ql/src/test/results/clientpositive/smb_join_partition_key.q.out
new file mode 100644
index 0000000..a4ab8c3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/smb_join_partition_key.q.out
@@ -0,0 +1,128 @@
+PREHOOK: query: CREATE TABLE data_table (key INT, value STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@data_table
+POSTHOOK: query: CREATE TABLE data_table (key INT, value STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@data_table
+PREHOOK: query: insert into table data_table values(1, 'one')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@data_table
+POSTHOOK: query: insert into table data_table values(1, 'one')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@data_table
+POSTHOOK: Lineage: data_table.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: data_table.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into table data_table values(2, 'two')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@data_table
+POSTHOOK: query: insert into table data_table values(2, 'two')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@data_table
+POSTHOOK: Lineage: data_table.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: data_table.value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: CREATE TABLE smb_table (key INT, value STRING) CLUSTERED BY (key) 
+SORTED BY (key) INTO 1 BUCKETS STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smb_table
+POSTHOOK: query: CREATE TABLE smb_table (key INT, value STRING) CLUSTERED BY (key) 
+SORTED BY (key) INTO 1 BUCKETS STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smb_table
+PREHOOK: query: CREATE TABLE smb_table_part (key INT, value STRING) PARTITIONED BY (p1 DECIMAL) 
+CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS 
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smb_table_part
+POSTHOOK: query: CREATE TABLE smb_table_part (key INT, value STRING) PARTITIONED BY (p1 DECIMAL) 
+CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS 
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smb_table_part
+PREHOOK: query: INSERT OVERWRITE TABLE smb_table SELECT * FROM data_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@data_table
+PREHOOK: Output: default@smb_table
+POSTHOOK: query: INSERT OVERWRITE TABLE smb_table SELECT * FROM data_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@data_table
+POSTHOOK: Output: default@smb_table
+POSTHOOK: Lineage: smb_table.key SIMPLE [(data_table)data_table.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: smb_table.value SIMPLE [(data_table)data_table.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: INSERT OVERWRITE TABLE smb_table_part PARTITION (p1) SELECT key, value, 100 as p1 FROM data_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@data_table
+PREHOOK: Output: default@smb_table_part
+POSTHOOK: query: INSERT OVERWRITE TABLE smb_table_part PARTITION (p1) SELECT key, value, 100 as p1 FROM data_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@data_table
+POSTHOOK: Output: default@smb_table_part@p1=100
+POSTHOOK: Lineage: smb_table_part PARTITION(p1=100).key SIMPLE [(data_table)data_table.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: smb_table_part PARTITION(p1=100).value SIMPLE [(data_table)data_table.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: SELECT s1.key, s2.p1 FROM smb_table s1 INNER JOIN smb_table_part s2 ON s1.key = s2.key ORDER BY s1.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smb_table
+PREHOOK: Input: default@smb_table_part
+PREHOOK: Input: default@smb_table_part@p1=100
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT s1.key, s2.p1 FROM smb_table s1 INNER JOIN smb_table_part s2 ON s1.key = s2.key ORDER BY s1.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smb_table
+POSTHOOK: Input: default@smb_table_part
+POSTHOOK: Input: default@smb_table_part@p1=100
+#### A masked pattern was here ####
+1	100
+2	100
+PREHOOK: query: drop table smb_table_part
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@smb_table_part
+PREHOOK: Output: default@smb_table_part
+POSTHOOK: query: drop table smb_table_part
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@smb_table_part
+POSTHOOK: Output: default@smb_table_part
+PREHOOK: query: CREATE TABLE smb_table_part (key INT, value STRING) PARTITIONED BY (p1 double) 
+CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS 
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smb_table_part
+POSTHOOK: query: CREATE TABLE smb_table_part (key INT, value STRING) PARTITIONED BY (p1 double) 
+CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS 
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smb_table_part
+PREHOOK: query: INSERT OVERWRITE TABLE smb_table_part PARTITION (p1) SELECT key, value, 100 as p1 FROM data_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@data_table
+PREHOOK: Output: default@smb_table_part
+POSTHOOK: query: INSERT OVERWRITE TABLE smb_table_part PARTITION (p1) SELECT key, value, 100 as p1 FROM data_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@data_table
+POSTHOOK: Output: default@smb_table_part@p1=100
+POSTHOOK: Lineage: smb_table_part PARTITION(p1=100).key SIMPLE [(data_table)data_table.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: smb_table_part PARTITION(p1=100).value SIMPLE [(data_table)data_table.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: SELECT s1.key, s2.p1 FROM smb_table s1 INNER JOIN smb_table_part s2 ON s1.key = s2.key ORDER BY s1.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smb_table
+PREHOOK: Input: default@smb_table_part
+PREHOOK: Input: default@smb_table_part@p1=100
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT s1.key, s2.p1 FROM smb_table s1 INNER JOIN smb_table_part s2 ON s1.key = s2.key ORDER BY s1.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smb_table
+POSTHOOK: Input: default@smb_table_part
+POSTHOOK: Input: default@smb_table_part@p1=100
+#### A masked pattern was here ####
+1	100.0
+2	100.0