You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/09/26 19:12:14 UTC
svn commit: r1627830 - in /hive/trunk:
hbase-handler/src/test/queries/positive/hbase_ppd_join.q
hbase-handler/src/test/results/positive/hbase_ppd_join.q.out
ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
Author: brock
Date: Fri Sep 26 17:12:14 2014
New Revision: 1627830
URL: http://svn.apache.org/r1627830
Log:
HIVE-8189 - A select statement with a subquery is failing with HBaseSerde (Yongzhi Chen via Brock)
Added:
hive/trunk/hbase-handler/src/test/queries/positive/hbase_ppd_join.q
hive/trunk/hbase-handler/src/test/results/positive/hbase_ppd_join.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
Added: hive/trunk/hbase-handler/src/test/queries/positive/hbase_ppd_join.q
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/queries/positive/hbase_ppd_join.q?rev=1627830&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/test/queries/positive/hbase_ppd_join.q (added)
+++ hive/trunk/hbase-handler/src/test/queries/positive/hbase_ppd_join.q Fri Sep 26 17:12:14 2014
@@ -0,0 +1,61 @@
+--create hive hbase table 1
+drop table if exists hive1_tbl_data_hbase1;
+drop table if exists hive1_tbl_data_hbase2;
+drop view if exists hive1_view_data_hbase1;
+drop view if exists hive1_view_data_hbase2;
+
+CREATE TABLE hive1_tbl_data_hbase1 (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES("hbase.columns.mapping" = "default:COLUMID,default:COLUMN_FN,default:COLUMN_LN,default:EMAIL,default:COL_UPDATED_DATE,:key"
+)
+;
+
+--create hive view for the above hive table 1
+CREATE VIEW hive1_view_data_hbase1
+AS
+SELECT *
+FROM hive1_tbl_data_hbase1
+WHERE PK_COLUM >='4000-00000'
+and PK_COLUM <='4000-99999'
+AND COL_UPDATED_DATE IS NOT NULL
+;
+
+
+--load data to hive table 1
+insert into table hive1_tbl_data_hbase1 select '00001','john','doe','john@hotmail.com','2014-01-01 12:01:02','4000-10000' from src where key = 100;
+
+--create hive hbase table 2
+CREATE TABLE hive1_tbl_data_hbase2 (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES("hbase.columns.mapping" = "default:COLUMID,default:COLUMN_FN,default:COLUMN_LN,default:EMAIL,default:COL_UPDATED_DATE,:key"
+)
+;
+
+--create hive view for the above hive hbase table 2
+CREATE VIEW hive1_view_data_hbase2
+AS
+SELECT *
+FROM hive1_tbl_data_hbase2
+where COL_UPDATED_DATE IS NOT NULL
+;
+
+
+--load data to hive hbase table 2
+insert into table hive1_tbl_data_hbase2 select '00001','john','doe','john@hotmail.com','2014-01-01 12:01:02','00001' from src where key = 100;
+;
+
+set hive.optimize.ppd = true;
+set hive.auto.convert.join=false;
+
+-- do not return value without fix
+
+select x.FIRST_NAME1, x.EMAIL1 from (
+select p.COLUMN_FN as first_name1, a.EMAIL as email1 from hive1_view_data_hbase2 p inner join hive1_view_data_hbase1 a on p.COLUMID =a.COLUMID) x;
+
+set hive.auto.convert.join=true;
+
+-- return value with/without fix
+
+select x.FIRST_NAME1, x.EMAIL1 from (
+select p.COLUMN_FN as first_name1, a.EMAIL as email1 from hive1_view_data_hbase2 p inner join hive1_view_data_hbase1 a on p.COLUMID =a.COLUMID) x;
+
Added: hive/trunk/hbase-handler/src/test/results/positive/hbase_ppd_join.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/results/positive/hbase_ppd_join.q.out?rev=1627830&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/test/results/positive/hbase_ppd_join.q.out (added)
+++ hive/trunk/hbase-handler/src/test/results/positive/hbase_ppd_join.q.out Fri Sep 26 17:12:14 2014
@@ -0,0 +1,154 @@
+PREHOOK: query: --create hive hbase table 1
+drop table if exists hive1_tbl_data_hbase1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: --create hive hbase table 1
+drop table if exists hive1_tbl_data_hbase1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists hive1_tbl_data_hbase2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists hive1_tbl_data_hbase2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop view if exists hive1_view_data_hbase1
+PREHOOK: type: DROPVIEW
+POSTHOOK: query: drop view if exists hive1_view_data_hbase1
+POSTHOOK: type: DROPVIEW
+PREHOOK: query: drop view if exists hive1_view_data_hbase2
+PREHOOK: type: DROPVIEW
+POSTHOOK: query: drop view if exists hive1_view_data_hbase2
+POSTHOOK: type: DROPVIEW
+PREHOOK: query: CREATE TABLE hive1_tbl_data_hbase1 (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES("hbase.columns.mapping" = "default:COLUMID,default:COLUMN_FN,default:COLUMN_LN,default:EMAIL,default:COL_UPDATED_DATE,:key"
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hive1_tbl_data_hbase1
+POSTHOOK: query: CREATE TABLE hive1_tbl_data_hbase1 (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES("hbase.columns.mapping" = "default:COLUMID,default:COLUMN_FN,default:COLUMN_LN,default:EMAIL,default:COL_UPDATED_DATE,:key"
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hive1_tbl_data_hbase1
+PREHOOK: query: --create hive view for the above hive table 1
+CREATE VIEW hive1_view_data_hbase1
+AS
+SELECT *
+FROM hive1_tbl_data_hbase1
+WHERE PK_COLUM >='4000-00000'
+and PK_COLUM <='4000-99999'
+AND COL_UPDATED_DATE IS NOT NULL
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@hive1_tbl_data_hbase1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hive1_view_data_hbase1
+POSTHOOK: query: --create hive view for the above hive table 1
+CREATE VIEW hive1_view_data_hbase1
+AS
+SELECT *
+FROM hive1_tbl_data_hbase1
+WHERE PK_COLUM >='4000-00000'
+and PK_COLUM <='4000-99999'
+AND COL_UPDATED_DATE IS NOT NULL
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@hive1_tbl_data_hbase1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hive1_view_data_hbase1
+PREHOOK: query: --load data to hive table 1
+insert into table hive1_tbl_data_hbase1 select '00001','john','doe','john@hotmail.com','2014-01-01 12:01:02','4000-10000' from src where key = 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@hive1_tbl_data_hbase1
+POSTHOOK: query: --load data to hive table 1
+insert into table hive1_tbl_data_hbase1 select '00001','john','doe','john@hotmail.com','2014-01-01 12:01:02','4000-10000' from src where key = 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@hive1_tbl_data_hbase1
+PREHOOK: query: --create hive hbase table 2
+CREATE TABLE hive1_tbl_data_hbase2 (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES("hbase.columns.mapping" = "default:COLUMID,default:COLUMN_FN,default:COLUMN_LN,default:EMAIL,default:COL_UPDATED_DATE,:key"
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hive1_tbl_data_hbase2
+POSTHOOK: query: --create hive hbase table 2
+CREATE TABLE hive1_tbl_data_hbase2 (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES("hbase.columns.mapping" = "default:COLUMID,default:COLUMN_FN,default:COLUMN_LN,default:EMAIL,default:COL_UPDATED_DATE,:key"
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hive1_tbl_data_hbase2
+PREHOOK: query: --create hive view for the above hive hbase table 2
+CREATE VIEW hive1_view_data_hbase2
+AS
+SELECT *
+FROM hive1_tbl_data_hbase2
+where COL_UPDATED_DATE IS NOT NULL
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@hive1_tbl_data_hbase2
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hive1_view_data_hbase2
+POSTHOOK: query: --create hive view for the above hive hbase table 2
+CREATE VIEW hive1_view_data_hbase2
+AS
+SELECT *
+FROM hive1_tbl_data_hbase2
+where COL_UPDATED_DATE IS NOT NULL
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@hive1_tbl_data_hbase2
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hive1_view_data_hbase2
+PREHOOK: query: --load data to hive hbase table 2
+insert into table hive1_tbl_data_hbase2 select '00001','john','doe','john@hotmail.com','2014-01-01 12:01:02','00001' from src where key = 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@hive1_tbl_data_hbase2
+POSTHOOK: query: --load data to hive hbase table 2
+insert into table hive1_tbl_data_hbase2 select '00001','john','doe','john@hotmail.com','2014-01-01 12:01:02','00001' from src where key = 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@hive1_tbl_data_hbase2
+PREHOOK: query: -- do not return value without fix
+
+select x.FIRST_NAME1, x.EMAIL1 from (
+select p.COLUMN_FN as first_name1, a.EMAIL as email1 from hive1_view_data_hbase2 p inner join hive1_view_data_hbase1 a on p.COLUMID =a.COLUMID) x
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive1_tbl_data_hbase1
+PREHOOK: Input: default@hive1_tbl_data_hbase2
+PREHOOK: Input: default@hive1_view_data_hbase1
+PREHOOK: Input: default@hive1_view_data_hbase2
+#### A masked pattern was here ####
+POSTHOOK: query: -- do not return value without fix
+
+select x.FIRST_NAME1, x.EMAIL1 from (
+select p.COLUMN_FN as first_name1, a.EMAIL as email1 from hive1_view_data_hbase2 p inner join hive1_view_data_hbase1 a on p.COLUMID =a.COLUMID) x
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive1_tbl_data_hbase1
+POSTHOOK: Input: default@hive1_tbl_data_hbase2
+POSTHOOK: Input: default@hive1_view_data_hbase1
+POSTHOOK: Input: default@hive1_view_data_hbase2
+#### A masked pattern was here ####
+john john@hotmail.com
+PREHOOK: query: -- return value with/without fix
+
+select x.FIRST_NAME1, x.EMAIL1 from (
+select p.COLUMN_FN as first_name1, a.EMAIL as email1 from hive1_view_data_hbase2 p inner join hive1_view_data_hbase1 a on p.COLUMID =a.COLUMID) x
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive1_tbl_data_hbase1
+PREHOOK: Input: default@hive1_tbl_data_hbase2
+PREHOOK: Input: default@hive1_view_data_hbase1
+PREHOOK: Input: default@hive1_view_data_hbase2
+#### A masked pattern was here ####
+POSTHOOK: query: -- return value with/without fix
+
+select x.FIRST_NAME1, x.EMAIL1 from (
+select p.COLUMN_FN as first_name1, a.EMAIL as email1 from hive1_view_data_hbase2 p inner join hive1_view_data_hbase1 a on p.COLUMID =a.COLUMID) x
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive1_tbl_data_hbase1
+POSTHOOK: Input: default@hive1_tbl_data_hbase2
+POSTHOOK: Input: default@hive1_view_data_hbase1
+POSTHOOK: Input: default@hive1_view_data_hbase2
+#### A masked pattern was here ####
+john john@hotmail.com
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1627830&r1=1627829&r2=1627830&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Fri Sep 26 17:12:14 2014
@@ -428,6 +428,9 @@ public class HiveInputFormat<K extends W
public static void pushFilters(JobConf jobConf, TableScanOperator tableScan) {
+ // ensure filters are not set from previous pushFilters
+ jobConf.unset(TableScanDesc.FILTER_TEXT_CONF_STR);
+ jobConf.unset(TableScanDesc.FILTER_EXPR_CONF_STR);
TableScanDesc scanDesc = tableScan.getConf();
if (scanDesc == null) {
return;