You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by "Rajkumar Singh (JIRA)" <ji...@apache.org> on 2018/05/30 21:31:00 UTC

[jira] [Created] (HIVE-19743) hive is not pushing predicate down to HBaseStorageHandler if hive key mapped with hbase is stored as varchar

Rajkumar Singh created HIVE-19743:
-------------------------------------

             Summary: hive is not pushing predicate down to HBaseStorageHandler if hive key mapped with hbase is stored as varchar
                 Key: HIVE-19743
                 URL: https://issues.apache.org/jira/browse/HIVE-19743
             Project: Hive
          Issue Type: Bug
          Components: HBase Handler, Hive
    Affects Versions: 2.1.0
         Environment: java8,centos7
            Reporter: Rajkumar Singh


Steps to Reproduce:

{code}

//hbase table

create 'mytable', 'cf'
put 'mytable', 'ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4', 'cf:message', 'hello world'
put 'mytable', 'ABCDEF1|GHIJK1|ijj123kl-mn4o-4pq5-678r-st90123u0v41', 'cf:foo', 0x0

// hive table with key stored as varchar

show create table hbase_table_4;

+-----------------------------------------------------------+--+

|                      createtab_stmt                       |

+-----------------------------------------------------------+--+

| CREATE EXTERNAL TABLE `hbase_table_4`(                    |

|   `hbase_key` varchar(80) COMMENT 'from deserializer',    |

|   `value` string COMMENT 'from deserializer',             |

|   `value1` string COMMENT 'from deserializer')            |

| ROW FORMAT SERDE                                          |

|   'org.apache.hadoop.hive.hbase.HBaseSerDe'               |

| STORED BY                                                 |

|   'org.apache.hadoop.hive.hbase.HBaseStorageHandler'      |

| WITH SERDEPROPERTIES (                                    |

|   'hbase.columns.mapping'=':key,cf:foo,cf:message',       |

|   'serialization.format'='1')                             |

| TBLPROPERTIES (                                           |

|   'COLUMN_STATS_ACCURATE'='{\"BASIC_STATS\":\"true\"}',   |

|   'hbase.table.name'='mytable',                           |

|   'numFiles'='0',                                         |

|   'numRows'='0',                                          |

|   'rawDataSize'='0',                                      |

|   'totalSize'='0',                                        |

|   'transient_lastDdlTime'='1527708430')                   |

+-----------------------------------------------------------+--+

 

// hive table key stored as string

| CREATE EXTERNAL TABLE `hbase_table_5`(                    |

|   `hbase_key` string COMMENT 'from deserializer',         |

|   `value` string COMMENT 'from deserializer',             |

|   `value1` string COMMENT 'from deserializer')            |

| ROW FORMAT SERDE                                          |

|   'org.apache.hadoop.hive.hbase.HBaseSerDe'               |

| STORED BY                                                 |

|   'org.apache.hadoop.hive.hbase.HBaseStorageHandler'      |

| WITH SERDEPROPERTIES (                                    |

|   'hbase.columns.mapping'=':key,cf:foo,cf:message',       |

|   'serialization.format'='1')                             |

| TBLPROPERTIES (                                           |

|   'COLUMN_STATS_ACCURATE'='{\"BASIC_STATS\":\"true\"}',   |

|   'hbase.table.name'='mytable',                           |

|   'numFiles'='0',                                         |

|   'numRows'='0',                                          |

|   'rawDataSize'='0',                                      |

|   'totalSize'='0',                                        |

|   'transient_lastDdlTime'='1527708520')                   |

 

// explain plan on table with key stored as varchar

 explain select * from hbase_table_4 where hbase_key='ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4'

 Stage-0                                                                                          |

|   Fetch Operator                                                                                 |

|     limit:-1                                                                                     |

|     Select Operator [SEL_2]                                                                      |

|       Output:["_col0","_col1","_col2"]                                                           |

|       Filter Operator [FIL_4]                                                                    |

|         predicate:(UDFToString(hbase_key) = 'ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4')  |

|         TableScan [TS_0]                                                                         |

|           Output:["hbase_key","value","value1"] 

 

// explain plan on table with key stored as string

explain select * from hbase_table_5 where hbase_key='ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4';

 Plan optimized by CBO.                  |

|                                         |

| Stage-0                                 |

|   Fetch Operator                        |

|     limit:-1                            |

|     Select Operator [SEL_2]             |

|       Output:["_col0","_col1","_col2"]  |

|       TableScan [TS_0]                  |

|         Output:["value","value1"] 

{code}

 

The predicate is pushed down correctly on the table whose HBase row key is stored as string:

{code}

2018-05-30 21:26:45,418 DEBUG [main]: ppd.OpProcFactory (OpProcFactory.java:pushFilterToStorageHandler(972)) - Original predicate:  (hbase_key = 'ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4')

2018-05-30 21:26:45,418 DEBUG [main]: ppd.OpProcFactory (OpProcFactory.java:pushFilterToStorageHandler(975)) - Pushed predicate:  (hbase_key = 'ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4')

2018-05-30 21:26:45,418 DEBUG [main]: ppd.PredicatePushDown (PredicatePushDown.java:transform(138)) - After PPD:

TS[0]-SEL[2]-FS[3]

{code}

When the row key is stored as varchar, the predicate is not pushed down to the storage handler and instead falls back to a residual predicate:

{code}

2018-05-30 21:29:29,303 DEBUG [main]: ppd.OpProcFactory (OpProcFactory.java:pushFilterToStorageHandler(972)) - Original predicate:  (hbase_key = 'ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4')

2018-05-30 21:29:29,303 DEBUG [main]: ppd.OpProcFactory (OpProcFactory.java:pushFilterToStorageHandler(980)) - Residual predicate:  (hbase_key = 'ABCDEF|GHIJK|ijj123kl-mn4o-4pq5-678r-st90123u0v4')

2018-05-30 21:29:29,303 DEBUG [main]: ppd.PredicatePushDown (PredicatePushDown.java:transform(138)) - After PPD:

TS[0]-FIL[4]-SEL[2]-FS[3]

{code}

 

 

 

 

 



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)