You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2012/03/22 19:50:55 UTC
svn commit: r1303978 - in /hive/trunk/hbase-handler/src:
java/org/apache/hadoop/hive/hbase/ test/queries/ test/results/
Author: hashutosh
Date: Thu Mar 22 18:50:54 2012
New Revision: 1303978
URL: http://svn.apache.org/viewvc?rev=1303978&view=rev
Log:
HIVE-2819 : Closed range scans on hbase keys : Reviewed by Carl Steinbach (hashutosh)
Added:
hive/trunk/hbase-handler/src/test/queries/ppd_key_ranges.q
hive/trunk/hbase-handler/src/test/results/ppd_key_ranges.q.out
Modified:
hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java
hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q
hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out
Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java?rev=1303978&r1=1303977&r2=1303978&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java Thu Mar 22 18:50:54 2012
@@ -301,12 +301,22 @@ public class HBaseStorageHandler extends
new ArrayList<IndexSearchCondition>();
ExprNodeDesc residualPredicate =
analyzer.analyzePredicate(predicate, searchConditions);
- if (searchConditions.size() != 1) {
+ int scSize = searchConditions.size();
+ if (scSize < 1 || 2 < scSize) {
// Either there was nothing which could be pushed down (size = 0),
- // or more than one predicate (size > 1); in the latter case,
- // we bail out for now since multiple lookups on the key are
- // either contradictory or redundant. We'll need to handle
- // this better later when we support more interesting predicates.
+ // or there were complex predicates which we don't support yet.
+ // Currently supported predicates take one of these forms:
+ // 1. key < 20 (size = 1)
+ // 2. key = 20 (size = 1)
+ // 3. key < 20 and key > 10 (size = 2)
+ return null;
+ }
+ if (scSize == 2 &&
+ (searchConditions.get(0).getComparisonOp()
+ .equals("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual") ||
+ searchConditions.get(1).getComparisonOp()
+ .equals("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual"))) {
+ // If either predicate is '=', pairing it with a second key predicate is not pushed down.
return null;
}
Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java?rev=1303978&r1=1303977&r2=1303978&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java Thu Mar 22 18:50:54 2012
@@ -274,51 +274,50 @@ public class HiveHBaseTableInputFormat e
// There should be exactly one predicate since we already
// negotiated that also.
- if (searchConditions.size() != 1) {
+ if (searchConditions.size() < 1 || searchConditions.size() > 2) {
throw new RuntimeException(
- "Exactly one search condition expected in push down");
+ "Either one or two search conditions expected in push down");
}
// Convert the search condition into a restriction on the HBase scan
- IndexSearchCondition sc = searchConditions.get(0);
- ExprNodeConstantEvaluator eval =
- new ExprNodeConstantEvaluator(sc.getConstantDesc());
-
- PrimitiveObjectInspector objInspector;
- Object writable;
-
- try{
- objInspector = (PrimitiveObjectInspector)eval.initialize(null);
- writable = eval.evaluate(null);
- } catch (ClassCastException cce) {
- throw new IOException("Currently only primitve types are supported. Found: " +
- sc.getConstantDesc().getTypeString());
- } catch (HiveException e) {
- throw new IOException(e);
- }
-
- byte [] constantVal = getConstantVal(writable, objInspector, isKeyBinary);
byte [] startRow = HConstants.EMPTY_START_ROW, stopRow = HConstants.EMPTY_END_ROW;
- String comparisonOp = sc.getComparisonOp();
+ for (IndexSearchCondition sc : searchConditions){
- if("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual".equals(comparisonOp)){
- startRow = constantVal;
- stopRow = getNextBA(constantVal);
- } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan".equals(comparisonOp)){
- stopRow = constantVal;
- } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan"
- .equals(comparisonOp)) {
- startRow = constantVal;
- } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan"
- .equals(comparisonOp)){
- startRow = getNextBA(constantVal);
- } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan"
- .equals(comparisonOp)){
- stopRow = getNextBA(constantVal);
- } else {
- throw new IOException(comparisonOp + " is not a supported comparison operator");
- }
+ ExprNodeConstantEvaluator eval = new ExprNodeConstantEvaluator(sc.getConstantDesc());
+ PrimitiveObjectInspector objInspector;
+ Object writable;
+
+ try{
+ objInspector = (PrimitiveObjectInspector)eval.initialize(null);
+ writable = eval.evaluate(null);
+ } catch (ClassCastException cce) {
+ throw new IOException("Currently only primitve types are supported. Found: " +
+ sc.getConstantDesc().getTypeString());
+ } catch (HiveException e) {
+ throw new IOException(e);
+ }
+ byte [] constantVal = getConstantVal(writable, objInspector, isKeyBinary);
+ String comparisonOp = sc.getComparisonOp();
+
+ if("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual".equals(comparisonOp)){
+ startRow = constantVal;
+ stopRow = getNextBA(constantVal);
+ } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan".equals(comparisonOp)){
+ stopRow = constantVal;
+ } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan"
+ .equals(comparisonOp)) {
+ startRow = constantVal;
+ } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan"
+ .equals(comparisonOp)){
+ startRow = getNextBA(constantVal);
+ } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan"
+ .equals(comparisonOp)){
+ stopRow = getNextBA(constantVal);
+ } else {
+ throw new IOException(comparisonOp + " is not a supported comparison operator");
+ }
+ }
if (tableSplit != null) {
tableSplit = new TableSplit(
tableSplit.getTableName(),
@@ -335,7 +334,8 @@ public class HiveHBaseTableInputFormat e
boolean isKeyBinary) throws IOException{
if (!isKeyBinary){
- // Key is stored in text format. Get bytes representation of constant also of text format.
+ // Key is stored in text format. Get bytes representation of constant also of
+ // text format.
byte[] startRow;
ByteStream.Output serializeStream = new ByteStream.Output();
LazyUtils.writePrimitiveUTF8(serializeStream, writable, poi, false, (byte) 0, null);
Modified: hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q?rev=1303978&r1=1303977&r2=1303978&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q (original)
+++ hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q Thu Mar 22 18:50:54 2012
@@ -53,7 +53,7 @@ where (case when key<'90' then 2 else 4
explain select * from hbase_pushdown
where key<='80' or value like '%90%';
--- following will not be pushed into hbase
+-- following will get pushed into hbase after HIVE-2819
explain select * from hbase_pushdown where key > '281'
and key < '287';
Added: hive/trunk/hbase-handler/src/test/queries/ppd_key_ranges.q
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/queries/ppd_key_ranges.q?rev=1303978&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/test/queries/ppd_key_ranges.q (added)
+++ hive/trunk/hbase-handler/src/test/queries/ppd_key_ranges.q Thu Mar 22 18:50:54 2012
@@ -0,0 +1,22 @@
+CREATE TABLE hbase_ppd_keyrange(key int, value string)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#binary,cf:string");
+
+INSERT OVERWRITE TABLE hbase_ppd_keyrange
+SELECT *
+FROM src;
+
+explain select * from hbase_ppd_keyrange where key > 8 and key < 21;
+select * from hbase_ppd_keyrange where key > 8 and key < 21;
+
+explain select * from hbase_ppd_keyrange where key > 8 and key <= 17;
+select * from hbase_ppd_keyrange where key > 8 and key <= 17;
+
+
+explain select * from hbase_ppd_keyrange where key > 8 and key <= 17 and value like '%11%';
+select * from hbase_ppd_keyrange where key > 8 and key <= 17 and value like '%11%';
+
+explain select * from hbase_ppd_keyrange where key >= 9 and key < 17 and key = 11;
+select * from hbase_ppd_keyrange where key >=9 and key < 17 and key = 11;
+
+drop table hbase_ppd_keyrange;
Modified: hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out?rev=1303978&r1=1303977&r2=1303978&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out (original)
+++ hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out Thu Mar 22 18:50:54 2012
@@ -327,9 +327,12 @@ STAGE PLANS:
hbase_pushdown
TableScan
alias: hbase_pushdown
+ filterExpr:
+ expr: ((key < '80') and (key > '90'))
+ type: boolean
Filter Operator
predicate:
- expr: (((key < '80') and (key > '90')) and (value like '%90%'))
+ expr: (value like '%90%')
type: boolean
Select Operator
expressions:
@@ -478,11 +481,11 @@ STAGE PLANS:
limit: -1
-PREHOOK: query: -- following will not be pushed into hbase
+PREHOOK: query: -- following will get pushed into hbase after HIVE-2819
explain select * from hbase_pushdown where key > '281'
and key < '287'
PREHOOK: type: QUERY
-POSTHOOK: query: -- following will not be pushed into hbase
+POSTHOOK: query: -- following will get pushed into hbase after HIVE-2819
explain select * from hbase_pushdown where key > '281'
and key < '287'
POSTHOOK: type: QUERY
@@ -500,6 +503,9 @@ STAGE PLANS:
hbase_pushdown
TableScan
alias: hbase_pushdown
+ filterExpr:
+ expr: ((key > '281') and (key < '287'))
+ type: boolean
Filter Operator
predicate:
expr: ((key > '281') and (key < '287'))
Added: hive/trunk/hbase-handler/src/test/results/ppd_key_ranges.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/results/ppd_key_ranges.q.out?rev=1303978&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/test/results/ppd_key_ranges.q.out (added)
+++ hive/trunk/hbase-handler/src/test/results/ppd_key_ranges.q.out Thu Mar 22 18:50:54 2012
@@ -0,0 +1,251 @@
+PREHOOK: query: CREATE TABLE hbase_ppd_keyrange(key int, value string)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#binary,cf:string")
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE hbase_ppd_keyrange(key int, value string)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#binary,cf:string")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@hbase_ppd_keyrange
+PREHOOK: query: INSERT OVERWRITE TABLE hbase_ppd_keyrange
+SELECT *
+FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@hbase_ppd_keyrange
+POSTHOOK: query: INSERT OVERWRITE TABLE hbase_ppd_keyrange
+SELECT *
+FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@hbase_ppd_keyrange
+PREHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key < 21
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key < 21
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_ppd_keyrange))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 8) (< (TOK_TABLE_OR_COL key) 21)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ hbase_ppd_keyrange
+ TableScan
+ alias: hbase_ppd_keyrange
+ filterExpr:
+ expr: ((key > 8) and (key < 21))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: ((key > 8) and (key < 21))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select * from hbase_ppd_keyrange where key > 8 and key < 21
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_ppd_keyrange
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_ppd_keyrange where key > 8 and key < 21
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_ppd_keyrange
+#### A masked pattern was here ####
+9 val_9
+10 val_10
+11 val_11
+12 val_12
+15 val_15
+17 val_17
+18 val_18
+19 val_19
+20 val_20
+PREHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key <= 17
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key <= 17
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_ppd_keyrange))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 8) (<= (TOK_TABLE_OR_COL key) 17)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ hbase_ppd_keyrange
+ TableScan
+ alias: hbase_ppd_keyrange
+ filterExpr:
+ expr: ((key > 8) and (key <= 17))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: ((key > 8) and (key <= 17))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select * from hbase_ppd_keyrange where key > 8 and key <= 17
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_ppd_keyrange
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_ppd_keyrange where key > 8 and key <= 17
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_ppd_keyrange
+#### A masked pattern was here ####
+9 val_9
+10 val_10
+11 val_11
+12 val_12
+15 val_15
+17 val_17
+PREHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key <= 17 and value like '%11%'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key <= 17 and value like '%11%'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_ppd_keyrange))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (> (TOK_TABLE_OR_COL key) 8) (<= (TOK_TABLE_OR_COL key) 17)) (like (TOK_TABLE_OR_COL value) '%11%')))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ hbase_ppd_keyrange
+ TableScan
+ alias: hbase_ppd_keyrange
+ filterExpr:
+ expr: ((key > 8) and (key <= 17))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: (value like '%11%')
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select * from hbase_ppd_keyrange where key > 8 and key <= 17 and value like '%11%'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_ppd_keyrange
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_ppd_keyrange where key > 8 and key <= 17 and value like '%11%'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_ppd_keyrange
+#### A masked pattern was here ####
+11 val_11
+PREHOOK: query: explain select * from hbase_ppd_keyrange where key >= 9 and key < 17 and key = 11
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from hbase_ppd_keyrange where key >= 9 and key < 17 and key = 11
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_ppd_keyrange))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (>= (TOK_TABLE_OR_COL key) 9) (< (TOK_TABLE_OR_COL key) 17)) (= (TOK_TABLE_OR_COL key) 11)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ hbase_ppd_keyrange
+ TableScan
+ alias: hbase_ppd_keyrange
+ Filter Operator
+ predicate:
+ expr: (((key >= 9) and (key < 17)) and (key = 11))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select * from hbase_ppd_keyrange where key >=9 and key < 17 and key = 11
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_ppd_keyrange
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_ppd_keyrange where key >=9 and key < 17 and key = 11
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_ppd_keyrange
+#### A masked pattern was here ####
+11 val_11
+PREHOOK: query: drop table hbase_ppd_keyrange
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@hbase_ppd_keyrange
+PREHOOK: Output: default@hbase_ppd_keyrange
+POSTHOOK: query: drop table hbase_ppd_keyrange
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@hbase_ppd_keyrange
+POSTHOOK: Output: default@hbase_ppd_keyrange