You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2012/03/22 19:50:55 UTC

svn commit: r1303978 - in /hive/trunk/hbase-handler/src: java/org/apache/hadoop/hive/hbase/ test/queries/ test/results/

Author: hashutosh
Date: Thu Mar 22 18:50:54 2012
New Revision: 1303978

URL: http://svn.apache.org/viewvc?rev=1303978&view=rev
Log:
HIVE-2819 : Closed range scans on hbase keys : Reviewed by Carl Steinbach (hashutosh)

Added:
    hive/trunk/hbase-handler/src/test/queries/ppd_key_ranges.q
    hive/trunk/hbase-handler/src/test/results/ppd_key_ranges.q.out
Modified:
    hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
    hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java
    hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q
    hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out

Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java?rev=1303978&r1=1303977&r2=1303978&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java Thu Mar 22 18:50:54 2012
@@ -301,12 +301,22 @@ public class HBaseStorageHandler extends
       new ArrayList<IndexSearchCondition>();
     ExprNodeDesc residualPredicate =
       analyzer.analyzePredicate(predicate, searchConditions);
-    if (searchConditions.size() != 1) {
+    int scSize = searchConditions.size();
+    if (scSize < 1 || 2 < scSize) {
       // Either there was nothing which could be pushed down (size = 0),
-      // or more than one predicate (size > 1); in the latter case,
-      // we bail out for now since multiple lookups on the key are
-      // either contradictory or redundant.  We'll need to handle
-      // this better later when we support more interesting predicates.
+      // or there were complex predicates which we don't support yet.
+      // Currently supported are one of the form:
+      // 1. key < 20                        (size = 1)
+      // 2. key = 20                        (size = 1)
+      // 3. key < 20 and key > 10           (size = 2)
+      return null;
+    }
+    if (scSize == 2 &&
+        (searchConditions.get(0).getComparisonOp()
+        .equals("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual") ||
+        searchConditions.get(1).getComparisonOp()
+        .equals("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual"))) {
+      // If one of the predicates is =, then any other predicate with it is illegal.
       return null;
     }
 

Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java?rev=1303978&r1=1303977&r2=1303978&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java Thu Mar 22 18:50:54 2012
@@ -274,51 +274,50 @@ public class HiveHBaseTableInputFormat e
 
     // There should be exactly one predicate since we already
     // negotiated that also.
-    if (searchConditions.size() != 1) {
+    if (searchConditions.size() < 1 || searchConditions.size() > 2) {
       throw new RuntimeException(
-        "Exactly one search condition expected in push down");
+        "Either one or two search conditions expected in push down");
     }
 
     // Convert the search condition into a restriction on the HBase scan
-    IndexSearchCondition sc = searchConditions.get(0);
-    ExprNodeConstantEvaluator eval =
-      new ExprNodeConstantEvaluator(sc.getConstantDesc());
-
-    PrimitiveObjectInspector objInspector;
-    Object writable;
-
-    try{
-      objInspector = (PrimitiveObjectInspector)eval.initialize(null);
-      writable = eval.evaluate(null);
-    } catch (ClassCastException cce) {
-      throw new IOException("Currently only primitve types are supported. Found: " +
-        sc.getConstantDesc().getTypeString());
-    } catch (HiveException e) {
-      throw new IOException(e);
-    }
-
-    byte [] constantVal = getConstantVal(writable, objInspector, isKeyBinary);
     byte [] startRow = HConstants.EMPTY_START_ROW, stopRow = HConstants.EMPTY_END_ROW;
-    String comparisonOp = sc.getComparisonOp();
+    for (IndexSearchCondition sc : searchConditions){
 
-    if("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual".equals(comparisonOp)){
-      startRow = constantVal;
-      stopRow = getNextBA(constantVal);
-    } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan".equals(comparisonOp)){
-      stopRow = constantVal;
-    } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan"
-        .equals(comparisonOp)) {
-      startRow = constantVal;
-    } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan"
-        .equals(comparisonOp)){
-      startRow = getNextBA(constantVal);
-    } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan"
-        .equals(comparisonOp)){
-      stopRow = getNextBA(constantVal);
-    } else {
-      throw new IOException(comparisonOp + " is not a supported comparison operator");
-    }
+      ExprNodeConstantEvaluator eval = new ExprNodeConstantEvaluator(sc.getConstantDesc());
+      PrimitiveObjectInspector objInspector;
+      Object writable;
+
+      try{
+        objInspector = (PrimitiveObjectInspector)eval.initialize(null);
+        writable = eval.evaluate(null);
+      } catch (ClassCastException cce) {
+        throw new IOException("Currently only primitve types are supported. Found: " +
+            sc.getConstantDesc().getTypeString());
+      } catch (HiveException e) {
+        throw new IOException(e);
+      }
 
+      byte [] constantVal = getConstantVal(writable, objInspector, isKeyBinary);
+      String comparisonOp = sc.getComparisonOp();
+
+      if("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual".equals(comparisonOp)){
+        startRow = constantVal;
+        stopRow = getNextBA(constantVal);
+      } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan".equals(comparisonOp)){
+        stopRow = constantVal;
+      } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan"
+          .equals(comparisonOp)) {
+        startRow = constantVal;
+      } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan"
+          .equals(comparisonOp)){
+        startRow = getNextBA(constantVal);
+      } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan"
+          .equals(comparisonOp)){
+        stopRow = getNextBA(constantVal);
+      } else {
+        throw new IOException(comparisonOp + " is not a supported comparison operator");
+      }
+    }
     if (tableSplit != null) {
       tableSplit = new TableSplit(
         tableSplit.getTableName(),
@@ -335,7 +334,8 @@ public class HiveHBaseTableInputFormat e
         boolean isKeyBinary) throws IOException{
 
         if (!isKeyBinary){
-          // Key is stored in text format. Get bytes representation of constant also of text format.
+          // Key is stored in text format, so get the byte representation of the
+          // constant in text format as well.
           byte[] startRow;
           ByteStream.Output serializeStream = new ByteStream.Output();
           LazyUtils.writePrimitiveUTF8(serializeStream, writable, poi, false, (byte) 0, null);

Modified: hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q?rev=1303978&r1=1303977&r2=1303978&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q (original)
+++ hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q Thu Mar 22 18:50:54 2012
@@ -53,7 +53,7 @@ where (case when key<'90' then 2 else 4 
 explain select * from hbase_pushdown
 where key<='80' or value like '%90%';
 
--- following will not be pushed into hbase
+-- following will get pushed into hbase after HIVE-2819
 explain select * from hbase_pushdown where key > '281' 
 and key < '287';
 

Added: hive/trunk/hbase-handler/src/test/queries/ppd_key_ranges.q
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/queries/ppd_key_ranges.q?rev=1303978&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/test/queries/ppd_key_ranges.q (added)
+++ hive/trunk/hbase-handler/src/test/queries/ppd_key_ranges.q Thu Mar 22 18:50:54 2012
@@ -0,0 +1,22 @@
+CREATE TABLE hbase_ppd_keyrange(key int, value string) 
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#binary,cf:string");
+
+INSERT OVERWRITE TABLE hbase_ppd_keyrange 
+SELECT *
+FROM src;
+
+explain select * from hbase_ppd_keyrange where key > 8 and key < 21;
+select * from hbase_ppd_keyrange where key > 8 and key < 21;
+
+explain select * from hbase_ppd_keyrange where key > 8 and key <= 17;
+select * from hbase_ppd_keyrange where key > 8 and key <= 17;
+
+
+explain select * from hbase_ppd_keyrange where key > 8 and key <= 17 and value like '%11%';
+select * from hbase_ppd_keyrange where key > 8 and key <= 17 and value like '%11%';
+
+explain select * from hbase_ppd_keyrange where key >= 9 and key < 17 and key = 11;
+select * from hbase_ppd_keyrange where key >=9  and key < 17 and key = 11;
+
+drop table  hbase_ppd_keyrange;

Modified: hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out?rev=1303978&r1=1303977&r2=1303978&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out (original)
+++ hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out Thu Mar 22 18:50:54 2012
@@ -327,9 +327,12 @@ STAGE PLANS:
         hbase_pushdown 
           TableScan
             alias: hbase_pushdown
+            filterExpr:
+                expr: ((key < '80') and (key > '90'))
+                type: boolean
             Filter Operator
               predicate:
-                  expr: (((key < '80') and (key > '90')) and (value like '%90%'))
+                  expr: (value like '%90%')
                   type: boolean
               Select Operator
                 expressions:
@@ -478,11 +481,11 @@ STAGE PLANS:
       limit: -1
 
 
-PREHOOK: query: -- following will not be pushed into hbase
+PREHOOK: query: -- following will get pushed into hbase after HIVE-2819
 explain select * from hbase_pushdown where key > '281' 
 and key < '287'
 PREHOOK: type: QUERY
-POSTHOOK: query: -- following will not be pushed into hbase
+POSTHOOK: query: -- following will get pushed into hbase after HIVE-2819
 explain select * from hbase_pushdown where key > '281' 
 and key < '287'
 POSTHOOK: type: QUERY
@@ -500,6 +503,9 @@ STAGE PLANS:
         hbase_pushdown 
           TableScan
             alias: hbase_pushdown
+            filterExpr:
+                expr: ((key > '281') and (key < '287'))
+                type: boolean
             Filter Operator
               predicate:
                   expr: ((key > '281') and (key < '287'))

Added: hive/trunk/hbase-handler/src/test/results/ppd_key_ranges.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/results/ppd_key_ranges.q.out?rev=1303978&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/test/results/ppd_key_ranges.q.out (added)
+++ hive/trunk/hbase-handler/src/test/results/ppd_key_ranges.q.out Thu Mar 22 18:50:54 2012
@@ -0,0 +1,251 @@
+PREHOOK: query: CREATE TABLE hbase_ppd_keyrange(key int, value string) 
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#binary,cf:string")
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE hbase_ppd_keyrange(key int, value string) 
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#binary,cf:string")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@hbase_ppd_keyrange
+PREHOOK: query: INSERT OVERWRITE TABLE hbase_ppd_keyrange 
+SELECT *
+FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@hbase_ppd_keyrange
+POSTHOOK: query: INSERT OVERWRITE TABLE hbase_ppd_keyrange 
+SELECT *
+FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@hbase_ppd_keyrange
+PREHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key < 21
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key < 21
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_ppd_keyrange))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 8) (< (TOK_TABLE_OR_COL key) 21)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        hbase_ppd_keyrange 
+          TableScan
+            alias: hbase_ppd_keyrange
+            filterExpr:
+                expr: ((key > 8) and (key < 21))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: ((key > 8) and (key < 21))
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select * from hbase_ppd_keyrange where key > 8 and key < 21
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_ppd_keyrange
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_ppd_keyrange where key > 8 and key < 21
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_ppd_keyrange
+#### A masked pattern was here ####
+9	val_9
+10	val_10
+11	val_11
+12	val_12
+15	val_15
+17	val_17
+18	val_18
+19	val_19
+20	val_20
+PREHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key <= 17
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key <= 17
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_ppd_keyrange))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) 8) (<= (TOK_TABLE_OR_COL key) 17)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        hbase_ppd_keyrange 
+          TableScan
+            alias: hbase_ppd_keyrange
+            filterExpr:
+                expr: ((key > 8) and (key <= 17))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: ((key > 8) and (key <= 17))
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select * from hbase_ppd_keyrange where key > 8 and key <= 17
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_ppd_keyrange
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_ppd_keyrange where key > 8 and key <= 17
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_ppd_keyrange
+#### A masked pattern was here ####
+9	val_9
+10	val_10
+11	val_11
+12	val_12
+15	val_15
+17	val_17
+PREHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key <= 17 and value like '%11%'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from hbase_ppd_keyrange where key > 8 and key <= 17 and value like '%11%'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_ppd_keyrange))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (> (TOK_TABLE_OR_COL key) 8) (<= (TOK_TABLE_OR_COL key) 17)) (like (TOK_TABLE_OR_COL value) '%11%')))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        hbase_ppd_keyrange 
+          TableScan
+            alias: hbase_ppd_keyrange
+            filterExpr:
+                expr: ((key > 8) and (key <= 17))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: (value like '%11%')
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select * from hbase_ppd_keyrange where key > 8 and key <= 17 and value like '%11%'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_ppd_keyrange
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_ppd_keyrange where key > 8 and key <= 17 and value like '%11%'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_ppd_keyrange
+#### A masked pattern was here ####
+11	val_11
+PREHOOK: query: explain select * from hbase_ppd_keyrange where key >= 9 and key < 17 and key = 11
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from hbase_ppd_keyrange where key >= 9 and key < 17 and key = 11
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_ppd_keyrange))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (>= (TOK_TABLE_OR_COL key) 9) (< (TOK_TABLE_OR_COL key) 17)) (= (TOK_TABLE_OR_COL key) 11)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        hbase_ppd_keyrange 
+          TableScan
+            alias: hbase_ppd_keyrange
+            Filter Operator
+              predicate:
+                  expr: (((key >= 9) and (key < 17)) and (key = 11))
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select * from hbase_ppd_keyrange where key >=9  and key < 17 and key = 11
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_ppd_keyrange
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_ppd_keyrange where key >=9  and key < 17 and key = 11
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_ppd_keyrange
+#### A masked pattern was here ####
+11	val_11
+PREHOOK: query: drop table  hbase_ppd_keyrange
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@hbase_ppd_keyrange
+PREHOOK: Output: default@hbase_ppd_keyrange
+POSTHOOK: query: drop table  hbase_ppd_keyrange
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@hbase_ppd_keyrange
+POSTHOOK: Output: default@hbase_ppd_keyrange