You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by cw...@apache.org on 2012/03/06 21:10:27 UTC
svn commit: r1297675 - in /hive/trunk:
hbase-handler/src/java/org/apache/hadoop/hive/hbase/
hbase-handler/src/test/queries/ hbase-handler/src/test/results/
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/io/ ql/src/jav...
Author: cws
Date: Tue Mar 6 20:10:26 2012
New Revision: 1297675
URL: http://svn.apache.org/viewvc?rev=1297675&view=rev
Log:
HIVE-2771 [jira] Add support for filter pushdown for key ranges in hbase for
keys of type string
(Ashutosh Chauhan via Carl Steinbach)
Summary:
https://issues.apache.org/jira/browse/HIVE-2771
This patch adds support for key range scans pushdown to hbase for keys of type
string. With this patch, filter pushdowns of the following types are supported:
a) Point lookups for keys of any type.
b) Range scans for keys of type string.
Test Plan:
Added hbase_ppd_key_range.q which is modeled after hbase_pushdown.q
This is a subtask of HIVE-1643
Test Plan: EMPTY
Reviewers: JIRA, jsichi, cwsteinbach
Reviewed By: cwsteinbach
CC: jsichi, ashutoshc
Differential Revision: https://reviews.facebook.net/D1551
Added:
hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q
hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out
Modified:
hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java?rev=1297675&r1=1297674&r2=1297675&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java Tue Mar 6 20:10:26 2012
@@ -43,7 +43,6 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
-import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
@@ -288,10 +287,13 @@ public class HBaseStorageHandler extends
org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS);
List<String> columnNames =
Arrays.asList(columnNameProperty.split(","));
+
HBaseSerDe hbaseSerde = (HBaseSerDe) deserializer;
+ String keyColName = columnNames.get(hbaseSerde.getKeyColumnOffset());
+ String keyColType = jobConf.get(org.apache.hadoop.hive.serde.Constants.LIST_COLUMN_TYPES).
+ split(",")[hbaseSerde.getKeyColumnOffset()];
IndexPredicateAnalyzer analyzer =
- HiveHBaseTableInputFormat.newIndexPredicateAnalyzer(
- columnNames.get(hbaseSerde.getKeyColumnOffset()));
+ HiveHBaseTableInputFormat.newIndexPredicateAnalyzer(keyColName, keyColType);
List<IndexSearchCondition> searchConditions =
new ArrayList<IndexSearchCondition>();
ExprNodeDesc residualPredicate =
Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java?rev=1297675&r1=1297674&r2=1297675&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java Tue Mar 6 20:10:26 2012
@@ -20,20 +20,16 @@ package org.apache.hadoop.hive.hbase;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.filter.BinaryComparator;
-import org.apache.hadoop.hbase.filter.CompareFilter;
-import org.apache.hadoop.hbase.filter.RowFilter;
-import org.apache.hadoop.hbase.filter.WhileMatchFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormatBase;
import org.apache.hadoop.hbase.mapreduce.TableSplit;
@@ -44,7 +40,6 @@ import org.apache.hadoop.hive.ql.exec.Ut
import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.serde.Constants;
@@ -55,7 +50,6 @@ import org.apache.hadoop.hive.serde2.laz
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.shims.ShimLoader;
-import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
@@ -64,7 +58,6 @@ import org.apache.hadoop.mapred.Reporter
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
/**
@@ -249,12 +242,9 @@ public class HiveHBaseTableInputFormat e
ExprNodeDesc filterExpr =
Utilities.deserializeExpression(filterExprSerialized, jobConf);
- String columnNameProperty = jobConf.get(Constants.LIST_COLUMNS);
- List<String> columnNames =
- Arrays.asList(columnNameProperty.split(","));
-
- IndexPredicateAnalyzer analyzer =
- newIndexPredicateAnalyzer(columnNames.get(iKey));
+ String colName = jobConf.get(Constants.LIST_COLUMNS).split(",")[iKey];
+ String colType = jobConf.get(Constants.LIST_COLUMN_TYPES).split(",")[iKey];
+ IndexPredicateAnalyzer analyzer = newIndexPredicateAnalyzer(colName,colType);
List<IndexSearchCondition> searchConditions =
new ArrayList<IndexSearchCondition>();
@@ -279,7 +269,7 @@ public class HiveHBaseTableInputFormat e
IndexSearchCondition sc = searchConditions.get(0);
ExprNodeConstantEvaluator eval =
new ExprNodeConstantEvaluator(sc.getConstantDesc());
- byte [] startRow;
+ byte [] row;
try {
ObjectInspector objInspector = eval.initialize(null);
Object writable = eval.evaluate(null);
@@ -291,18 +281,33 @@ public class HiveHBaseTableInputFormat e
false,
(byte) 0,
null);
- startRow = new byte[serializeStream.getCount()];
+ row = new byte[serializeStream.getCount()];
System.arraycopy(
serializeStream.getData(), 0,
- startRow, 0, serializeStream.getCount());
+ row, 0, serializeStream.getCount());
} catch (HiveException ex) {
throw new IOException(ex);
}
- // stopRow is exclusive, so pad it with a trailing 0 byte to
- // make it compare as the very next value after startRow
- byte [] stopRow = new byte[startRow.length + 1];
- System.arraycopy(startRow, 0, stopRow, 0, startRow.length);
+ byte [] startRow = HConstants.EMPTY_START_ROW, stopRow = HConstants.EMPTY_END_ROW;
+ String comparisonOp = sc.getComparisonOp();
+ if("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual".equals(comparisonOp)){
+ startRow = row;
+ stopRow = getNextBA(row);
+ } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan".equals(comparisonOp)){
+ stopRow = row;
+ } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan"
+ .equals(comparisonOp)) {
+ startRow = row;
+ } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan"
+ .equals(comparisonOp)){
+ startRow = getNextBA(row);
+ } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan"
+ .equals(comparisonOp)){
+ stopRow = getNextBA(row);
+ } else {
+ throw new IOException(comparisonOp + " is not a supported comparison operator");
+ }
if (tableSplit != null) {
tableSplit = new TableSplit(
@@ -313,18 +318,17 @@ public class HiveHBaseTableInputFormat e
}
scan.setStartRow(startRow);
scan.setStopRow(stopRow);
-
- // Add a WhileMatchFilter to make the scan terminate as soon
- // as we see a non-matching key. This is probably redundant
- // since the stopRow above should already take care of it for us.
- scan.setFilter(
- new WhileMatchFilter(
- new RowFilter(
- CompareFilter.CompareOp.EQUAL,
- new BinaryComparator(startRow))));
return tableSplit;
}
+ private byte[] getNextBA(byte[] current){
+ // startRow is inclusive while stopRow is exclusive,
+ //this util method returns very next bytearray which will occur after the current one
+ // by padding current one with a trailing 0 byte.
+ byte[] next = new byte[current.length + 1];
+ System.arraycopy(current, 0, next, 0, current.length);
+ return next;
+ }
/**
* Instantiates a new predicate analyzer suitable for
* determining how to push a filter down into the HBase scan,
@@ -335,13 +339,18 @@ public class HiveHBaseTableInputFormat e
* @return preconfigured predicate analyzer
*/
static IndexPredicateAnalyzer newIndexPredicateAnalyzer(
- String keyColumnName) {
+ String keyColumnName, String keyColType) {
IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
- // for now, we only support equality comparisons
- analyzer.addComparisonOp(
- "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual");
+ analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual");
+ if(keyColType.equalsIgnoreCase("string")){
+ analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic." +
+ "GenericUDFOPEqualOrGreaterThan");
+ analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan");
+ analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan");
+ analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan");
+ }
// and only on the key column
analyzer.clearAllowedColumnNames();
Added: hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q?rev=1297675&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q (added)
+++ hive/trunk/hbase-handler/src/test/queries/hbase_ppd_key_range.q Tue Mar 6 20:10:26 2012
@@ -0,0 +1,67 @@
+CREATE TABLE hbase_pushdown(key string, value string)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf:string");
+
+INSERT OVERWRITE TABLE hbase_pushdown
+SELECT cast(key as string), value
+FROM src;
+
+-- with full pushdown
+explain select * from hbase_pushdown where key>'90';
+
+select * from hbase_pushdown where key>'90';
+select * from hbase_pushdown where key<'1';
+select * from hbase_pushdown where key<='2';
+select * from hbase_pushdown where key>='90';
+
+-- with partial pushdown
+
+explain select * from hbase_pushdown where key>'90' and value like '%9%';
+
+select * from hbase_pushdown where key>'90' and value like '%9%';
+
+-- with two residuals
+
+explain select * from hbase_pushdown
+where key>='90' and value like '%9%' and key=cast(value as int);
+
+select * from hbase_pushdown
+where key>='90' and value like '%9%' and key=cast(value as int);
+
+
+-- with contradictory pushdowns
+
+explain select * from hbase_pushdown
+where key<'80' and key>'90' and value like '%90%';
+
+select * from hbase_pushdown
+where key<'80' and key>'90' and value like '%90%';
+
+-- with nothing to push down
+
+explain select * from hbase_pushdown;
+
+-- with a predicate which is not actually part of the filter, so
+-- it should be ignored by pushdown
+
+explain select * from hbase_pushdown
+where (case when key<'90' then 2 else 4 end) > 3;
+
+-- with a predicate which is under an OR, so it should
+-- be ignored by pushdown
+
+explain select * from hbase_pushdown
+where key<='80' or value like '%90%';
+
+-- following will not be pushed into hbase
+explain select * from hbase_pushdown where key > '281'
+and key < '287';
+
+select * from hbase_pushdown where key > '281'
+and key < '287';
+
+set hive.optimize.ppd.storage=false;
+
+-- with pushdown disabled
+
+explain select * from hbase_pushdown where key<='90';
Added: hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out?rev=1297675&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out (added)
+++ hive/trunk/hbase-handler/src/test/results/hbase_ppd_key_range.q.out Tue Mar 6 20:10:26 2012
@@ -0,0 +1,585 @@
+PREHOOK: query: CREATE TABLE hbase_pushdown(key string, value string)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf:string")
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE hbase_pushdown(key string, value string)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf:string")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@hbase_pushdown
+PREHOOK: query: INSERT OVERWRITE TABLE hbase_pushdown
+SELECT cast(key as string), value
+FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@hbase_pushdown
+POSTHOOK: query: INSERT OVERWRITE TABLE hbase_pushdown
+SELECT cast(key as string), value
+FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@hbase_pushdown
+PREHOOK: query: -- with full pushdown
+explain select * from hbase_pushdown where key>'90'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- with full pushdown
+explain select * from hbase_pushdown where key>'90'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_TABLE_OR_COL key) '90'))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ hbase_pushdown
+ TableScan
+ alias: hbase_pushdown
+ filterExpr:
+ expr: (key > '90')
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: (key > '90')
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select * from hbase_pushdown where key>'90'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown where key>'90'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+92 val_92
+95 val_95
+96 val_96
+97 val_97
+98 val_98
+PREHOOK: query: select * from hbase_pushdown where key<'1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown where key<'1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+0 val_0
+PREHOOK: query: select * from hbase_pushdown where key<='2'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown where key<='2'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
+105 val_105
+11 val_11
+111 val_111
+113 val_113
+114 val_114
+116 val_116
+118 val_118
+119 val_119
+12 val_12
+120 val_120
+125 val_125
+126 val_126
+128 val_128
+129 val_129
+131 val_131
+133 val_133
+134 val_134
+136 val_136
+137 val_137
+138 val_138
+143 val_143
+145 val_145
+146 val_146
+149 val_149
+15 val_15
+150 val_150
+152 val_152
+153 val_153
+155 val_155
+156 val_156
+157 val_157
+158 val_158
+160 val_160
+162 val_162
+163 val_163
+164 val_164
+165 val_165
+166 val_166
+167 val_167
+168 val_168
+169 val_169
+17 val_17
+170 val_170
+172 val_172
+174 val_174
+175 val_175
+176 val_176
+177 val_177
+178 val_178
+179 val_179
+18 val_18
+180 val_180
+181 val_181
+183 val_183
+186 val_186
+187 val_187
+189 val_189
+19 val_19
+190 val_190
+191 val_191
+192 val_192
+193 val_193
+194 val_194
+195 val_195
+196 val_196
+197 val_197
+199 val_199
+2 val_2
+PREHOOK: query: select * from hbase_pushdown where key>='90'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown where key>='90'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+90 val_90
+92 val_92
+95 val_95
+96 val_96
+97 val_97
+98 val_98
+PREHOOK: query: -- with partial pushdown
+
+explain select * from hbase_pushdown where key>'90' and value like '%9%'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- with partial pushdown
+
+explain select * from hbase_pushdown where key>'90' and value like '%9%'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) '90') (like (TOK_TABLE_OR_COL value) '%9%')))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ hbase_pushdown
+ TableScan
+ alias: hbase_pushdown
+ filterExpr:
+ expr: (key > '90')
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: (value like '%9%')
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select * from hbase_pushdown where key>'90' and value like '%9%'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown where key>'90' and value like '%9%'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+92 val_92
+95 val_95
+96 val_96
+97 val_97
+98 val_98
+PREHOOK: query: -- with two residuals
+
+explain select * from hbase_pushdown
+where key>='90' and value like '%9%' and key=cast(value as int)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- with two residuals
+
+explain select * from hbase_pushdown
+where key>='90' and value like '%9%' and key=cast(value as int)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (>= (TOK_TABLE_OR_COL key) '90') (like (TOK_TABLE_OR_COL value) '%9%')) (= (TOK_TABLE_OR_COL key) (TOK_FUNCTION TOK_INT (TOK_TABLE_OR_COL value)))))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ hbase_pushdown
+ TableScan
+ alias: hbase_pushdown
+ filterExpr:
+ expr: (key >= '90')
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: ((value like '%9%') and (key = UDFToInteger(value)))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select * from hbase_pushdown
+where key>='90' and value like '%9%' and key=cast(value as int)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown
+where key>='90' and value like '%9%' and key=cast(value as int)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+PREHOOK: query: -- with contradictory pushdowns
+
+explain select * from hbase_pushdown
+where key<'80' and key>'90' and value like '%90%'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- with contradictory pushdowns
+
+explain select * from hbase_pushdown
+where key<'80' and key>'90' and value like '%90%'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (< (TOK_TABLE_OR_COL key) '80') (> (TOK_TABLE_OR_COL key) '90')) (like (TOK_TABLE_OR_COL value) '%90%')))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ hbase_pushdown
+ TableScan
+ alias: hbase_pushdown
+ Filter Operator
+ predicate:
+ expr: (((key < '80') and (key > '90')) and (value like '%90%'))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select * from hbase_pushdown
+where key<'80' and key>'90' and value like '%90%'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown
+where key<'80' and key>'90' and value like '%90%'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+PREHOOK: query: -- with nothing to push down
+
+explain select * from hbase_pushdown
+PREHOOK: type: QUERY
+POSTHOOK: query: -- with nothing to push down
+
+explain select * from hbase_pushdown
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- with a predicate which is not actually part of the filter, so
+-- it should be ignored by pushdown
+
+explain select * from hbase_pushdown
+where (case when key<'90' then 2 else 4 end) > 3
+PREHOOK: type: QUERY
+POSTHOOK: query: -- with a predicate which is not actually part of the filter, so
+-- it should be ignored by pushdown
+
+explain select * from hbase_pushdown
+where (case when key<'90' then 2 else 4 end) > 3
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_FUNCTION when (< (TOK_TABLE_OR_COL key) '90') 2 4) 3))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ hbase_pushdown
+ TableScan
+ alias: hbase_pushdown
+ Filter Operator
+ predicate:
+ expr: (CASE WHEN ((key < '90')) THEN (2) ELSE (4) END > 3)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- with a predicate which is under an OR, so it should
+-- be ignored by pushdown
+
+explain select * from hbase_pushdown
+where key<='80' or value like '%90%'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- with a predicate which is under an OR, so it should
+-- be ignored by pushdown
+
+explain select * from hbase_pushdown
+where key<='80' or value like '%90%'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (<= (TOK_TABLE_OR_COL key) '80') (like (TOK_TABLE_OR_COL value) '%90%')))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ hbase_pushdown
+ TableScan
+ alias: hbase_pushdown
+ Filter Operator
+ predicate:
+ expr: ((key <= '80') or (value like '%90%'))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: -- following will not be pushed into hbase
+explain select * from hbase_pushdown where key > '281'
+and key < '287'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- following will not be pushed into hbase
+explain select * from hbase_pushdown where key > '281'
+and key < '287'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (> (TOK_TABLE_OR_COL key) '281') (< (TOK_TABLE_OR_COL key) '287')))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ hbase_pushdown
+ TableScan
+ alias: hbase_pushdown
+ Filter Operator
+ predicate:
+ expr: ((key > '281') and (key < '287'))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select * from hbase_pushdown where key > '281'
+and key < '287'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown where key > '281'
+and key < '287'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+282 val_282
+283 val_283
+284 val_284
+285 val_285
+286 val_286
+PREHOOK: query: -- with pushdown disabled
+
+explain select * from hbase_pushdown where key<='90'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- with pushdown disabled
+
+explain select * from hbase_pushdown where key<='90'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_pushdown))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (<= (TOK_TABLE_OR_COL key) '90'))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ hbase_pushdown
+ TableScan
+ alias: hbase_pushdown
+ Filter Operator
+ predicate:
+ expr: (key <= '90')
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1297675&r1=1297674&r2=1297675&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Tue Mar 6 20:10:26 2012
@@ -1916,6 +1916,22 @@ public final class Utilities {
jobConf.set(Constants.LIST_COLUMNS, columnNamesString);
}
+ public static void setColumnTypeList(JobConf jobConf, Operator op) {
+ RowSchema rowSchema = op.getSchema();
+ if (rowSchema == null) {
+ return;
+ }
+ StringBuilder columnTypes = new StringBuilder();
+ for (ColumnInfo colInfo : rowSchema.getSignature()) {
+ if (columnTypes.length() > 0) {
+ columnTypes.append(",");
+ }
+ columnTypes.append(colInfo.getType().getTypeName());
+ }
+ String columnTypesString = columnTypes.toString();
+ jobConf.set(Constants.LIST_COLUMN_TYPES, columnTypesString);
+ }
+
public static void validatePartSpec(Table tbl, Map<String, String> partSpec)
throws SemanticException {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1297675&r1=1297674&r2=1297675&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Tue Mar 6 20:10:26 2012
@@ -346,9 +346,9 @@ public class HiveInputFormat<K extends W
return;
}
- // construct column name list for reference by filter push down
+ // construct column name list and types for reference by filter push down
Utilities.setColumnNameList(jobConf, tableScan);
-
+ Utilities.setColumnTypeList(jobConf, tableScan);
// push down filters
ExprNodeDesc filterExpr = scanDesc.getFilterExpr();
if (filterExpr == null) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java?rev=1297675&r1=1297674&r2=1297675&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java Tue Mar 6 20:10:26 2012
@@ -782,6 +782,7 @@ public final class OpProcFactory {
(HiveStoragePredicateHandler) storageHandler;
JobConf jobConf = new JobConf(owi.getParseContext().getConf());
Utilities.setColumnNameList(jobConf, tableScanOp);
+ Utilities.setColumnTypeList(jobConf, tableScanOp);
Utilities.copyTableJobPropertiesToConf(
Utilities.getTableDesc(tbl),
jobConf);