You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/07/07 17:25:56 UTC
svn commit: r1500452 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/metadata/Hive.java
test/queries/clientpositive/index_serde.q
test/results/clientpositive/index_serde.q.out
Author: hashutosh
Date: Sun Jul 7 15:25:56 2013
New Revision: 1500452
URL: http://svn.apache.org/r1500452
Log:
HIVE-4251 : Indices can't be built on tables whose schema info comes from SerDe (Mark Wagner via Ashutosh Chauhan)
Added:
hive/trunk/ql/src/test/queries/clientpositive/index_serde.q
hive/trunk/ql/src/test/results/clientpositive/index_serde.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1500452&r1=1500451&r2=1500452&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Sun Jul 7 15:25:56 2013
@@ -729,8 +729,9 @@ public class Hive {
List<Order> sortCols = new ArrayList<Order>();
storageDescriptor.setBucketCols(null);
int k = 0;
- for (int i = 0; i < storageDescriptor.getCols().size(); i++) {
- FieldSchema col = storageDescriptor.getCols().get(i);
+ Table metaBaseTbl = new Table(baseTbl);
+ for (int i = 0; i < metaBaseTbl.getCols().size(); i++) {
+ FieldSchema col = metaBaseTbl.getCols().get(i);
if (indexedCols.contains(col.getName())) {
indexTblCols.add(col);
sortCols.add(new Order(col.getName(), 1));
Added: hive/trunk/ql/src/test/queries/clientpositive/index_serde.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_serde.q?rev=1500452&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_serde.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_serde.q Sun Jul 7 15:25:56 2013
@@ -0,0 +1,49 @@
+-- Want to ensure we can build and use indices on tables stored with SerDes
+-- Build the (Avro backed) table
+CREATE TABLE doctors
+ROW FORMAT
+SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+STORED AS
+INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+TBLPROPERTIES ('avro.schema.literal'='{
+ "namespace": "testing.hive.avro.serde",
+ "name": "doctors",
+ "type": "record",
+ "fields": [
+ {
+ "name":"number",
+ "type":"int",
+ "doc":"Order of playing the role"
+ },
+ {
+ "name":"first_name",
+ "type":"string",
+ "doc":"first name of actor playing role"
+ },
+ {
+ "name":"last_name",
+ "type":"string",
+ "doc":"last name of actor playing role"
+ }
+ ]
+}');
+
+DESCRIBE doctors;
+
+LOAD DATA LOCAL INPATH '../data/files/doctors.avro' INTO TABLE doctors;
+
+-- Create and build an index
+CREATE INDEX doctors_index ON TABLE doctors(number) AS 'COMPACT' WITH DEFERRED REBUILD;
+DESCRIBE EXTENDED default__doctors_doctors_index__;
+ALTER INDEX doctors_index ON doctors REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+EXPLAIN SELECT * FROM doctors WHERE number > 6 ORDER BY number;
+SELECT * FROM doctors WHERE number > 6 ORDER BY number;
+
+DROP INDEX doctors_index ON doctors;
+DROP TABLE doctors;
Added: hive/trunk/ql/src/test/results/clientpositive/index_serde.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_serde.q.out?rev=1500452&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/index_serde.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/index_serde.q.out Sun Jul 7 15:25:56 2013
@@ -0,0 +1,280 @@
+PREHOOK: query: -- Want to ensure we can build and use indices on tables stored with SerDes
+-- Build the (Avro backed) table
+CREATE TABLE doctors
+ROW FORMAT
+SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+STORED AS
+INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+TBLPROPERTIES ('avro.schema.literal'='{
+ "namespace": "testing.hive.avro.serde",
+ "name": "doctors",
+ "type": "record",
+ "fields": [
+ {
+ "name":"number",
+ "type":"int",
+ "doc":"Order of playing the role"
+ },
+ {
+ "name":"first_name",
+ "type":"string",
+ "doc":"first name of actor playing role"
+ },
+ {
+ "name":"last_name",
+ "type":"string",
+ "doc":"last name of actor playing role"
+ }
+ ]
+}')
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Want to ensure we can build and use indices on tables stored with SerDes
+-- Build the (Avro backed) table
+CREATE TABLE doctors
+ROW FORMAT
+SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+STORED AS
+INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+TBLPROPERTIES ('avro.schema.literal'='{
+ "namespace": "testing.hive.avro.serde",
+ "name": "doctors",
+ "type": "record",
+ "fields": [
+ {
+ "name":"number",
+ "type":"int",
+ "doc":"Order of playing the role"
+ },
+ {
+ "name":"first_name",
+ "type":"string",
+ "doc":"first name of actor playing role"
+ },
+ {
+ "name":"last_name",
+ "type":"string",
+ "doc":"last name of actor playing role"
+ }
+ ]
+}')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@doctors
+PREHOOK: query: DESCRIBE doctors
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE doctors
+POSTHOOK: type: DESCTABLE
+number int from deserializer
+first_name string from deserializer
+last_name string from deserializer
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/doctors.avro' INTO TABLE doctors
+PREHOOK: type: LOAD
+PREHOOK: Output: default@doctors
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/doctors.avro' INTO TABLE doctors
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@doctors
+PREHOOK: query: -- Create and build an index
+CREATE INDEX doctors_index ON TABLE doctors(number) AS 'COMPACT' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: -- Create and build an index
+CREATE INDEX doctors_index ON TABLE doctors(number) AS 'COMPACT' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+POSTHOOK: Output: default@default__doctors_doctors_index__
+PREHOOK: query: DESCRIBE EXTENDED default__doctors_doctors_index__
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE EXTENDED default__doctors_doctors_index__
+POSTHOOK: type: DESCTABLE
+number int from deserializer
+_bucketname string
+_offsets array<bigint>
+
+#### A masked pattern was here ####
+PREHOOK: query: ALTER INDEX doctors_index ON doctors REBUILD
+PREHOOK: type: ALTERINDEX_REBUILD
+PREHOOK: Input: default@doctors
+PREHOOK: Output: default@default__doctors_doctors_index__
+POSTHOOK: query: ALTER INDEX doctors_index ON doctors REBUILD
+POSTHOOK: type: ALTERINDEX_REBUILD
+POSTHOOK: Input: default@doctors
+POSTHOOK: Output: default@default__doctors_doctors_index__
+POSTHOOK: Lineage: default__doctors_doctors_index__._bucketname SIMPLE [(doctors)doctors.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__._offsets EXPRESSION [(doctors)doctors.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__.number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:from deserializer), ]
+PREHOOK: query: EXPLAIN SELECT * FROM doctors WHERE number > 6 ORDER BY number
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT * FROM doctors WHERE number > 6 ORDER BY number
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__doctors_doctors_index__._bucketname SIMPLE [(doctors)doctors.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__._offsets EXPRESSION [(doctors)doctors.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__.number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:from deserializer), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME doctors))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_TABLE_OR_COL number) 6)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL number)))))
+
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-8 depends on stages: Stage-3 , consists of Stage-5, Stage-4, Stage-6
+ Stage-5
+ Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-1 depends on stages: Stage-2
+ Stage-4
+ Stage-6
+ Stage-7 depends on stages: Stage-6
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+ default__doctors_doctors_index__
+ TableScan
+ alias: default__doctors_doctors_index__
+ filterExpr:
+ expr: (number > 6)
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: (number > 6)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: _bucketname
+ type: string
+ expr: _offsets
+ type: array<bigint>
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-2
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ doctors
+ TableScan
+ alias: doctors
+ filterExpr:
+ expr: (number > 6)
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: (number > 6)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: number
+ type: int
+ expr: first_name
+ type: string
+ expr: last_name
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-6
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT * FROM doctors WHERE number > 6 ORDER BY number
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__doctors_doctors_index__
+PREHOOK: Input: default@doctors
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM doctors WHERE number > 6 ORDER BY number
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__doctors_doctors_index__
+POSTHOOK: Input: default@doctors
+#### A masked pattern was here ####
+POSTHOOK: Lineage: default__doctors_doctors_index__._bucketname SIMPLE [(doctors)doctors.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__._offsets EXPRESSION [(doctors)doctors.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__.number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:from deserializer), ]
+7 Sylvester McCoy
+8 Paul McGann
+9 Christopher Eccleston
+10 David Tennant
+11 Matt Smith
+PREHOOK: query: DROP INDEX doctors_index ON doctors
+PREHOOK: type: DROPINDEX
+POSTHOOK: query: DROP INDEX doctors_index ON doctors
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Lineage: default__doctors_doctors_index__._bucketname SIMPLE [(doctors)doctors.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__._offsets EXPRESSION [(doctors)doctors.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__.number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:from deserializer), ]
+PREHOOK: query: DROP TABLE doctors
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@doctors
+PREHOOK: Output: default@doctors
+POSTHOOK: query: DROP TABLE doctors
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@doctors
+POSTHOOK: Output: default@doctors
+POSTHOOK: Lineage: default__doctors_doctors_index__._bucketname SIMPLE [(doctors)doctors.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__._offsets EXPRESSION [(doctors)doctors.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__.number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:from deserializer), ]