You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/07/07 17:25:56 UTC

svn commit: r1500452 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/metadata/Hive.java test/queries/clientpositive/index_serde.q test/results/clientpositive/index_serde.q.out

Author: hashutosh
Date: Sun Jul  7 15:25:56 2013
New Revision: 1500452

URL: http://svn.apache.org/r1500452
Log:
HIVE-4251 : Indices can't be built on tables whose schema info comes from SerDe (Mark Wagner via Ashutosh Chauhan)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/index_serde.q
    hive/trunk/ql/src/test/results/clientpositive/index_serde.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1500452&r1=1500451&r2=1500452&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Sun Jul  7 15:25:56 2013
@@ -729,8 +729,9 @@ public class Hive {
       List<Order> sortCols = new ArrayList<Order>();
       storageDescriptor.setBucketCols(null);
       int k = 0;
-      for (int i = 0; i < storageDescriptor.getCols().size(); i++) {
-        FieldSchema col = storageDescriptor.getCols().get(i);
+      Table metaBaseTbl = new Table(baseTbl);
+      for (int i = 0; i < metaBaseTbl.getCols().size(); i++) {
+        FieldSchema col = metaBaseTbl.getCols().get(i);
         if (indexedCols.contains(col.getName())) {
           indexTblCols.add(col);
           sortCols.add(new Order(col.getName(), 1));

Added: hive/trunk/ql/src/test/queries/clientpositive/index_serde.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_serde.q?rev=1500452&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_serde.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_serde.q Sun Jul  7 15:25:56 2013
@@ -0,0 +1,49 @@
+-- Want to ensure we can build and use indices on tables stored with SerDes
+-- Build the (Avro backed) table
+CREATE TABLE doctors 
+ROW FORMAT
+SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+STORED AS
+INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+TBLPROPERTIES ('avro.schema.literal'='{
+  "namespace": "testing.hive.avro.serde",
+  "name": "doctors",
+  "type": "record",
+  "fields": [
+    {
+      "name":"number",
+      "type":"int",
+      "doc":"Order of playing the role"
+    },
+    {
+      "name":"first_name",
+      "type":"string",
+      "doc":"first name of actor playing role"
+    },
+    {
+      "name":"last_name",
+      "type":"string",
+      "doc":"last name of actor playing role"
+    }
+  ]
+}');
+
+DESCRIBE doctors;
+
+LOAD DATA LOCAL INPATH '../data/files/doctors.avro' INTO TABLE doctors;
+
+-- Create and build an index
+CREATE INDEX doctors_index ON TABLE doctors(number) AS 'COMPACT' WITH DEFERRED REBUILD;
+DESCRIBE EXTENDED default__doctors_doctors_index__;
+ALTER INDEX doctors_index ON doctors REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+EXPLAIN SELECT * FROM doctors WHERE number > 6 ORDER BY number;
+SELECT * FROM doctors WHERE number > 6 ORDER BY number;
+
+DROP INDEX doctors_index ON doctors;
+DROP TABLE doctors;

Added: hive/trunk/ql/src/test/results/clientpositive/index_serde.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_serde.q.out?rev=1500452&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/index_serde.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/index_serde.q.out Sun Jul  7 15:25:56 2013
@@ -0,0 +1,280 @@
+PREHOOK: query: -- Want to ensure we can build and use indices on tables stored with SerDes
+-- Build the (Avro backed) table
+CREATE TABLE doctors 
+ROW FORMAT
+SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+STORED AS
+INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+TBLPROPERTIES ('avro.schema.literal'='{
+  "namespace": "testing.hive.avro.serde",
+  "name": "doctors",
+  "type": "record",
+  "fields": [
+    {
+      "name":"number",
+      "type":"int",
+      "doc":"Order of playing the role"
+    },
+    {
+      "name":"first_name",
+      "type":"string",
+      "doc":"first name of actor playing role"
+    },
+    {
+      "name":"last_name",
+      "type":"string",
+      "doc":"last name of actor playing role"
+    }
+  ]
+}')
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Want to ensure we can build and use indices on tables stored with SerDes
+-- Build the (Avro backed) table
+CREATE TABLE doctors 
+ROW FORMAT
+SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+STORED AS
+INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+TBLPROPERTIES ('avro.schema.literal'='{
+  "namespace": "testing.hive.avro.serde",
+  "name": "doctors",
+  "type": "record",
+  "fields": [
+    {
+      "name":"number",
+      "type":"int",
+      "doc":"Order of playing the role"
+    },
+    {
+      "name":"first_name",
+      "type":"string",
+      "doc":"first name of actor playing role"
+    },
+    {
+      "name":"last_name",
+      "type":"string",
+      "doc":"last name of actor playing role"
+    }
+  ]
+}')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@doctors
+PREHOOK: query: DESCRIBE doctors
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE doctors
+POSTHOOK: type: DESCTABLE
+number              	int                 	from deserializer   
+first_name          	string              	from deserializer   
+last_name           	string              	from deserializer   
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/doctors.avro' INTO TABLE doctors
+PREHOOK: type: LOAD
+PREHOOK: Output: default@doctors
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/doctors.avro' INTO TABLE doctors
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@doctors
+PREHOOK: query: -- Create and build an index
+CREATE INDEX doctors_index ON TABLE doctors(number) AS 'COMPACT' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: -- Create and build an index
+CREATE INDEX doctors_index ON TABLE doctors(number) AS 'COMPACT' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+POSTHOOK: Output: default@default__doctors_doctors_index__
+PREHOOK: query: DESCRIBE EXTENDED default__doctors_doctors_index__
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE EXTENDED default__doctors_doctors_index__
+POSTHOOK: type: DESCTABLE
+number              	int                 	from deserializer   
+_bucketname         	string              	                    
+_offsets            	array<bigint>       	                    
+	 	 
+#### A masked pattern was here ####
+PREHOOK: query: ALTER INDEX doctors_index ON doctors REBUILD
+PREHOOK: type: ALTERINDEX_REBUILD
+PREHOOK: Input: default@doctors
+PREHOOK: Output: default@default__doctors_doctors_index__
+POSTHOOK: query: ALTER INDEX doctors_index ON doctors REBUILD
+POSTHOOK: type: ALTERINDEX_REBUILD
+POSTHOOK: Input: default@doctors
+POSTHOOK: Output: default@default__doctors_doctors_index__
+POSTHOOK: Lineage: default__doctors_doctors_index__._bucketname SIMPLE [(doctors)doctors.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__._offsets EXPRESSION [(doctors)doctors.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__.number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:from deserializer), ]
+PREHOOK: query: EXPLAIN SELECT * FROM doctors WHERE number > 6 ORDER BY number
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT * FROM doctors WHERE number > 6 ORDER BY number
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__doctors_doctors_index__._bucketname SIMPLE [(doctors)doctors.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__._offsets EXPRESSION [(doctors)doctors.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__.number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:from deserializer), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME doctors))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_TABLE_OR_COL number) 6)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL number)))))
+
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-8 depends on stages: Stage-3 , consists of Stage-5, Stage-4, Stage-6
+  Stage-5
+  Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+  Stage-1 depends on stages: Stage-2
+  Stage-4
+  Stage-6
+  Stage-7 depends on stages: Stage-6
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+        default__doctors_doctors_index__ 
+          TableScan
+            alias: default__doctors_doctors_index__
+            filterExpr:
+                expr: (number > 6)
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: (number > 6)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: _bucketname
+                      type: string
+                      expr: _offsets
+                      type: array<bigint>
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 1
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-8
+    Conditional Operator
+
+  Stage: Stage-5
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-2
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        doctors 
+          TableScan
+            alias: doctors
+            filterExpr:
+                expr: (number > 6)
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: (number > 6)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: number
+                      type: int
+                      expr: first_name
+                      type: string
+                      expr: last_name
+                      type: string
+                outputColumnNames: _col0, _col1, _col2
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: int
+                  sort order: +
+                  tag: -1
+                  value expressions:
+                        expr: _col0
+                        type: int
+                        expr: _col1
+                        type: string
+                        expr: _col2
+                        type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-6
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-7
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT * FROM doctors WHERE number > 6 ORDER BY number
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__doctors_doctors_index__
+PREHOOK: Input: default@doctors
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM doctors WHERE number > 6 ORDER BY number
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__doctors_doctors_index__
+POSTHOOK: Input: default@doctors
+#### A masked pattern was here ####
+POSTHOOK: Lineage: default__doctors_doctors_index__._bucketname SIMPLE [(doctors)doctors.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__._offsets EXPRESSION [(doctors)doctors.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__.number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:from deserializer), ]
+7	Sylvester	McCoy
+8	Paul	McGann
+9	Christopher	Eccleston
+10	David	Tennant
+11	Matt	Smith
+PREHOOK: query: DROP INDEX doctors_index ON doctors
+PREHOOK: type: DROPINDEX
+POSTHOOK: query: DROP INDEX doctors_index ON doctors
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Lineage: default__doctors_doctors_index__._bucketname SIMPLE [(doctors)doctors.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__._offsets EXPRESSION [(doctors)doctors.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__.number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:from deserializer), ]
+PREHOOK: query: DROP TABLE doctors
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@doctors
+PREHOOK: Output: default@doctors
+POSTHOOK: query: DROP TABLE doctors
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@doctors
+POSTHOOK: Output: default@doctors
+POSTHOOK: Lineage: default__doctors_doctors_index__._bucketname SIMPLE [(doctors)doctors.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__._offsets EXPRESSION [(doctors)doctors.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__doctors_doctors_index__.number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:from deserializer), ]