You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/11/24 21:04:52 UTC

svn commit: r1641464 - in /hive/branches/branch-0.14/ql/src: java/org/apache/hadoop/hive/ql/metadata/ test/queries/clientpositive/ test/results/clientpositive/

Author: hashutosh
Date: Mon Nov 24 20:04:51 2014
New Revision: 1641464

URL: http://svn.apache.org/r1641464
Log:
HIVE-5631 : Index creation on a skew table fails (Venki Korukanti via Ashutosh Chauhan)

Added:
    hive/branches/branch-0.14/ql/src/test/queries/clientpositive/index_skewtable.q
    hive/branches/branch-0.14/ql/src/test/results/clientpositive/index_skewtable.q.out
Modified:
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
    hive/branches/branch-0.14/ql/src/test/results/clientpositive/authorization_index.q.out

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1641464&r1=1641463&r2=1641464&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Mon Nov 24 20:04:51 2014
@@ -92,6 +92,7 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
 import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
 import org.apache.hadoop.hive.metastore.api.SkewedInfo;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -748,8 +749,9 @@ public class Hive {
         throw new HiveException("Table name " + indexTblName + " already exists. Choose another name.");
       }
 
-      org.apache.hadoop.hive.metastore.api.StorageDescriptor storageDescriptor = baseTbl.getSd().deepCopy();
-      SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
+      SerDeInfo serdeInfo = new SerDeInfo();
+      serdeInfo.setName(indexTblName);
+
       if(serde != null) {
         serdeInfo.setSerializationLib(serde);
       } else {
@@ -762,6 +764,7 @@ public class Hive {
         }
       }
 
+      serdeInfo.setParameters(new HashMap<String, String>());
       if (fieldDelim != null) {
         serdeInfo.getParameters().put(FIELD_DELIM, fieldDelim);
         serdeInfo.getParameters().put(SERIALIZATION_FORMAT, fieldDelim);
@@ -788,18 +791,8 @@ public class Hive {
         }
       }
 
-      storageDescriptor.setLocation(null);
-      if (location != null) {
-        storageDescriptor.setLocation(location);
-      }
-      storageDescriptor.setInputFormat(inputFormat);
-      storageDescriptor.setOutputFormat(outputFormat);
-
-      Map<String, String> params = new HashMap<String,String>();
-
       List<FieldSchema> indexTblCols = new ArrayList<FieldSchema>();
       List<Order> sortCols = new ArrayList<Order>();
-      storageDescriptor.setBucketCols(null);
       int k = 0;
       Table metaBaseTbl = new Table(baseTbl);
       for (int i = 0; i < metaBaseTbl.getCols().size(); i++) {
@@ -815,9 +808,6 @@ public class Hive {
             "Check the index columns, they should appear in the table being indexed.");
       }
 
-      storageDescriptor.setCols(indexTblCols);
-      storageDescriptor.setSortCols(sortCols);
-
       int time = (int) (System.currentTimeMillis() / 1000);
       org.apache.hadoop.hive.metastore.api.Table tt = null;
       HiveIndexHandler indexHandler = HiveUtils.getIndexHandler(this.getConf(), indexHandlerClass);
@@ -851,8 +841,21 @@ public class Hive {
 
       String tdname = Utilities.getDatabaseName(tableName);
       String ttname = Utilities.getTableName(tableName);
+
+      StorageDescriptor indexSd = new StorageDescriptor(
+          indexTblCols,
+          location,
+          inputFormat,
+          outputFormat,
+          false/*compressed - not used*/,
+          -1/*numBuckets - default is -1 when the table has no buckets*/,
+          serdeInfo,
+          null/*bucketCols*/,
+          sortCols,
+          null/*parameters*/);
+
       Index indexDesc = new Index(indexName, indexHandlerClass, tdname, ttname, time, time, indexTblName,
-          storageDescriptor, params, deferredRebuild);
+          indexSd, new HashMap<String,String>(), deferredRebuild);
       if (indexComment != null) {
         indexDesc.getParameters().put("comment", indexComment);
       }

Added: hive/branches/branch-0.14/ql/src/test/queries/clientpositive/index_skewtable.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/queries/clientpositive/index_skewtable.q?rev=1641464&view=auto
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/queries/clientpositive/index_skewtable.q (added)
+++ hive/branches/branch-0.14/ql/src/test/queries/clientpositive/index_skewtable.q Mon Nov 24 20:04:51 2014
@@ -0,0 +1,22 @@
+-- Test creating an index on skewed table
+
+-- Create a skew table
+CREATE TABLE kv(key STRING, value STRING) SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE kv;
+
+-- Create and build an index
+CREATE INDEX kv_index ON TABLE kv(value) AS 'COMPACT' WITH DEFERRED REBUILD;
+DESCRIBE FORMATTED default__kv_kv_index__;
+ALTER INDEX kv_index ON kv REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+-- Run a query that uses the index
+EXPLAIN SELECT * FROM kv WHERE value > '15' ORDER BY value;
+SELECT * FROM kv WHERE value > '15' ORDER BY value;
+
+DROP INDEX kv_index ON kv;
+DROP TABLE kv;

Modified: hive/branches/branch-0.14/ql/src/test/results/clientpositive/authorization_index.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/results/clientpositive/authorization_index.q.out?rev=1641464&r1=1641463&r2=1641464&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/results/clientpositive/authorization_index.q.out (original)
+++ hive/branches/branch-0.14/ql/src/test/results/clientpositive/authorization_index.q.out Mon Nov 24 20:04:51 2014
@@ -43,8 +43,6 @@ Compressed:         	No                 
 Num Buckets:        	-1                  	 
 Bucket Columns:     	[]                  	 
 Sort Columns:       	[Order(col:a, order:1)]	 
-Storage Desc Params:	 	 
-	serialization.format	1                   
 PREHOOK: query: alter index t1_index on t1 rebuild
 PREHOOK: type: ALTERINDEX_REBUILD
 PREHOOK: Input: default@t1

Added: hive/branches/branch-0.14/ql/src/test/results/clientpositive/index_skewtable.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/results/clientpositive/index_skewtable.q.out?rev=1641464&view=auto
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/results/clientpositive/index_skewtable.q.out (added)
+++ hive/branches/branch-0.14/ql/src/test/results/clientpositive/index_skewtable.q.out Mon Nov 24 20:04:51 2014
@@ -0,0 +1,216 @@
+PREHOOK: query: -- Test creating an index on skewed table
+
+-- Create a skew table
+CREATE TABLE kv(key STRING, value STRING) SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@kv
+POSTHOOK: query: -- Test creating an index on skewed table
+
+-- Create a skew table
+CREATE TABLE kv(key STRING, value STRING) SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@kv
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE kv
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@kv
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE kv
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@kv
+PREHOOK: query: -- Create and build an index
+CREATE INDEX kv_index ON TABLE kv(value) AS 'COMPACT' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+PREHOOK: Input: default@kv
+POSTHOOK: query: -- Create and build an index
+CREATE INDEX kv_index ON TABLE kv(value) AS 'COMPACT' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+POSTHOOK: Input: default@kv
+POSTHOOK: Output: default@default__kv_kv_index__
+PREHOOK: query: DESCRIBE FORMATTED default__kv_kv_index__
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@default__kv_kv_index__
+POSTHOOK: query: DESCRIBE FORMATTED default__kv_kv_index__
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@default__kv_kv_index__
+# col_name            	data_type           	comment             
+	 	 
+value               	string              	                    
+_bucketname         	string              	                    
+_offsets            	array<bigint>       	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	INDEX_TABLE         	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[Order(col:value, order:1)]	 
+PREHOOK: query: ALTER INDEX kv_index ON kv REBUILD
+PREHOOK: type: ALTERINDEX_REBUILD
+PREHOOK: Input: default@kv
+PREHOOK: Output: default@default__kv_kv_index__
+POSTHOOK: query: ALTER INDEX kv_index ON kv REBUILD
+POSTHOOK: type: ALTERINDEX_REBUILD
+POSTHOOK: Input: default@kv
+POSTHOOK: Output: default@default__kv_kv_index__
+POSTHOOK: Lineage: default__kv_kv_index__._bucketname SIMPLE [(kv)kv.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__kv_kv_index__._offsets EXPRESSION [(kv)kv.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__kv_kv_index__.value SIMPLE [(kv)kv.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: -- Run a query that uses the index
+EXPLAIN SELECT * FROM kv WHERE value > '15' ORDER BY value
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Run a query that uses the index
+EXPLAIN SELECT * FROM kv WHERE value > '15' ORDER BY value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-8 depends on stages: Stage-3 , consists of Stage-5, Stage-4, Stage-6
+  Stage-5
+  Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+  Stage-1 depends on stages: Stage-2
+  Stage-4
+  Stage-6
+  Stage-7 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: default__kv_kv_index__
+            filterExpr: (value > '15') (type: boolean)
+            Filter Operator
+              predicate: (value > '15') (type: boolean)
+              Select Operator
+                expressions: _bucketname (type: string), _offsets (type: array<bigint>)
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-8
+    Conditional Operator
+
+  Stage: Stage-5
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-2
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: kv
+            filterExpr: (value > '15') (type: boolean)
+            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (value > '15') (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: string)
+                  sort order: +
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  value expressions: _col0 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-7
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT * FROM kv WHERE value > '15' ORDER BY value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__kv_kv_index__
+PREHOOK: Input: default@kv
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM kv WHERE value > '15' ORDER BY value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__kv_kv_index__
+POSTHOOK: Input: default@kv
+#### A masked pattern was here ####
+8	18
+8	18
+2	22
+PREHOOK: query: DROP INDEX kv_index ON kv
+PREHOOK: type: DROPINDEX
+PREHOOK: Input: default@kv
+POSTHOOK: query: DROP INDEX kv_index ON kv
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Input: default@kv
+PREHOOK: query: DROP TABLE kv
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@kv
+PREHOOK: Output: default@kv
+POSTHOOK: query: DROP TABLE kv
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@kv
+POSTHOOK: Output: default@kv