You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/11/14 22:58:27 UTC
svn commit: r1639796 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/metadata/Hive.java
test/queries/clientpositive/index_skewtable.q
test/results/clientpositive/authorization_index.q.out
test/results/clientpositive/index_skewtable.q.out
Author: hashutosh
Date: Fri Nov 14 21:58:26 2014
New Revision: 1639796
URL: http://svn.apache.org/r1639796
Log:
HIVE-5631 : Index creation on a skew table fails (Venki Korukanti via Ashutosh Chauhan)
Added:
hive/trunk/ql/src/test/queries/clientpositive/index_skewtable.q
hive/trunk/ql/src/test/results/clientpositive/index_skewtable.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
hive/trunk/ql/src/test/results/clientpositive/authorization_index.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1639796&r1=1639795&r2=1639796&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Fri Nov 14 21:58:26 2014
@@ -92,6 +92,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.api.SkewedInfo;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -748,8 +749,9 @@ public class Hive {
throw new HiveException("Table name " + indexTblName + " already exists. Choose another name.");
}
- org.apache.hadoop.hive.metastore.api.StorageDescriptor storageDescriptor = baseTbl.getSd().deepCopy();
- SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
+ SerDeInfo serdeInfo = new SerDeInfo();
+ serdeInfo.setName(indexTblName);
+
if(serde != null) {
serdeInfo.setSerializationLib(serde);
} else {
@@ -762,6 +764,7 @@ public class Hive {
}
}
+ serdeInfo.setParameters(new HashMap<String, String>());
if (fieldDelim != null) {
serdeInfo.getParameters().put(FIELD_DELIM, fieldDelim);
serdeInfo.getParameters().put(SERIALIZATION_FORMAT, fieldDelim);
@@ -788,18 +791,8 @@ public class Hive {
}
}
- storageDescriptor.setLocation(null);
- if (location != null) {
- storageDescriptor.setLocation(location);
- }
- storageDescriptor.setInputFormat(inputFormat);
- storageDescriptor.setOutputFormat(outputFormat);
-
- Map<String, String> params = new HashMap<String,String>();
-
List<FieldSchema> indexTblCols = new ArrayList<FieldSchema>();
List<Order> sortCols = new ArrayList<Order>();
- storageDescriptor.setBucketCols(null);
int k = 0;
Table metaBaseTbl = new Table(baseTbl);
for (int i = 0; i < metaBaseTbl.getCols().size(); i++) {
@@ -815,9 +808,6 @@ public class Hive {
"Check the index columns, they should appear in the table being indexed.");
}
- storageDescriptor.setCols(indexTblCols);
- storageDescriptor.setSortCols(sortCols);
-
int time = (int) (System.currentTimeMillis() / 1000);
org.apache.hadoop.hive.metastore.api.Table tt = null;
HiveIndexHandler indexHandler = HiveUtils.getIndexHandler(this.getConf(), indexHandlerClass);
@@ -851,8 +841,21 @@ public class Hive {
String tdname = Utilities.getDatabaseName(tableName);
String ttname = Utilities.getTableName(tableName);
+
+ StorageDescriptor indexSd = new StorageDescriptor(
+ indexTblCols,
+ location,
+ inputFormat,
+ outputFormat,
+ false/*compressed - not used*/,
+ -1/*numBuckets - default is -1 when the table has no buckets*/,
+ serdeInfo,
+ null/*bucketCols*/,
+ sortCols,
+ null/*parameters*/);
+
Index indexDesc = new Index(indexName, indexHandlerClass, tdname, ttname, time, time, indexTblName,
- storageDescriptor, params, deferredRebuild);
+ indexSd, new HashMap<String,String>(), deferredRebuild);
if (indexComment != null) {
indexDesc.getParameters().put("comment", indexComment);
}
Added: hive/trunk/ql/src/test/queries/clientpositive/index_skewtable.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_skewtable.q?rev=1639796&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_skewtable.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_skewtable.q Fri Nov 14 21:58:26 2014
@@ -0,0 +1,22 @@
+-- Test creating an index on skewed table
+
+-- Create a skew table
+CREATE TABLE kv(key STRING, value STRING) SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE kv;
+
+-- Create and build an index
+CREATE INDEX kv_index ON TABLE kv(value) AS 'COMPACT' WITH DEFERRED REBUILD;
+DESCRIBE FORMATTED default__kv_kv_index__;
+ALTER INDEX kv_index ON kv REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+-- Run a query that uses the index
+EXPLAIN SELECT * FROM kv WHERE value > '15' ORDER BY value;
+SELECT * FROM kv WHERE value > '15' ORDER BY value;
+
+DROP INDEX kv_index ON kv;
+DROP TABLE kv;
Modified: hive/trunk/ql/src/test/results/clientpositive/authorization_index.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/authorization_index.q.out?rev=1639796&r1=1639795&r2=1639796&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/authorization_index.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/authorization_index.q.out Fri Nov 14 21:58:26 2014
@@ -43,8 +43,6 @@ Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: [Order(col:a, order:1)]
-Storage Desc Params:
- serialization.format 1
PREHOOK: query: alter index t1_index on t1 rebuild
PREHOOK: type: ALTERINDEX_REBUILD
PREHOOK: Input: default@t1
Added: hive/trunk/ql/src/test/results/clientpositive/index_skewtable.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_skewtable.q.out?rev=1639796&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/index_skewtable.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/index_skewtable.q.out Fri Nov 14 21:58:26 2014
@@ -0,0 +1,216 @@
+PREHOOK: query: -- Test creating an index on skewed table
+
+-- Create a skew table
+CREATE TABLE kv(key STRING, value STRING) SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@kv
+POSTHOOK: query: -- Test creating an index on skewed table
+
+-- Create a skew table
+CREATE TABLE kv(key STRING, value STRING) SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@kv
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE kv
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@kv
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE kv
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@kv
+PREHOOK: query: -- Create and build an index
+CREATE INDEX kv_index ON TABLE kv(value) AS 'COMPACT' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+PREHOOK: Input: default@kv
+POSTHOOK: query: -- Create and build an index
+CREATE INDEX kv_index ON TABLE kv(value) AS 'COMPACT' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+POSTHOOK: Input: default@kv
+POSTHOOK: Output: default@default__kv_kv_index__
+PREHOOK: query: DESCRIBE FORMATTED default__kv_kv_index__
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@default__kv_kv_index__
+POSTHOOK: query: DESCRIBE FORMATTED default__kv_kv_index__
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@default__kv_kv_index__
+# col_name data_type comment
+
+value string
+_bucketname string
+_offsets array<bigint>
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: INDEX_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: [Order(col:value, order:1)]
+PREHOOK: query: ALTER INDEX kv_index ON kv REBUILD
+PREHOOK: type: ALTERINDEX_REBUILD
+PREHOOK: Input: default@kv
+PREHOOK: Output: default@default__kv_kv_index__
+POSTHOOK: query: ALTER INDEX kv_index ON kv REBUILD
+POSTHOOK: type: ALTERINDEX_REBUILD
+POSTHOOK: Input: default@kv
+POSTHOOK: Output: default@default__kv_kv_index__
+POSTHOOK: Lineage: default__kv_kv_index__._bucketname SIMPLE [(kv)kv.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__kv_kv_index__._offsets EXPRESSION [(kv)kv.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__kv_kv_index__.value SIMPLE [(kv)kv.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: -- Run a query that uses the index
+EXPLAIN SELECT * FROM kv WHERE value > '15' ORDER BY value
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Run a query that uses the index
+EXPLAIN SELECT * FROM kv WHERE value > '15' ORDER BY value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-8 depends on stages: Stage-3 , consists of Stage-5, Stage-4, Stage-6
+ Stage-5
+ Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
+ Stage-1 depends on stages: Stage-2
+ Stage-4
+ Stage-6
+ Stage-7 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: default.default__kv_kv_index__
+ filterExpr: (value > '15') (type: boolean)
+ Filter Operator
+ predicate: (value > '15') (type: boolean)
+ Select Operator
+ expressions: _bucketname (type: string), _offsets (type: array<bigint>)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-2
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: kv
+ filterExpr: (value > '15') (type: boolean)
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (value > '15') (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT * FROM kv WHERE value > '15' ORDER BY value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__kv_kv_index__
+PREHOOK: Input: default@kv
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM kv WHERE value > '15' ORDER BY value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__kv_kv_index__
+POSTHOOK: Input: default@kv
+#### A masked pattern was here ####
+8 18
+8 18
+2 22
+PREHOOK: query: DROP INDEX kv_index ON kv
+PREHOOK: type: DROPINDEX
+PREHOOK: Input: default@kv
+POSTHOOK: query: DROP INDEX kv_index ON kv
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Input: default@kv
+PREHOOK: query: DROP TABLE kv
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@kv
+PREHOOK: Output: default@kv
+POSTHOOK: query: DROP TABLE kv
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@kv
+POSTHOOK: Output: default@kv