You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2010/06/30 18:09:46 UTC
svn commit: r959361 - in /hadoop/hive/trunk: ./
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/parse/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Author: namit
Date: Wed Jun 30 16:09:46 2010
New Revision: 959361
URL: http://svn.apache.org/viewvc?rev=959361&view=rev
Log:
HIVE-1443. Add an API to turn off bucketing (Paul Yang via namit)
M CHANGES.txt
A ql/src/test/results/clientpositive/alter4.q.out
A ql/src/test/queries/clientpositive/alter4.q
M ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
M ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
M ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
Added:
hadoop/hive/trunk/ql/src/test/queries/clientpositive/alter4.q
hadoop/hive/trunk/ql/src/test/results/clientpositive/alter4.q.out
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=959361&r1=959360&r2=959361&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Wed Jun 30 16:09:46 2010
@@ -20,6 +20,8 @@ Trunk - Unreleased
HIVE-1430. Dont run serialize plan by default (Ning Zhang via namit)
+ HIVE-1443. Add an API to turn off bucketing (Paul Yang via namit)
+
OPTIMIZATIONS
BUG FIXES
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=959361&r1=959360&r2=959361&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Wed Jun 30 16:09:46 2010
@@ -1703,9 +1703,24 @@ public class DDLTask extends Task<DDLWor
Utilities.validateColumnNames(columns, Utilities
.getColumnNamesFromSortCols(alterTbl.getSortColumns()));
}
- tbl.getTTable().getSd().setBucketCols(alterTbl.getBucketColumns());
- tbl.getTTable().getSd().setNumBuckets(alterTbl.getNumberBuckets());
- tbl.getTTable().getSd().setSortCols(alterTbl.getSortColumns());
+
+ int numBuckets = -1;
+ ArrayList<String> bucketCols = null;
+ ArrayList<Order> sortCols = null;
+
+ // -1 buckets means to turn off bucketing
+ if (alterTbl.getNumberBuckets() == -1) {
+ bucketCols = new ArrayList<String>();
+ sortCols = new ArrayList<Order>();
+ numBuckets = -1;
+ } else {
+ bucketCols = alterTbl.getBucketColumns();
+ sortCols = alterTbl.getSortColumns();
+ numBuckets = alterTbl.getNumberBuckets();
+ }
+ tbl.getTTable().getSd().setBucketCols(bucketCols);
+ tbl.getTTable().getSd().setNumBuckets(numBuckets);
+ tbl.getTTable().getSd().setSortCols(sortCols);
} else {
console.printError("Unsupported Alter commnad");
return 1;
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=959361&r1=959360&r2=959361&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Wed Jun 30 16:09:46 2010
@@ -269,23 +269,31 @@ public class DDLSemanticAnalyzer extends
private void analyzeAlterTableClusterSort(ASTNode ast)
throws SemanticException {
String tableName = unescapeIdentifier(ast.getChild(0).getText());
- ASTNode buckets = (ASTNode) ast.getChild(1);
- List<String> bucketCols = getColumnNames((ASTNode) buckets.getChild(0));
- List<Order> sortCols = new ArrayList<Order>();
- int numBuckets = -1;
- if (buckets.getChildCount() == 2) {
- numBuckets = (Integer.valueOf(buckets.getChild(1).getText())).intValue();
+ if (ast.getChildCount() == 1) {
+ // This means that we want to turn off bucketing
+ AlterTableDesc alterTblDesc = new AlterTableDesc(tableName, -1,
+ new ArrayList<String>(), new ArrayList<Order>());
+ rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
+ alterTblDesc), conf));
} else {
- sortCols = getColumnNamesOrder((ASTNode) buckets.getChild(1));
- numBuckets = (Integer.valueOf(buckets.getChild(2).getText())).intValue();
- }
- if (numBuckets <= 0) {
- throw new SemanticException(ErrorMsg.INVALID_BUCKET_NUMBER.getMsg());
+ ASTNode buckets = (ASTNode) ast.getChild(1);
+ List<String> bucketCols = getColumnNames((ASTNode) buckets.getChild(0));
+ List<Order> sortCols = new ArrayList<Order>();
+ int numBuckets = -1;
+ if (buckets.getChildCount() == 2) {
+ numBuckets = (Integer.valueOf(buckets.getChild(1).getText())).intValue();
+ } else {
+ sortCols = getColumnNamesOrder((ASTNode) buckets.getChild(1));
+ numBuckets = (Integer.valueOf(buckets.getChild(2).getText())).intValue();
+ }
+ if (numBuckets <= 0) {
+ throw new SemanticException(ErrorMsg.INVALID_BUCKET_NUMBER.getMsg());
+ }
+ AlterTableDesc alterTblDesc = new AlterTableDesc(tableName, numBuckets,
+ bucketCols, sortCols);
+ rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
+ alterTblDesc), conf));
}
- AlterTableDesc alterTblDesc = new AlterTableDesc(tableName, numBuckets,
- bucketCols, sortCols);
- rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
- alterTblDesc), conf));
}
static HashMap<String, String> getProps(ASTNode prop) {
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g?rev=959361&r1=959360&r2=959361&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g Wed Jun 30 16:09:46 2010
@@ -403,6 +403,9 @@ alterStatementSuffixClusterbySortby
@after{msgs.pop();}
:name=Identifier tableBuckets
->^(TOK_ALTERTABLE_CLUSTER_SORT $name tableBuckets)
+ |
+ name=Identifier KW_NOT KW_CLUSTERED
+ ->^(TOK_ALTERTABLE_CLUSTER_SORT $name)
;
fileFormat
Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/alter4.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/alter4.q?rev=959361&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/alter4.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/alter4.q Wed Jun 30 16:09:46 2010
@@ -0,0 +1,9 @@
+DROP TABLE set_bucketing_test;
+
+CREATE TABLE set_bucketing_test (key INT, value STRING) CLUSTERED BY (key) INTO 10 BUCKETS;
+DESCRIBE EXTENDED set_bucketing_test;
+
+ALTER TABLE set_bucketing_test NOT CLUSTERED;
+DESCRIBE EXTENDED set_bucketing_test;
+
+DROP TABLE set_bucketing_test;
Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/alter4.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/alter4.q.out?rev=959361&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/alter4.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/alter4.q.out Wed Jun 30 16:09:46 2010
@@ -0,0 +1,36 @@
+PREHOOK: query: DROP TABLE set_bucketing_test
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE set_bucketing_test
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE set_bucketing_test (key INT, value STRING) CLUSTERED BY (key) INTO 10 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE set_bucketing_test (key INT, value STRING) CLUSTERED BY (key) INTO 10 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@set_bucketing_test
+PREHOOK: query: DESCRIBE EXTENDED set_bucketing_test
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE EXTENDED set_bucketing_test
+POSTHOOK: type: DESCTABLE
+key int
+value string
+
+Detailed Table Information Table(tableName:set_bucketing_test, dbName:default, owner:pyang, createTime:1277867951, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:int, comment:null), FieldSchema(name:value, type:string, comment:null)], location:file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/test/data/warehouse/set_bucketing_test, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:10, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[key], sortCols:[], parameters:{}), partitionKeys:[], parameters:{transient_lastDdlTime=1277867951}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
+PREHOOK: query: ALTER TABLE set_bucketing_test NOT CLUSTERED
+PREHOOK: type: null
+POSTHOOK: query: ALTER TABLE set_bucketing_test NOT CLUSTERED
+POSTHOOK: type: null
+POSTHOOK: Input: default@set_bucketing_test
+POSTHOOK: Output: default@set_bucketing_test
+PREHOOK: query: DESCRIBE EXTENDED set_bucketing_test
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE EXTENDED set_bucketing_test
+POSTHOOK: type: DESCTABLE
+key int
+value string
+
+Detailed Table Information Table(tableName:set_bucketing_test, dbName:default, owner:pyang, createTime:1277867951, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:int, comment:null), FieldSchema(name:value, type:string, comment:null)], location:file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/test/data/warehouse/set_bucketing_test, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}), partitionKeys:[], parameters:{last_modified_by=pyang, last_modified_time=1277867951, transient_lastDdlTime=1277867951}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
+PREHOOK: query: DROP TABLE set_bucketing_test
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE set_bucketing_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@set_bucketing_test