Posted to commits@hive.apache.org by sa...@apache.org on 2020/10/10 07:33:40 UTC

[hive] branch master updated: HIVE-22826: ALTER TABLE RENAME COLUMN doesn't update list of bucketed column names (Ashish Sharma, reviewed by Adesh Rao, Sankar Hariappan)

This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 3e8848a  HIVE-22826: ALTER TABLE RENAME COLUMN doesn't update list of bucketed column names (Ashish Sharma, reviewed by Adesh Rao, Sankar Hariappan)
3e8848a is described below

commit 3e8848aa60c85ea53961a394df5d61dc4183924e
Author: Ashish Kumar Sharma <as...@gmail.com>
AuthorDate: Sat Oct 10 07:33:29 2020 +0000

    HIVE-22826: ALTER TABLE RENAME COLUMN doesn't update list of bucketed column names (Ashish Sharma, reviewed by Adesh Rao, Sankar Hariappan)
    
    Signed-off-by: Sankar Hariappan <sa...@apache.org>
    Closes (#1528)
---
 .../change/AlterTableChangeColumnAnalyzer.java     |  4 +-
 .../change/AlterTableChangeColumnOperation.java    |  9 +-
 .../clientpositive/alt_bucket_tlb_change_col.q     | 10 +++
 .../alter_numbuckets_partitioned_table_h23.q       |  6 ++
 .../llap/alt_bucket_tlb_change_col.q.out           | 97 ++++++++++++++++++++++
 .../alter_numbuckets_partitioned_table_h23.q.out   | 51 +++++++++++-
 .../hadoop/hive/metastore/HiveAlterHandler.java    |  9 ++
 .../apache/hadoop/hive/metastore/ObjectStore.java  |  1 +
 .../hive/metastore/utils/MetaStoreServerUtils.java | 30 +++++++
 .../hadoop/hive/metastore/TestHiveMetaStore.java   | 60 +++++++++++++
 10 files changed, 272 insertions(+), 5 deletions(-)
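
For context, the user-visible bug can be sketched in HiveQL (an illustrative repro with a hypothetical table name; it mirrors the new q-test added below):

    create table t (serial_num string, value string)
    partitioned by (ds string)
    clustered by (serial_num) into 10 buckets;

    -- Before this patch the rename below left the stale name in the table's
    -- storage descriptor, so DESCRIBE FORMATTED kept reporting
    -- Bucket Columns: [serial_num].
    alter table t change serial_num key string;

    describe formatted t;   -- now reports Bucket Columns: [key]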

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/change/AlterTableChangeColumnAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/change/AlterTableChangeColumnAnalyzer.java
index 5b9730b..a818075 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/change/AlterTableChangeColumnAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/change/AlterTableChangeColumnAnalyzer.java
@@ -60,8 +60,8 @@ public class AlterTableChangeColumnAnalyzer extends AbstractAlterTableAnalyzer {
   protected void analyzeCommand(TableName tableName, Map<String, String> partitionSpec, ASTNode command)
       throws SemanticException {
     //col_old_name col_new_name column_type [COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT]
-    String oldColumnName = command.getChild(0).getText();
-    String newColumnName = command.getChild(1).getText();
+    String oldColumnName = command.getChild(0).getText().toLowerCase();
+    String newColumnName = command.getChild(1).getText().toLowerCase();
     String newType = getTypeStringFromAST((ASTNode) command.getChild(2));
 
     Table table = getTable(tableName);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/change/AlterTableChangeColumnOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/change/AlterTableChangeColumnOperation.java
index 806e3eb..e65e256 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/change/AlterTableChangeColumnOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/change/AlterTableChangeColumnOperation.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.ddl.table.column.change;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.commons.collections4.CollectionUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
@@ -61,17 +62,21 @@ public class AlterTableChangeColumnOperation extends AbstractAlterTableOperation
     int i = 1;
 
     List<FieldSchema> oldColumns = (partition == null ? table.getColsForMetastore() : partition.getColsForMetastore());
-    List<FieldSchema> newColumns = new ArrayList<FieldSchema>();
+    List<FieldSchema> newColumns = new ArrayList<>();
     for (FieldSchema oldColumn : oldColumns) {
       String oldColumnName = oldColumn.getName();
       if (oldColumnName.equalsIgnoreCase(desc.getOldColumnName())) {
-        oldColumn.setName(desc.getNewColumnName());
+        oldColumn.setName(desc.getNewColumnName().toLowerCase());
         if (StringUtils.isNotBlank(desc.getNewColumnType())) {
           oldColumn.setType(desc.getNewColumnType());
         }
         if (desc.getNewColumnComment() != null) {
           oldColumn.setComment(desc.getNewColumnComment());
         }
+        if (CollectionUtils.isNotEmpty(sd.getBucketCols()) && sd.getBucketCols().contains(oldColumnName)) {
+          sd.getBucketCols().remove(oldColumnName);
+          sd.getBucketCols().add(desc.getNewColumnName().toLowerCase());
+        }
         found = true;
         if (desc.isFirst() || StringUtils.isNotBlank(desc.getAfterColumn())) {
           column = oldColumn;
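
Hive stores column names lower-cased in the metastore, which is why both the analyzer and the operation above normalize with toLowerCase() before touching sd.getBucketCols(). A minimal standalone sketch of the same replace-by-name idea (a hypothetical helper, not code from the patch):

    import java.util.List;

    class BucketColRenameSketch {
      // Replace oldName with newName in a bucket-column list, normalizing to
      // lower case so the lookup matches the metastore's stored names.
      static void rename(List<String> bucketCols, String oldName, String newName) {
        if (bucketCols != null && bucketCols.remove(oldName.toLowerCase())) {
          bucketCols.add(newName.toLowerCase());
        }
      }
    }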
diff --git a/ql/src/test/queries/clientpositive/alt_bucket_tlb_change_col.q b/ql/src/test/queries/clientpositive/alt_bucket_tlb_change_col.q
new file mode 100644
index 0000000..5a9b071
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/alt_bucket_tlb_change_col.q
@@ -0,0 +1,10 @@
+--! qt:dataset:src
+create table alter_bucket_change_col_t1(Serial_Num string, value string) partitioned by (ds string) clustered by (Serial_Num) into 10 buckets;
+
+describe formatted alter_bucket_change_col_t1;
+
+-- Test changing name of bucket column
+
+alter table alter_bucket_change_col_t1 change Serial_num Key string;
+
+describe formatted alter_bucket_change_col_t1;
\ No newline at end of file
diff --git a/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q b/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q
index d4e1a19..eaa951a 100644
--- a/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q
+++ b/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q
@@ -52,6 +52,12 @@ alter table tst1_n1 clustered by (value) into 12 buckets;
 
 describe formatted tst1_n1;
 
+-- Test changing name of bucket column
+
+alter table tst1_n1 change key keys string;
+
+describe formatted tst1_n1;
+
 -- Test removing buckets
 
 alter table tst1_n1 not clustered;
diff --git a/ql/src/test/results/clientpositive/llap/alt_bucket_tlb_change_col.q.out b/ql/src/test/results/clientpositive/llap/alt_bucket_tlb_change_col.q.out
new file mode 100644
index 0000000..006b53b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/alt_bucket_tlb_change_col.q.out
@@ -0,0 +1,97 @@
+PREHOOK: query: create table alter_bucket_change_col_t1(Serial_Num string, value string) partitioned by (ds string) clustered by (Serial_Num) into 10 buckets
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@alter_bucket_change_col_t1
+POSTHOOK: query: create table alter_bucket_change_col_t1(Serial_Num string, value string) partitioned by (ds string) clustered by (Serial_Num) into 10 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alter_bucket_change_col_t1
+PREHOOK: query: describe formatted alter_bucket_change_col_t1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@alter_bucket_change_col_t1
+POSTHOOK: query: describe formatted alter_bucket_change_col_t1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@alter_bucket_change_col_t1
+# col_name            	data_type           	comment             
+serial_num          	string              	                    
+value               	string              	                    
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+ds                  	string              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	bucketing_version   	2                   
+	numFiles            	0                   
+	numPartitions       	0                   
+	numRows             	0                   
+	rawDataSize         	0                   
+	totalSize           	0                   
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	10                  	 
+Bucket Columns:     	[serial_num]        	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: alter table alter_bucket_change_col_t1 change Serial_num Key string
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@alter_bucket_change_col_t1
+PREHOOK: Output: default@alter_bucket_change_col_t1
+POSTHOOK: query: alter table alter_bucket_change_col_t1 change Serial_num Key string
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@alter_bucket_change_col_t1
+POSTHOOK: Output: default@alter_bucket_change_col_t1
+PREHOOK: query: describe formatted alter_bucket_change_col_t1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@alter_bucket_change_col_t1
+POSTHOOK: query: describe formatted alter_bucket_change_col_t1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@alter_bucket_change_col_t1
+# col_name            	data_type           	comment             
+key                 	string              	                    
+value               	string              	                    
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+ds                  	string              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	bucketing_version   	2                   
+#### A masked pattern was here ####
+	numFiles            	0                   
+	numPartitions       	0                   
+	numRows             	0                   
+	rawDataSize         	0                   
+	totalSize           	0                   
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	10                  	 
+Bucket Columns:     	[key]               	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
diff --git a/ql/src/test/results/clientpositive/llap/alter_numbuckets_partitioned_table_h23.q.out b/ql/src/test/results/clientpositive/llap/alter_numbuckets_partitioned_table_h23.q.out
index c59a5fc..8b0a942 100644
--- a/ql/src/test/results/clientpositive/llap/alter_numbuckets_partitioned_table_h23.q.out
+++ b/ql/src/test/results/clientpositive/llap/alter_numbuckets_partitioned_table_h23.q.out
@@ -517,6 +517,55 @@ Bucket Columns:     	[value]
 Sort Columns:       	[]                  	 
 Storage Desc Params:	 	 
 	serialization.format	1                   
+PREHOOK: query: alter table tst1_n1 change key keys string
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@tst1_n1
+PREHOOK: Output: default@tst1_n1
+POSTHOOK: query: alter table tst1_n1 change key keys string
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@tst1_n1
+POSTHOOK: Output: default@tst1_n1
+PREHOOK: query: describe formatted tst1_n1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tst1_n1
+POSTHOOK: query: describe formatted tst1_n1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tst1_n1
+# col_name            	data_type           	comment             
+keys                	string              	                    
+value               	string              	                    
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+ds                  	string              	                    
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	bucketing_version   	2                   
+#### A masked pattern was here ####
+	numFiles            	12                  
+	numPartitions       	1                   
+	numRows             	500                 
+	rawDataSize         	5312                
+	totalSize           	5812                
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	12                  	 
+Bucket Columns:     	[value]             	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
 PREHOOK: query: alter table tst1_n1 not clustered
 PREHOOK: type: ALTERTABLE_CLUSTER_SORT
 PREHOOK: Input: default@tst1_n1
@@ -532,7 +581,7 @@ POSTHOOK: query: describe formatted tst1_n1
 POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@tst1_n1
 # col_name            	data_type           	comment             
-key                 	string              	                    
+keys                	string              	                    
 value               	string              	                    
 	 	 
 # Partition Information	 	 
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
index 15b3ef7..cf4b85d 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
@@ -22,6 +22,7 @@ import com.google.common.collect.ArrayListMultimap;
 import com.google.common.collect.Lists;
 
 import com.google.common.collect.Multimap;
+import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.hive.common.repl.ReplConst;
 import org.apache.hadoop.hive.common.TableName;
@@ -130,6 +131,14 @@ public class HiveAlterHandler implements AlterHandler {
       throw new InvalidOperationException("Invalid column " + validate);
     }
 
+    // Validate that the bucketed columns of the new table match its columns
+    List<String> bucketColumns = MetaStoreServerUtils.validateBucketColumns(newt.getSd());
+    if (CollectionUtils.isNotEmpty(bucketColumns)) {
+      String errMsg = "Bucket columns - " + bucketColumns + " do not match any table column";
+      LOG.error(errMsg);
+      throw new InvalidOperationException(errMsg);
+    }
+
     Path srcPath = null;
     FileSystem srcFs;
     Path destPath = null;
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 1296bae..71d2de4 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -4823,6 +4823,7 @@ public class ObjectStore implements RawStore, Configurable {
         Map<Integer, Integer> mapping = new HashMap<>();
         for (int i = 0; i < oldCols.size(); i++) {
           FieldSchema oldCol = oldCols.get(i);
+          // TODO: replace the inner loop with newCols.indexOf(oldCol)
           for (int j = 0; j < newCols.size(); j++) {
             FieldSchema newCol = newCols.get(j);
             if (oldCol.equals(newCol)) {
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java
index 7541518..40738f9 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java
@@ -47,6 +47,7 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 import java.util.regex.Pattern;
+import java.util.stream.Collectors;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Predicates;
@@ -1560,4 +1561,33 @@ public class MetaStoreServerUtils {
     }
     return tpart;
   }
+
+  /**
+   * Validate that the bucket columns belong to the table columns.
+   * @param sd StorageDescriptor of the given table
+   * @return list of bucket column names that do not match any table column; null or empty when all bucket columns are valid
+   */
+  public static List<String> validateBucketColumns(StorageDescriptor sd) {
+    List<String> bucketColumnNames = null;
+
+    if (CollectionUtils.isNotEmpty(sd.getBucketCols())) {
+      bucketColumnNames = sd.getBucketCols().stream().map(String::toLowerCase).collect(Collectors.toList());
+      List<String> columnNames = getColumnNames(sd.getCols());
+      if (CollectionUtils.isNotEmpty(columnNames))
+        bucketColumnNames.removeAll(columnNames);
+    }
+    return bucketColumnNames;
+  }
+
+  /**
+   * Generate a list of lower-case column names from the given FieldSchema list.
+   * @param cols FieldSchema list
+   * @return list of lower-case column names, or null when cols is null or empty
+   */
+  public static List<String> getColumnNames(List<FieldSchema> cols) {
+    if (CollectionUtils.isNotEmpty(cols)) {
+      return cols.stream().map(FieldSchema::getName).map(String::toLowerCase).collect(Collectors.toList());
+    }
+    return null;
+  }
 }
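
A rough usage sketch of the new helper (hypothetical values, assuming the metastore-server classes are on the classpath): validateBucketColumns lower-cases the bucket names and removes every one that matches a table column, so anything left over is a stale bucket column and the alter is rejected.

    import java.util.Arrays;
    import java.util.List;
    import org.apache.hadoop.hive.metastore.api.FieldSchema;
    import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
    import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils;

    class ValidateBucketColsSketch {
      public static void main(String[] args) {
        StorageDescriptor sd = new StorageDescriptor();
        // The table columns were renamed to (key, value)...
        sd.setCols(Arrays.asList(new FieldSchema("key", "string", ""),
            new FieldSchema("value", "string", "")));
        // ...but the bucket list still holds the old name.
        sd.setBucketCols(Arrays.asList("Serial_Num"));
        List<String> stale = MetaStoreServerUtils.validateBucketColumns(sd);
        System.out.println(stale); // prints [serial_num] -> non-empty, so invalid
      }
    }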
diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
index ec639f9..7018c89 100644
--- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
+++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
@@ -3428,4 +3428,64 @@ public abstract class TestHiveMetaStore {
     m.invoke(hms, tbl, part, false, null);
     verify(wh, never()).getFileStatusesForLocation(part.getSd().getLocation());
   }
+
+  @Test
+  public void testAlterTableRenameBucketedColumnPositive() throws Exception {
+    String dbName = "alterTblDb";
+    String tblName = "altertbl";
+
+    client.dropTable(dbName, tblName);
+    silentDropDatabase(dbName);
+
+    new DatabaseBuilder().setName(dbName).create(client, conf);
+
+    ArrayList<FieldSchema> origCols = new ArrayList<>(2);
+    origCols.add(new FieldSchema("originalColName", ColumnType.STRING_TYPE_NAME, ""));
+    origCols.add(new FieldSchema("income", ColumnType.INT_TYPE_NAME, ""));
+    Table origTbl = new TableBuilder().setDbName(dbName).setTableName(tblName).setCols(origCols)
+        .setBucketCols(Lists.newArrayList("originalColName")).build(conf);
+    client.createTable(origTbl);
+
+    // Rename bucketed column positive case
+    ArrayList<FieldSchema> colsUpdated = new ArrayList<>(origCols);
+    colsUpdated.set(0, new FieldSchema("updatedColName1", ColumnType.STRING_TYPE_NAME, ""));
+    Table tblUpdated = client.getTable(dbName, tblName);
+    tblUpdated.getSd().setCols(colsUpdated);
+    tblUpdated.getSd().getBucketCols().set(0, colsUpdated.get(0).getName());
+    client.alter_table(dbName, tblName, tblUpdated);
+
+    Table resultTbl = client.getTable(dbName, tblUpdated.getTableName());
+    assertEquals("Num bucketed columns is not 1 ", 1, resultTbl.getSd().getBucketCols().size());
+    assertEquals("Bucketed column names incorrect", colsUpdated.get(0).getName(),
+        resultTbl.getSd().getBucketCols().get(0));
+
+    silentDropDatabase(dbName);
+  }
+
+  @Test(expected = InvalidOperationException.class)
+  public void testAlterTableRenameBucketedColumnNegative() throws Exception {
+    String dbName = "alterTblDb";
+    String tblName = "altertbl";
+
+    client.dropTable(dbName, tblName);
+    silentDropDatabase(dbName);
+
+    new DatabaseBuilder().setName(dbName).create(client, conf);
+
+    ArrayList<FieldSchema> origCols = new ArrayList<>(2);
+    origCols.add(new FieldSchema("originalColName", ColumnType.STRING_TYPE_NAME, ""));
+    origCols.add(new FieldSchema("income", ColumnType.INT_TYPE_NAME, ""));
+    Table origTbl = new TableBuilder().setDbName(dbName).setTableName(tblName).setCols(origCols)
+        .setBucketCols(Lists.newArrayList("originalColName")).build(conf);
+    client.createTable(origTbl);
+
+    // Rename bucketed column negative case
+    ArrayList<FieldSchema> colsUpdated = new ArrayList<>(origCols);
+    colsUpdated.set(0, new FieldSchema("updatedColName1", ColumnType.STRING_TYPE_NAME, ""));
+    Table tblUpdated = client.getTable(dbName, tblName);
+    tblUpdated.getSd().setCols(colsUpdated);
+    client.alter_table(dbName, tblName, tblUpdated);
+
+    silentDropDatabase(dbName);
+  }
 }