You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sa...@apache.org on 2020/10/10 07:33:40 UTC
[hive] branch master updated: HIVE-22826: ALTER TABLE RENAME COLUMN
doesn't update list of bucketed column names (Ashish Sharma,
reviewed by Adesh Rao, Sankar Hariappan)
This is an automated email from the ASF dual-hosted git repository.
sankarh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 3e8848a HIVE-22826: ALTER TABLE RENAME COLUMN doesn't update list of bucketed column names (Ashish Sharma, reviewed by Adesh Rao, Sankar Hariappan)
3e8848a is described below
commit 3e8848aa60c85ea53961a394df5d61dc4183924e
Author: Ashish Kumar Sharma <as...@gmail.com>
AuthorDate: Sat Oct 10 07:33:29 2020 +0000
HIVE-22826: ALTER TABLE RENAME COLUMN doesn't update list of bucketed column names (Ashish Sharma, reviewed by Adesh Rao, Sankar Hariappan)
Signed-off-by: Sankar Hariappan <sa...@apache.org>
Closes (#1528)
---
.../change/AlterTableChangeColumnAnalyzer.java | 4 +-
.../change/AlterTableChangeColumnOperation.java | 9 +-
.../clientpositive/alt_bucket_tlb_change_col.q | 10 +++
.../alter_numbuckets_partitioned_table_h23.q | 6 ++
.../llap/alt_bucket_tlb_change_col.q.out | 97 ++++++++++++++++++++++
.../alter_numbuckets_partitioned_table_h23.q.out | 51 +++++++++++-
.../hadoop/hive/metastore/HiveAlterHandler.java | 9 ++
.../apache/hadoop/hive/metastore/ObjectStore.java | 1 +
.../hive/metastore/utils/MetaStoreServerUtils.java | 30 +++++++
.../hadoop/hive/metastore/TestHiveMetaStore.java | 60 +++++++++++++
10 files changed, 272 insertions(+), 5 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/change/AlterTableChangeColumnAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/change/AlterTableChangeColumnAnalyzer.java
index 5b9730b..a818075 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/change/AlterTableChangeColumnAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/change/AlterTableChangeColumnAnalyzer.java
@@ -60,8 +60,8 @@ public class AlterTableChangeColumnAnalyzer extends AbstractAlterTableAnalyzer {
protected void analyzeCommand(TableName tableName, Map<String, String> partitionSpec, ASTNode command)
throws SemanticException {
//col_old_name col_new_name column_type [COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT]
- String oldColumnName = command.getChild(0).getText();
- String newColumnName = command.getChild(1).getText();
+ String oldColumnName = command.getChild(0).getText().toLowerCase();
+ String newColumnName = command.getChild(1).getText().toLowerCase();
String newType = getTypeStringFromAST((ASTNode) command.getChild(2));
Table table = getTable(tableName);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/change/AlterTableChangeColumnOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/change/AlterTableChangeColumnOperation.java
index 806e3eb..e65e256 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/change/AlterTableChangeColumnOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/column/change/AlterTableChangeColumnOperation.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.ddl.table.column.change;
import java.util.ArrayList;
import java.util.List;
+import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
@@ -61,17 +62,21 @@ public class AlterTableChangeColumnOperation extends AbstractAlterTableOperation
int i = 1;
List<FieldSchema> oldColumns = (partition == null ? table.getColsForMetastore() : partition.getColsForMetastore());
- List<FieldSchema> newColumns = new ArrayList<FieldSchema>();
+ List<FieldSchema> newColumns = new ArrayList<>();
for (FieldSchema oldColumn : oldColumns) {
String oldColumnName = oldColumn.getName();
if (oldColumnName.equalsIgnoreCase(desc.getOldColumnName())) {
- oldColumn.setName(desc.getNewColumnName());
+ oldColumn.setName(desc.getNewColumnName().toLowerCase());
if (StringUtils.isNotBlank(desc.getNewColumnType())) {
oldColumn.setType(desc.getNewColumnType());
}
if (desc.getNewColumnComment() != null) {
oldColumn.setComment(desc.getNewColumnComment());
}
+ if (CollectionUtils.isNotEmpty(sd.getBucketCols()) && sd.getBucketCols().contains(oldColumnName)) {
+ sd.getBucketCols().remove(oldColumnName);
+ sd.getBucketCols().add(desc.getNewColumnName().toLowerCase());
+ }
found = true;
if (desc.isFirst() || StringUtils.isNotBlank(desc.getAfterColumn())) {
column = oldColumn;
diff --git a/ql/src/test/queries/clientpositive/alt_bucket_tlb_change_col.q b/ql/src/test/queries/clientpositive/alt_bucket_tlb_change_col.q
new file mode 100644
index 0000000..5a9b071
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/alt_bucket_tlb_change_col.q
@@ -0,0 +1,10 @@
+--! qt:dataset:src
+create table alter_bucket_change_col_t1(Serial_Num string, value string) partitioned by (ds string) clustered by (Serial_Num) into 10 buckets;
+
+describe formatted alter_bucket_change_col_t1;
+
+-- Test changing name of bucket column
+
+alter table alter_bucket_change_col_t1 change Serial_num Key string;
+
+describe formatted alter_bucket_change_col_t1;
\ No newline at end of file
diff --git a/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q b/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q
index d4e1a19..eaa951a 100644
--- a/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q
+++ b/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table_h23.q
@@ -52,6 +52,12 @@ alter table tst1_n1 clustered by (value) into 12 buckets;
describe formatted tst1_n1;
+-- Test changing name of bucket column
+
+alter table tst1_n1 change key keys string;
+
+describe formatted tst1_n1;
+
-- Test removing buckets
alter table tst1_n1 not clustered;
diff --git a/ql/src/test/results/clientpositive/llap/alt_bucket_tlb_change_col.q.out b/ql/src/test/results/clientpositive/llap/alt_bucket_tlb_change_col.q.out
new file mode 100644
index 0000000..006b53b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/alt_bucket_tlb_change_col.q.out
@@ -0,0 +1,97 @@
+PREHOOK: query: create table alter_bucket_change_col_t1(Serial_Num string, value string) partitioned by (ds string) clustered by (Serial_Num) into 10 buckets
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@alter_bucket_change_col_t1
+POSTHOOK: query: create table alter_bucket_change_col_t1(Serial_Num string, value string) partitioned by (ds string) clustered by (Serial_Num) into 10 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alter_bucket_change_col_t1
+PREHOOK: query: describe formatted alter_bucket_change_col_t1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@alter_bucket_change_col_t1
+POSTHOOK: query: describe formatted alter_bucket_change_col_t1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@alter_bucket_change_col_t1
+# col_name data_type comment
+serial_num string
+value string
+
+# Partition Information
+# col_name data_type comment
+ds string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ bucketing_version 2
+ numFiles 0
+ numPartitions 0
+ numRows 0
+ rawDataSize 0
+ totalSize 0
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 10
+Bucket Columns: [serial_num]
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: alter table alter_bucket_change_col_t1 change Serial_num Key string
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@alter_bucket_change_col_t1
+PREHOOK: Output: default@alter_bucket_change_col_t1
+POSTHOOK: query: alter table alter_bucket_change_col_t1 change Serial_num Key string
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@alter_bucket_change_col_t1
+POSTHOOK: Output: default@alter_bucket_change_col_t1
+PREHOOK: query: describe formatted alter_bucket_change_col_t1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@alter_bucket_change_col_t1
+POSTHOOK: query: describe formatted alter_bucket_change_col_t1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@alter_bucket_change_col_t1
+# col_name data_type comment
+key string
+value string
+
+# Partition Information
+# col_name data_type comment
+ds string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ bucketing_version 2
+#### A masked pattern was here ####
+ numFiles 0
+ numPartitions 0
+ numRows 0
+ rawDataSize 0
+ totalSize 0
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 10
+Bucket Columns: [key]
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
diff --git a/ql/src/test/results/clientpositive/llap/alter_numbuckets_partitioned_table_h23.q.out b/ql/src/test/results/clientpositive/llap/alter_numbuckets_partitioned_table_h23.q.out
index c59a5fc..8b0a942 100644
--- a/ql/src/test/results/clientpositive/llap/alter_numbuckets_partitioned_table_h23.q.out
+++ b/ql/src/test/results/clientpositive/llap/alter_numbuckets_partitioned_table_h23.q.out
@@ -517,6 +517,55 @@ Bucket Columns: [value]
Sort Columns: []
Storage Desc Params:
serialization.format 1
+PREHOOK: query: alter table tst1_n1 change key keys string
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@tst1_n1
+PREHOOK: Output: default@tst1_n1
+POSTHOOK: query: alter table tst1_n1 change key keys string
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@tst1_n1
+POSTHOOK: Output: default@tst1_n1
+PREHOOK: query: describe formatted tst1_n1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tst1_n1
+POSTHOOK: query: describe formatted tst1_n1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tst1_n1
+# col_name data_type comment
+keys string
+value string
+
+# Partition Information
+# col_name data_type comment
+ds string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ bucketing_version 2
+#### A masked pattern was here ####
+ numFiles 12
+ numPartitions 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: 12
+Bucket Columns: [value]
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
PREHOOK: query: alter table tst1_n1 not clustered
PREHOOK: type: ALTERTABLE_CLUSTER_SORT
PREHOOK: Input: default@tst1_n1
@@ -532,7 +581,7 @@ POSTHOOK: query: describe formatted tst1_n1
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tst1_n1
# col_name data_type comment
-key string
+keys string
value string
# Partition Information
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
index 15b3ef7..cf4b85d 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
@@ -22,6 +22,7 @@ import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
+import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.common.repl.ReplConst;
import org.apache.hadoop.hive.common.TableName;
@@ -130,6 +131,14 @@ public class HiveAlterHandler implements AlterHandler {
throw new InvalidOperationException("Invalid column " + validate);
}
+ // Validate bucketedColumns in new table
+ List<String> bucketColumns = MetaStoreServerUtils.validateBucketColumns(newt.getSd());
+ if (CollectionUtils.isNotEmpty(bucketColumns)) {
+ String errMsg = "Bucket columns - " + bucketColumns.toString() + " doesn't match with any table columns";
+ LOG.error(errMsg);
+ throw new InvalidOperationException(errMsg);
+ }
+
Path srcPath = null;
FileSystem srcFs;
Path destPath = null;
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 1296bae..71d2de4 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -4823,6 +4823,7 @@ public class ObjectStore implements RawStore, Configurable {
Map<Integer, Integer> mapping = new HashMap<>();
for (int i = 0; i < oldCols.size(); i++) {
FieldSchema oldCol = oldCols.get(i);
+ //TODO: replace for loop with list.indexOf()
for (int j = 0; j < newCols.size(); j++) {
FieldSchema newCol = newCols.get(j);
if (oldCol.equals(newCol)) {
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java
index 7541518..40738f9 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java
@@ -47,6 +47,7 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.regex.Pattern;
+import java.util.stream.Collectors;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Predicates;
@@ -1560,4 +1561,33 @@ public class MetaStoreServerUtils {
}
return tpart;
}
+
+ /**
+ * Validate that all bucket columns of the given storage descriptor belong to its table columns.
+ * @param sd StorageDescriptor of the given table
+ * @return the (lower-cased) bucket column names that do not match any table column; empty if all
+ *         bucket columns are valid, or null if the table has no bucket columns
+ */
+ public static List<String> validateBucketColumns(StorageDescriptor sd) {
+ List<String> bucketColumnNames = null;
+
+ if (CollectionUtils.isNotEmpty(sd.getBucketCols())) {
+ bucketColumnNames = sd.getBucketCols().stream().map(String::toLowerCase).collect(Collectors.toList());
+ List<String> columnNames = getColumnNames(sd.getCols());
+ if (CollectionUtils.isNotEmpty(columnNames))
+ bucketColumnNames.removeAll(columnNames);
+ }
+ return bucketColumnNames;
+ }
+
+ /**
+ * Generate a list of lower-case column names from the fieldSchema list.
+ * @param cols fieldSchema list
+ * @return lower-cased column name list, or null if cols is null or empty
+ */
+ public static List<String> getColumnNames(List<FieldSchema> cols) {
+ if (CollectionUtils.isNotEmpty(cols)) {
+ return cols.stream().map(FieldSchema::getName).map(String::toLowerCase).collect(Collectors.toList());
+ }
+ return null;
+ }
}
diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
index ec639f9..7018c89 100644
--- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
+++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
@@ -3428,4 +3428,64 @@ public abstract class TestHiveMetaStore {
m.invoke(hms, tbl, part, false, null);
verify(wh, never()).getFileStatusesForLocation(part.getSd().getLocation());
}
+
+ @Test
+ public void testAlterTableRenameBucketedColumnPositive() throws Exception {
+ String dbName = "alterTblDb";
+ String tblName = "altertbl";
+
+ client.dropTable(dbName, tblName);
+ silentDropDatabase(dbName);
+
+ new DatabaseBuilder().setName(dbName).create(client, conf);
+
+ ArrayList<FieldSchema> origCols = new ArrayList<>(2);
+ origCols.add(new FieldSchema("originalColName", ColumnType.STRING_TYPE_NAME, ""));
+ origCols.add(new FieldSchema("income", ColumnType.INT_TYPE_NAME, ""));
+ Table origTbl = new TableBuilder().setDbName(dbName).setTableName(tblName).setCols(origCols)
+ .setBucketCols(Lists.newArrayList("originalColName")).build(conf);
+ client.createTable(origTbl);
+
+ // Rename bucketed column positive case
+ ArrayList<FieldSchema> colsUpdated = new ArrayList<>(origCols);
+ colsUpdated.set(0, new FieldSchema("updatedColName1", ColumnType.STRING_TYPE_NAME, ""));
+ Table tblUpdated = client.getTable(dbName, tblName);
+ tblUpdated.getSd().setCols(colsUpdated);
+ tblUpdated.getSd().getBucketCols().set(0, colsUpdated.get(0).getName());
+ client.alter_table(dbName, tblName, tblUpdated);
+
+ Table resultTbl = client.getTable(dbName, tblUpdated.getTableName());
+ assertEquals("Num bucketed columns is not 1 ", 1, resultTbl.getSd().getBucketCols().size());
+ assertEquals("Bucketed column names incorrect", colsUpdated.get(0).getName(),
+ resultTbl.getSd().getBucketCols().get(0));
+
+ silentDropDatabase(dbName);
+ }
+
+ @Test(expected = InvalidOperationException.class)
+ public void testAlterTableRenameBucketedColumnNegative() throws Exception {
+ String dbName = "alterTblDb";
+ String tblName = "altertbl";
+
+ client.dropTable(dbName, tblName);
+ silentDropDatabase(dbName);
+
+ new DatabaseBuilder().setName(dbName).create(client, conf);
+
+ ArrayList<FieldSchema> origCols = new ArrayList<>(2);
+ origCols.add(new FieldSchema("originalColName", ColumnType.STRING_TYPE_NAME, ""));
+ origCols.add(new FieldSchema("income", ColumnType.INT_TYPE_NAME, ""));
+ Table origTbl = new TableBuilder().setDbName(dbName).setTableName(tblName).setCols(origCols)
+ .setBucketCols(Lists.newArrayList("originalColName")).build(conf);
+ client.createTable(origTbl);
+
+ // Rename bucketed column negative case
+ ArrayList<FieldSchema> colsUpdated = new ArrayList<>(origCols);
+ colsUpdated.set(0, new FieldSchema("updatedColName1", ColumnType.STRING_TYPE_NAME, ""));
+ Table tblUpdated = client.getTable(dbName, tblName);
+ tblUpdated.getSd().setCols(colsUpdated);
+ client.alter_table(dbName, tblName, tblUpdated);
+
+ silentDropDatabase(dbName);
+ }
}