Posted to dev@carbondata.apache.org by GitBox <gi...@apache.org> on 2021/08/23 06:28:58 UTC

[GitHub] [carbondata] Indhumathi27 commented on a change in pull request #4180: [CARBONDATA-4164][CARBONDATA-4198][CARBONDATA-4199][CARBONDATA-4234] Support alter add map, multilevel complex columns and rename/change datatype.

Indhumathi27 commented on a change in pull request #4180:
URL: https://github.com/apache/carbondata/pull/4180#discussion_r693678815



##########
File path: docs/ddl-of-carbondata.md
##########
@@ -775,10 +775,10 @@ CarbonData DDL statements are documented here,which includes:
      ```
      ALTER TABLE carbon ADD COLUMNS (a1 INT, b1 STRING) TBLPROPERTIES('DEFAULT.VALUE.a1'='10')
      ```
-      **NOTE:** Adding of only single-level Complex datatype columns(only array and struct) is supported.
-      Example - 
+      Adding of single-level and nested level of Complex datatype columns is supported.

Review comment:
       This new line is not needed; it is enough to just remove the NOTE.

##########
File path: integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/AlterTableColumnRenameTestCase.scala
##########
@@ -307,17 +296,117 @@ class AlterTableColumnRenameTestCase extends QueryTest with BeforeAndAfterAll {
           make(Array("hello11", "world11")), make(Array(4555)))))
   }
 
-  test("validate alter change datatype for complex children columns") {
+  test("test alter rename and change datatype for map of (primitive/array/struct)") {
     sql("drop table if exists test_rename")
     sql(
-      "CREATE TABLE test_rename (str struct<a:int,b:long>) STORED AS carbondata")
-
-    val ex1 = intercept[ProcessMetaDataException] {
-      sql("alter table test_rename change str str struct<a:long,b:long>")
-    }
-    assert(ex1.getMessage
-      .contains(
-        "column rename operation failed: Altering datatypes of any child column is not supported"))
+      "CREATE TABLE test_rename (map1 map<int,int>, map2 map<string,array<int>>, " +
+      "map3 map<int, map<string,int>>, map4 map<string,struct<b:int>>) STORED AS carbondata")
+    sql("insert into test_rename values (map(1,2), map('a',array(1,2)), " +
+      "map(2,map('hello',1)), map('hi',named_struct('b',3)))")
+    // rename parent column from map1 to map11 and read old rows
+    sql("alter table test_rename change map1 map11 map<int,int>")
+    sql("insert into test_rename values (map(1,2), map('a',array(1,2)), " +
+      "map(2,map('hello',1)), map('hi',named_struct('b',3)))")
+    checkAnswer(sql("select map11 from test_rename"), Seq(Row(Map(1 -> 2)),
+      Row(Map(1 -> 2))))
+    // rename parent column from map2 to map22 and read old rows
+    sql("alter table test_rename change map2 map22 map<string,array<int>>")
+    sql("insert into test_rename values (map(1,2), map('a',array(1,2)), " +
+      "map(2,map('hello',1)), map('hi',named_struct('b',3)))")
+    checkAnswer(sql("select map22 from test_rename"), Seq(Row(Map("a" -> make(Array(1, 2)))),
+      Row(Map("a" -> make(Array(1, 2)))), Row(Map("a" -> make(Array(1, 2))))))
+    // rename child column and change datatype
+    sql("alter table test_rename change map4 map4 map<string,struct<b2:long>>")
+    sql("insert into test_rename values (map(1,2), map('a',array(1,2)), " +
+        "map(2,map('hello',1)), map('hi',named_struct('b',26557544541)))")
+    sql("alter table test_rename compact 'minor'")
+    checkAnswer(sql("select map4['hi']['b2'] from test_rename"),
+      Seq(Row(3), Row(3), Row(3), Row(26557544541L)))
+  }
+
+  test("test alter rename and change datatype for struct integer") {
+    sql("drop table if exists test_rename")
+    sql("CREATE TABLE test_rename (str struct<a:int>) STORED AS carbondata")
+    sql("insert into test_rename values(named_struct('a', 1234))")
+    sql("insert into test_rename values(named_struct('a', 3456))")
+    // only rename operation
+    sql("alter table test_rename change str str1 struct<a1:int>")
+    // both rename and change datatype operation
+    sql("alter table test_rename change str1 str1 struct<a2:long>")
+    sql("insert into test_rename values(named_struct('a2', 26557544541))")
+    // rename child column
+    sql("alter table test_rename change str1 str2 struct<a3:long>")
+    sql("insert into test_rename values(named_struct('a3', 26557544541))")
+    sql("alter table test_rename compact 'minor'")
+    checkAnswer(sql("select str2 from test_rename"),
+      Seq(Row(Row(1234L)), Row(Row(3456L)), Row(Row(26557544541L)), Row(Row(26557544541L))))
+  }
+
+  test("test alter rename and change datatype for map integer") {
+    sql("drop table if exists test_rename")
+    sql("CREATE TABLE test_rename (name string,mapField1 MAP<int, int>) STORED AS carbondata")
+    sql("insert into test_rename values('a',map(1,2))")
+    sql("insert into test_rename values('v',map(3,4))")
+    sql(s"create index si_1 on test_rename(name) as 'carbondata'")
+    // only rename operation
+    sql("alter table test_rename change mapField1 mapField2 MAP<int, int>")
+    sql("insert into test_rename values('df',map(5, 6))")
+    // both rename and change datatype operation
+    sql("alter table test_rename change mapField2 mapField3 MAP<int, long>")
+    sql("insert into test_rename values('sdf',map(7, 26557544541))")
+    sql("alter table test_rename compact 'minor'")
+    checkAnswer(sql("select mapField3 from test_rename"),
+      Seq(Row(Map(1 -> 2L)), Row(Map(3 -> 4L)), Row(Map(5 -> 6L)), Row(Map(7 -> 26557544541L))))
+  }
+
+  test("test alter rename and change datatype for array integer") {
+    sql("drop table if exists test_rename")
+    sql("CREATE TABLE test_rename (arr array<int>) STORED AS carbondata")
+    sql("insert into test_rename values(array(1,2,3))")
+    sql("insert into test_rename values(array(4,5,6))")
+    // only rename operation
+    sql("alter table test_rename change arr arr1 array<int>")
+    sql("insert into test_rename values(array(7,8,9))")
+    // both rename and change datatype operation
+    sql("alter table test_rename change arr1 arr2 array<long>")
+    sql("insert into test_rename values(array(26557544541,3,46557544541))")
+    sql("alter table test_rename compact 'minor'")
+    checkAnswer(sql("select arr2 from test_rename"),
+      Seq(Row(make(Array(1, 2, 3))), Row(make(Array(4, 5, 6))), Row(make(Array(7, 8, 9))),
+        Row(make(Array(26557544541L, 3, 46557544541L)))))
+  }
+
+  test("test alter rename and change datatype for complex decimal types") {
+    sql("drop table if exists test_rename")
+    sql("CREATE TABLE test_rename (strField struct<a:decimal(5,2)>," +
+        "mapField1 map<int,decimal(5,2)>, mapField2 map<int,struct<a:decimal(5,2)>>, " +
+        "arrField array<decimal(5,2)>) STORED AS carbondata")
+    sql("insert into test_rename values(named_struct('a', 123.45),map(1, 123.45)," +
+        "map(1, named_struct('a', 123.45)),array(123.45))")
+    sql("insert into test_rename values(named_struct('a', 123.45),map(2, 123.45)," +
+        "map(2, named_struct('a', 123.45)),array(123.45))")
+    // rename and change datatype
+    sql("alter table test_rename change strField strField1 struct<a1:decimal(6,2)>")
+    sql("alter table test_rename change mapField1 mapField11 map<int,decimal(6,2)>")
+    // rename and change nested decimal datatype
+    sql("alter table test_rename change mapField2 mapField22 map<int,struct<a2:decimal(6,2)>>")

Review comment:
       The schema can also be validated after the datatype change.
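       For example, something along these lines could follow the ALTER statements (a sketch;
       it assumes the DESCRIBE output exposes the column name and data type as the first two
       string fields):
       ```
       // hypothetical schema check after the child-datatype change
       val describedSchema = sql("describe test_rename").collect()
       assert(describedSchema.exists(row => row.getString(0).equalsIgnoreCase("mapField22") &&
         row.getString(1).contains("decimal(6,2)")))
       ```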

##########
File path: core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java
##########
@@ -119,12 +122,15 @@
         for (CarbonDimension tableDimension : tableComplexDimension) {
           if (isColumnMatches(isTransactionalTable, queryDimension.getDimension(),
               tableDimension)) {
-            ProjectionDimension currentBlockDimension = null;
+            ProjectionDimension currentBlockDimension;
             // If projection dimension is child of struct field and contains Parent Ordinal
             if (null != queryDimension.getDimension().getComplexParentDimension()) {
               currentBlockDimension = new ProjectionDimension(queryDimension.getDimension());
             } else {
               currentBlockDimension = new ProjectionDimension(tableDimension);
+              newComplexChildrenDatatypes = new HashMap<>();

Review comment:
       add comments here

##########
File path: integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala
##########
@@ -1233,9 +1226,27 @@ object CarbonParserUtil {
           }
         }
         dataTypeInfo.setChildren(childTypeInfoList)
+      case mapType: MapType =>
+        val keyType: DataType = mapType.keyType
+        val valType: DataType = mapType.valueType
+        var childTypeInfoList: List[DataTypeInfo] = List()
+        val childName1 = columnName + ".key"
+        val childName2 = columnName + ".value"
+        val keyTypeValues = keyType match {
+          case d: DecimalType => Some(List((d.precision, d.scale)))
+          case _ => None
+        }
+        val valTypeValues = valType match {

Review comment:
       This match can be extracted to a new method and reused.
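       One possible shape for that shared helper (a sketch; the method name is hypothetical):
       ```
       private def getDecimalInfo(dataType: DataType): Option[List[(Int, Int)]] = {
         dataType match {
           // carry precision and scale along only for decimal children
           case d: DecimalType => Some(List((d.precision, d.scale)))
           case _ => None
         }
       }
       ```
       Both keyTypeValues and valTypeValues could then just call getDecimalInfo.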

##########
File path: integration/spark/src/main/java/org/apache/spark/sql/secondaryindex/query/SecondaryIndexQueryResultProcessor.java
##########
@@ -362,6 +367,9 @@ private void processResult(List<CarbonIterator<RowBatch>> detailQueryResultItera
    */
   private Object getData(Object[] data, int index, DataType dataType) {
     if (data == null || data.length == 0) {
+      if (DataTypeUtil.isPrimitiveColumn(dataType)) {

Review comment:
       Please check the impact of this change on the Timestamp type and add a test case.
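       A rough sketch of such a test (table and index names are hypothetical, assuming the
       usual QueryTest helpers):
       ```
       test("test SI load and query on table containing a timestamp column") {
         sql("drop table if exists si_timestamp_test")
         sql("create table si_timestamp_test (name string, time timestamp) stored as carbondata")
         sql("insert into si_timestamp_test values ('abc', '2021-08-23 06:28:58')")
         sql("create index si_ts on si_timestamp_test(name) as 'carbondata'")
         // the filter on name should be served through the secondary index
         checkAnswer(sql("select name, time from si_timestamp_test where name = 'abc'"),
           Seq(Row("abc", java.sql.Timestamp.valueOf("2021-08-23 06:28:58"))))
       }
       ```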

##########
File path: core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java
##########
@@ -165,6 +171,42 @@
     return presentDimension;
   }
 
+  public static void compareDatatypes(CarbonDimension currentDimension) {
+    if (currentDimension == null) {
+      return;
+    }
+    if (currentDimension.isComplex()) {
+      for (CarbonDimension childDimension : currentDimension.getListOfChildDimensions()) {
+        compareDatatypes(childDimension);
+      }
+    } else {
+      DataType newComplexChildDataType =
+          newComplexChildrenDatatypes.get(currentDimension.getColumnId());
+      // insert datatypes only in case of primitive types, as only they are allowed to be altered.
+      if (newComplexChildDataType != null && !(newComplexChildDataType

Review comment:
       Please check once whether changing the decimal type is handled here.
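       For instance, a read of the pre-alter rows through the widened decimal child (as in the
       decimal test case above) would exercise this path; a sketch:
       ```
       // hypothetical read-path check: rows written as decimal(5,2) read back through
       // the struct child altered to decimal(6,2)
       checkAnswer(sql("select strField1.a1 from test_rename"),
         Seq(Row(new java.math.BigDecimal("123.45")), Row(new java.math.BigDecimal("123.45"))))
       ```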

##########
File path: docs/ddl-of-carbondata.md
##########
@@ -866,6 +866,12 @@ Users can specify which columns to include and exclude for local dictionary gene
      ALTER TABLE test_db.carbon CHANGE oldArray newArray array<int>
      ```
 
+     Example 7: Change column name in column: mapField map\<int, int> from mapField to mapField1.

Review comment:
       Some examples for changing a child column's datatype can be added here.
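       For instance, following the pattern of the existing examples (a sketch with hypothetical
       column names; the doc itself would show only the SQL statements):
       ```
       // change the value datatype of mapField1 map<int,int> from int to long, keeping the name
       sql("ALTER TABLE test_db.carbon CHANGE mapField1 mapField1 map<int,long>")
       // rename a struct child and change its datatype in one statement
       sql("ALTER TABLE test_db.carbon CHANGE strField strField struct<a1:long>")
       ```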

##########
File path: integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala
##########
@@ -173,6 +175,48 @@ class ChangeDataTypeTestCases extends QueryTest with BeforeAndAfterAll {
     test_change_data_type()
   }
 
+  test("test alter change datatype for complex types") {
+    sql("drop table if exists test_rename")
+    sql("CREATE TABLE test_rename (name string) STORED AS carbondata")
+    // add complex columns
+    sql("alter table test_rename add columns(mapField1 MAP<int, int>, " +
+        "strField1 struct<a:int,b:decimal(5,2)>, arrField1 array<int>)")
+    sql("insert into test_rename values('df',map(5, 6),named_struct('a',1,'b', 123.45),array(1))")
+    // change datatype operation
+    sql("alter table test_rename change mapField1 mapField1 MAP<int, long>")
+    sql("alter table test_rename change strField1 strField1 struct<a:long,b:decimal(6,2)>")

Review comment:
       Please verify the decimal type once with a lower-to-higher precision and scale change, and vice versa.
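       A sketch of such a check (assuming, as in AlterTableColumnRenameTestCase, that an
       invalid change surfaces as ProcessMetaDataException):
       ```
       // widening precision and scale further should be accepted
       sql("alter table test_rename change strField1 strField1 struct<a:long,b:decimal(7,3)>")
       // narrowing back down should be rejected
       intercept[ProcessMetaDataException] {
         sql("alter table test_rename change strField1 strField1 struct<a:long,b:decimal(5,2)>")
       }
       ```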

##########
File path: integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala
##########
@@ -1124,24 +1125,17 @@ object CarbonParserUtil {
     val dataTypeName = DataTypeConverterUtil.convertToCarbonType(complexField).getName
     val dataTypeInfo = CarbonParserUtil.parseDataType(columnName, dataTypeName.toLowerCase, values)
     complexField.dataType match {
-      case Some(CarbonCommonConstants.ARRAY) =>
-        val childField = complexField.children.get(0)
-        val childType = childField.dataType
-        val childName = columnName + CarbonCommonConstants.POINT + childField.name
-        val childValues = childType match {
-          case d: DecimalType => Some(List((d.precision, d.scale)))
-          case _ => None
-        }
-        val childDatatypeInfo = parseDataType(childName, childField, childValues)
-        dataTypeInfo.setChildren(List(childDatatypeInfo))
-      case Some(CarbonCommonConstants.STRUCT) =>
+      case Some(CarbonCommonConstants.ARRAY) | Some(CarbonCommonConstants.STRUCT) |
+           Some(CarbonCommonConstants.MAP) =>
         var childTypeInfoList: List[DataTypeInfo] = null
         for (childField <- complexField.children.get) {
           val childType = childField.dataType
-          val childName = columnName + CarbonCommonConstants.POINT + childField.name.get
-          val childValues = childType match {
-            case d: DecimalType => Some(List((d.precision, d.scale)))
-            case _ => None
+          val childName = columnName + CarbonCommonConstants.POINT + childField.column
+          val childValues = if (childType.get.contains(CarbonCommonConstants.DECIMAL)) {
+            val decimalInfo = ("""\d+""".r findAllIn childType.get).toList

Review comment:
       please add comments here
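       For instance, a comment along these lines would capture the intent (wording only, as a
       sketch):
       ```
       // childType holds the raw type string of the child field, e.g. "decimal(6,2)";
       // extract the two numbers so precision and scale reach the child DataTypeInfo
       val decimalInfo = ("""\d+""".r findAllIn childType.get).toList
       ```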




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@carbondata.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org