You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@carbondata.apache.org by GitBox <gi...@apache.org> on 2021/04/07 06:07:32 UTC

[GitHub] [carbondata] Indhumathi27 commented on a change in pull request #4115: [CARBONDATA-4163] Support adding of single-level complex columns(array/struct)

Indhumathi27 commented on a change in pull request #4115:
URL: https://github.com/apache/carbondata/pull/4115#discussion_r608358732



##########
File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala
##########
@@ -275,6 +275,56 @@ class AlterTableColumnSchemaGenerator(
         allColumns ++= Seq(columnSchema)
       }
       newCols ++= Seq(columnSchema)
+      if(field.dataType== Some("Array") ) {
+        columnSchema.setNumberOfChild(field.children.size)
+        val childField = field.children.get(0)

Review comment:
       Please handle if array<complex>/struct<complex> column is provided for add column

##########
File path: integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/alterTable/TestAlterTableAddColumns.scala
##########
@@ -136,6 +137,155 @@ class TestAlterTableAddColumns extends QueryTest with BeforeAndAfterAll {
     sql(s"""DROP TABLE IF EXISTS ${ tableName }""")
   }
 
+  test("Test alter add for arrays enabling local dictionary") {
+    sql("DROP TABLE IF EXISTS alter_com")
+    sql(
+      "CREATE TABLE alter_com(intField INT, arr1 array<int>) " +
+      "STORED AS carbondata TBLPROPERTIES('local_dictionary_enable'='true')")
+
+    sql("insert into alter_com values(1,array(1) )")
+    sql("insert into alter_com values(2,array(9,0) )")
+    sql("insert into alter_com values(3,array(11,12,13) )")
+
+    sql(
+      "ALTER TABLE alter_com ADD COLUMNS(arr2 array<int>, arr3 array<string>) TBLPROPERTIES" +
+      "('LOCAL_DICTIONARY_INCLUDE'='arr3')")
+    val schema = sql("describe alter_com").collect()
+    assert(schema.size == 4)
+
+    // For the previous segments the default value for newly added array column is null
+    sql("insert into alter_com values(4,array(2),array(1,2,3,4),array('abc','def'))")
+    sql("insert into alter_com values(5,array(1,2),array(1), array('Hulk','Thor'))")
+    sql(
+      "insert into alter_com values(6,array(1,2,3),array(1234,8,33333),array('Iron','Man'," +
+      "'Jarvis'))")
+
+    val totalRows = sql("select * from alter_com").collect()
+    val a = sql("select * from alter_com where array_contains(arr2,2)").collect
+    val b = sql("select * from alter_com where array_contains(arr3,'Thor')").collect
+    val c = sql("select * from alter_com where intField = 1").collect
+    assert(totalRows.size == 6)
+    assert(a.size == 1)
+    assert(b.size == 1)
+    // check default value for newly added array column that is index - 3 and 4
+    assert(c(0)(2) == null && c(0)(3) == null)
+    sql("DROP TABLE IF EXISTS alter_com")
+
+  }
+
+  test("Test alter add for arrays disabling local dictionary") {
+    sql("DROP TABLE IF EXISTS alter_com")
+    sql(
+      "CREATE TABLE alter_com(intField INT, arr1 array<int>) " +
+      "STORED AS carbondata TBLPROPERTIES('local_dictionary_enable'='false')")
+
+    sql("insert into alter_com values(1,array(1) )")
+    sql("insert into alter_com values(2,array(9,0) )")
+    sql("insert into alter_com values(3,array(11,12,13) )")
+    sql(
+      "ALTER TABLE alter_com ADD COLUMNS(arr2 array<double>, arr3 array<string>) TBLPROPERTIES" +
+      "('LOCAL_DICTIONARY_EXCLUDE'='arr3')")
+    val schema = sql("describe alter_com").collect()
+    assert(schema.size == 4)
+
+    // For the previous segments the default value for newly added array column is null
+    sql("insert into alter_com values(4,array(2),array(1,2,3,4),array('abc','def'))")
+    sql("insert into alter_com values(5,array(1,2),array(1), array('Hulk','Thor'))")
+    sql(
+      "insert into alter_com values(6,array(1,2,3),array(1234,80,3333),array('Iron','Man'," +
+      "'Jarvis'))")
+
+    val totalRows = sql("select * from alter_com").collect()
+    val a = sql("select * from alter_com where array_contains(arr2,2)").collect
+    val b = sql("select * from alter_com where array_contains(arr3,'Thor')").collect
+    val c = sql("select * from alter_com where intField = 1").collect
+    assert(totalRows.size == 6)
+    assert(a.size == 1)
+    assert(b.size == 1)
+    // check default value for newly added array column that is index - 3 and 4
+    assert(c(0)(2) == null && c(0)(3) == null)
+    sql("DROP TABLE IF EXISTS alter_com")
+
+  }
+
+  test("Test alter add for structs enabling local dictionary") {
+    sql("DROP TABLE IF EXISTS alter_struct")
+    sql(
+      "create table alter_struct(roll int, department struct<id1:string,name1:string>) STORED AS " +
+      "carbondata TBLPROPERTIES('local_dictionary_enable'='true')")
+    sql(
+      "insert into alter_struct values(1, named_struct('id1', 'id1','name1','name1'))")
+    sql(
+      "ALTER TABLE alter_struct ADD COLUMNS(struct1 struct<a:string,b:string>, temp string," +
+      " intField int, struct2 struct<c:string,d:string,e:int>, arr array<int>) TBLPROPERTIES " +
+      "('LOCAL_DICTIONARY_INCLUDE'='struct1, struct2')")
+    val schema = sql("describe alter_struct").collect()
+    assert(schema.size == 7)
+
+    sql(
+      "insert into alter_struct values(2, named_struct('id1', 'id1','name1','name1'), " +
+      "named_struct('a','id2','b', 'abc2'), 'hello world', 5, named_struct('c','id3'," +
+      "'d', 'abc3','e', 22), array(1,2,3)  )")
+    sql(
+      "insert into alter_struct values(3, named_struct('id1', 'id1','name1','name1'), " +
+      "named_struct('a','id2.1','b', 'abc2.1'), 'India', 5, named_struct('c','id3.1'," +
+      "'d', 'abc3.1','e', 25), array(4,5)  )")
+
+    val totalRows = sql("select * from alter_struct").collect()
+    val a = sql("select * from alter_struct where struct1.a = 'id2' ").collect()
+    val b = sql(
+      "select * from alter_struct where struct1.a = 'id2' or struct2.c = 'id3.1' or " +
+      "array_contains(arr,5) ").collect()
+    val c = sql("select * from alter_struct where roll = 1").collect()
+
+    assert(totalRows.size == 3)
+    assert(a.size == 1)
+    assert(b.size == 2)
+    // check default value for newly added struct column
+    assert(c(0)(2) == null)
+    sql("DROP TABLE IF EXISTS alter_struct")
+
+  }
+
+  test("Test alter add for structs, disabling local dictionary") {
+    sql("DROP TABLE IF EXISTS alter_struct")
+    sql(
+      "create table alter_struct(roll int, department struct<id1:string,name1:string>) STORED AS " +
+      "carbondata TBLPROPERTIES('local_dictionary_enable'='false')")

Review comment:
       Please move common queries to a method, for Local Dict enable/disable, for both array and struct

##########
File path: core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java
##########
@@ -280,6 +280,9 @@ public static void fillQueryDimensionChunkIndexes(
 
   private static void fillParentDetails(Map<Integer, Integer> dimensionToBlockIndexMap,
       CarbonDimension dimension, Map<Integer, GenericQueryType> complexTypeMap) {
+    if (!dimensionToBlockIndexMap.containsKey(dimension.getOrdinal())) {

Review comment:
       Please add comments why this change is required

##########
File path: index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestSIWithComplexArrayType.scala
##########
@@ -43,6 +43,83 @@ class TestSIWithComplexArrayType extends QueryTest with BeforeAndAfterEach {
     sql("drop table if exists complextable5")
   }
 
+  test("Test alter add array column before creating SI") {
+    sql("drop table if exists complextable")
+    sql("create table complextable (id string, country array<string>, name string) stored as carbondata")
+    sql("insert into complextable select 1,array('china', 'us'), 'b'")
+    sql("insert into complextable select 2,array('pak'), 'v'")
+
+    sql("drop index if exists index_11 on complextable")
+    sql(
+      "ALTER TABLE complextable ADD COLUMNS(arr2 array<string>)")
+    sql("insert into complextable select 3,array('china'), 'f',array('hello','world')")
+    sql("insert into complextable select 4,array('india'),'g',array('iron','man','jarvis')")
+
+    val result1 = sql("select * from complextable where array_contains(arr2,'iron')")
+    val result2 = sql("select * from complextable where arr2[0]='iron'")
+    sql("create index index_11 on table complextable(arr2) as 'carbondata'")
+    val df1 = sql(" select * from complextable where array_contains(arr2,'iron')")
+    val df2 = sql(" select * from complextable where arr2[0]='iron'")
+    if (!isFilterPushedDownToSI(df1.queryExecution.sparkPlan)) {
+      assert(false)
+    } else {
+      assert(true)
+    }
+    if (!isFilterPushedDownToSI(df2.queryExecution.sparkPlan)) {
+      assert(false)
+    } else {
+      assert(true)
+    }
+    val doNotHitSIDf = sql(" select * from complextable where array_contains(arr2,'iron') and array_contains(arr2,'man')")
+    if (isFilterPushedDownToSI(doNotHitSIDf.queryExecution.sparkPlan)) {
+      assert(false)
+    } else {
+      assert(true)
+    }
+    checkAnswer(result1, df1)
+    checkAnswer(result2, df2)
+  }
+
+  test("Test alter add array column after creating SI") {
+    sql("drop table if exists complextable")
+    sql("create table complextable (id string, country array<string>, name string) stored as carbondata")
+    sql("insert into complextable select 1,array('china', 'us'), 'b'")
+    sql("insert into complextable select 2,array('pak'), 'v'")
+
+    sql("drop index if exists index_11 on complextable")
+    sql("create index index_11 on table complextable(country) as 'carbondata'")
+
+    sql(

Review comment:
       Please add testcases for Add column of array & struct of all primitive types




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org