You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ak...@apache.org on 2020/09/02 06:18:08 UTC
[carbondata] branch master updated: [CARBONDATA-3960] Default
comment should be null when adding columns
This is an automated email from the ASF dual-hosted git repository.
akashrn5 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 77bbc69 [CARBONDATA-3960] Default comment should be null when adding columns
77bbc69 is described below
commit 77bbc69823d1c3a95e7a668b773ecc92504309bd
Author: QiangCai <qi...@qq.com>
AuthorDate: Tue Aug 25 19:05:07 2020 +0800
[CARBONDATA-3960] Default comment should be null when adding columns
Why is this PR needed?
1. column comment is an empty string by default when adding a column
2. there is too much redundant code
What changes were proposed in this PR?
1. change default column comment to null
2. refactor the code
Does this PR introduce any user interface change?
No
Is any new testcase added?
Yes
This closes #3898
---
.../spark/sql/catalyst/CarbonParserUtil.scala | 148 +++++----------------
.../command/carbonTableSchemaCommon.scala | 7 +-
.../spark/sql/parser/CarbonSpark2SqlParser.scala | 2 +-
.../alterTable/TestAlterTableAddColumns.scala | 27 ++++
4 files changed, 61 insertions(+), 123 deletions(-)
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala b/integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala
index 208befc..e4ec049 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/catalyst/CarbonParserUtil.scala
@@ -1047,77 +1047,31 @@ object CarbonParserUtil {
private def normalizeType(field: Field): Field = {
val dataType = field.dataType.getOrElse("NIL")
dataType match {
- case "string" =>
- Field(field.column, Some("String"), field.name, Some(null), field.parent,
- field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema,
- field.columnComment)
- case "smallint" =>
- Field(field.column, Some("SmallInt"), field.name, Some(null),
- field.parent, field.storeType, field.schemaOrdinal,
- field.precision, field.scale, field.rawSchema, field.columnComment)
- case "integer" | "int" =>
- Field(field.column, Some("Integer"), field.name, Some(null),
- field.parent, field.storeType, field.schemaOrdinal,
- field.precision, field.scale, field.rawSchema, field.columnComment)
- case "long" => Field(field.column, Some("Long"), field.name, Some(null), field.parent,
- field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema,
- field.columnComment)
- case "double" => Field(field.column, Some("Double"), field.name, Some(null), field.parent,
- field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema,
- field.columnComment)
- case "float" => Field(field.column, Some("Double"), field.name, Some(null), field.parent,
- field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema,
- field.columnComment)
- case "timestamp" =>
- Field(field.column, Some("Timestamp"), field.name, Some(null),
- field.parent, field.storeType, field.schemaOrdinal,
- field.precision, field.scale, field.rawSchema, field.columnComment)
- case "date" =>
- Field(field.column, Some("Date"), field.name, Some(null),
- field.parent, field.storeType, field.schemaOrdinal,
- field.precision, field.scale, field.rawSchema, field.columnComment)
- case "numeric" => Field(field.column, Some("Numeric"), field.name, Some(null), field.parent,
- field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema,
- field.columnComment)
+ case "string" => field.copy(dataType = Some("String"))
+ case "smallint" => field.copy(dataType = Some("SmallInt"))
+ case "integer" | "int" => field.copy(dataType = Some("Integer"))
+ case "long" => field.copy(dataType = Some("Long"))
+ case "double" => field.copy(dataType = Some("Double"))
+ case "float" => field.copy(dataType = Some("Double"))
+ case "timestamp" => field.copy(dataType = Some("Timestamp"))
+ case "date" => field.copy(dataType = Some("Date"))
+ case "numeric" => field.copy(dataType = Some("Numeric"))
case "array" =>
- Field(field.column, Some("Array"), field.name,
- field.children.map(f => f.map(normalizeType(_))),
- field.parent, field.storeType, field.schemaOrdinal,
- field.precision, field.scale, field.rawSchema, field.columnComment)
+ field.copy(dataType = Some("Array"), children = field.children.map(_.map(normalizeType)))
case "struct" =>
- Field(field.column, Some("Struct"), field.name,
- field.children.map(f => f.map(normalizeType(_))),
- field.parent, field.storeType, field.schemaOrdinal,
- field.precision, field.scale, field.rawSchema, field.columnComment)
+ field.copy(dataType = Some("Struct"), children = field.children.map(_.map(normalizeType)))
case "map" =>
- Field(field.column, Some("Map"), field.name,
- field.children.map(f => f.map(normalizeType(_))),
- field.parent, field.storeType, field.schemaOrdinal,
- field.precision, field.scale, field.rawSchema, field.columnComment)
- case "bigint" => Field(field.column, Some("BigInt"), field.name, Some(null), field.parent,
- field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema,
- field.columnComment, field.spatialIndex)
- case "decimal" => Field(field.column, Some("Decimal"), field.name, Some(null), field.parent,
- field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema,
- field.columnComment)
- case "boolean" => Field(field.column, Some("Boolean"), field.name, Some(null), field.parent,
- field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema,
- field.columnComment)
+ field.copy(dataType = Some("Map"), children = field.children.map(_.map(normalizeType)))
+ case "bigint" => field.copy(dataType = Some("BigInt"))
+ case "decimal" => field.copy(dataType = Some("Decimal"))
+ case "boolean" => field.copy(dataType = Some("Boolean"))
+ case "binary" => field.copy(dataType = Some("Binary"))
// checking if the nested data type contains the child type as decimal(10,0),
// if it is present then extracting the precision and scale. resetting the data type
// with Decimal.
case _ if dataType.startsWith("decimal") =>
val (precision, scale) = CommonUtil.getScaleAndPrecision(dataType)
- Field(field.column,
- Some("Decimal"),
- field.name,
- Some(null),
- field.parent,
- field.storeType, field.schemaOrdinal, precision,
- scale,
- field.rawSchema,
- field.columnComment
- )
+ field.copy(dataType = Some("Decimal"), precision = precision, scale = scale)
case _ =>
field
}
@@ -1125,67 +1079,25 @@ object CarbonParserUtil {
private def addParent(field: Field): Field = {
field.dataType.getOrElse("NIL") match {
- case "Array" => Field(field.column, Some("Array"), field.name,
- field.children.map(f => f.map(appendParentForEachChild(_, field.column))), field.parent,
- field.storeType, field.schemaOrdinal, rawSchema = field.rawSchema,
- columnComment = field.columnComment)
- case "Struct" => Field(field.column, Some("Struct"), field.name,
- field.children.map(f => f.map(appendParentForEachChild(_, field.column))), field.parent,
- field.storeType, field.schemaOrdinal, rawSchema = field.rawSchema,
- columnComment = field.columnComment)
- case "Map" => Field(field.column, Some("Map"), field.name,
- field.children.map(f => f.map(appendParentForEachChild(_, field.column))), field.parent,
- field.storeType, field.schemaOrdinal, rawSchema = field.rawSchema,
- columnComment = field.columnComment)
+ case "Array" | "Struct" | "Map" =>
+ field.copy(children = field.children.map(_.map(appendParentForEachChild(_, field.column))))
case _ => field
}
}
private def appendParentForEachChild(field: Field, parentName: String): Field = {
field.dataType.getOrElse("NIL") match {
- case "String" => Field(parentName + "." + field.column, Some("String"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case "binary" => Field(parentName + "." + field.column, Some("Binary"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case "SmallInt" => Field(parentName + "." + field.column, Some("SmallInt"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case "Integer" => Field(parentName + "." + field.column, Some("Integer"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case "Long" => Field(parentName + "." + field.column, Some("Long"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case "Double" => Field(parentName + "." + field.column, Some("Double"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case "Float" => Field(parentName + "." + field.column, Some("Double"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case "Timestamp" => Field(parentName + "." + field.column, Some("Timestamp"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case "Date" => Field(parentName + "." + field.column, Some("Date"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case "Numeric" => Field(parentName + "." + field.column, Some("Numeric"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case "Array" => Field(parentName + "." + field.column, Some("Array"),
- Some(parentName + "." + field.name.getOrElse(None)),
- field.children
- .map(f => f.map(appendParentForEachChild(_, parentName + "." + field.column))),
- parentName)
- case "Struct" => Field(parentName + "." + field.column, Some("Struct"),
- Some(parentName + "." + field.name.getOrElse(None)),
- field.children
- .map(f => f.map(appendParentForEachChild(_, parentName + "." + field.column))),
- parentName)
- case "Map" => Field(parentName + "." + field.column, Some("Map"),
- Some(parentName + "." + field.name.getOrElse(None)),
- field.children
- .map(f => f.map(appendParentForEachChild(_, parentName + "." + field.column))),
- parentName)
- case "BigInt" => Field(parentName + "." + field.column, Some("BigInt"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case "Decimal" => Field(parentName + "." + field.column, Some("Decimal"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName,
- field.storeType, field.schemaOrdinal, field.precision, field.scale)
- case "Boolean" => Field(parentName + "." + field.column, Some("Boolean"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case _ => field
+ case "Array" | "Struct" | "Map" =>
+ val newChildren = field.children
+ .map(_.map(appendParentForEachChild(_, parentName + "." + field.column)))
+ field.copy(column = parentName + "." + field.column,
+ name = Some(parentName + "." + field.name.getOrElse(None)),
+ children = newChildren,
+ parent = parentName)
+ case _ =>
+ field.copy(column = parentName + "." + field.column,
+ name = Some(parentName + "." + field.name.getOrElse(None)),
+ parent = parentName)
}
}
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala
index 93f6900..d96c4e1 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala
@@ -23,6 +23,7 @@ import java.util.UUID
import scala.collection.JavaConverters._
import scala.collection.mutable
+import org.apache.commons.lang3.StringUtils
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.TableIdentifier
@@ -69,7 +70,7 @@ case class Field(column: String, var dataType: Option[String], name: Option[Stri
storeType: Option[String] = Some("columnar"),
var schemaOrdinal: Int = -1,
var precision: Int = 0, var scale: Int = 0, var rawSchema: String = "",
- var columnComment: String = "", var spatialIndex: Boolean = false) {
+ var columnComment: String = null, var spatialIndex: Boolean = false) {
override def equals(o: Any) : Boolean = o match {
case that: Field =>
that.column.equalsIgnoreCase(this.column)
@@ -624,9 +625,7 @@ class TableNewProcessor(cm: TableModel) {
if (isVarcharColumn(colName)) {
columnSchema.setDataType(DataTypes.VARCHAR)
}
- // TODO: Need to fill RowGroupID, converted type
- // & Number of Children after DDL finalization
- if (field.columnComment.nonEmpty) {
+ if (field.columnComment != null) {
var columnProperties = columnSchema.getColumnProperties
if (columnProperties == null) {
columnProperties = new util.HashMap[String, String]()
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala b/integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala
index 7b747d8..855b989 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala
@@ -722,7 +722,7 @@ class CarbonSpark2SqlParser extends CarbonDDLSqlParser {
var columnComment: String = ""
var plainComment: String = ""
if (col.getComment().isDefined) {
- columnComment = " comment \"" + col.getComment().get + "\""
+ columnComment = " comment '" + col.getComment().get + "'"
plainComment = col.getComment().get
}
// external table use float data type
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/alterTable/TestAlterTableAddColumns.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/alterTable/TestAlterTableAddColumns.scala
index 7135496..d57bd91 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/alterTable/TestAlterTableAddColumns.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/alterTable/TestAlterTableAddColumns.scala
@@ -29,13 +29,20 @@ import org.apache.carbondata.core.util.CarbonProperties
class TestAlterTableAddColumns extends QueryTest with BeforeAndAfterAll {
override def beforeAll(): Unit = {
+ dropTable()
}
override def afterAll(): Unit = {
+ dropTable()
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.ENABLE_VECTOR_READER, "true")
}
+ private def dropTable(): Unit = {
+ sql("drop table if exists test_add_column_with_comment")
+ sql("drop table if exists test_add_column_for_complex_table")
+ }
+
private def testAddColumnForComplexTable(): Unit = {
val tableName = "test_add_column_for_complex_table"
sql(s"""DROP TABLE IF EXISTS ${ tableName }""")
@@ -107,6 +114,26 @@ class TestAlterTableAddColumns extends QueryTest with BeforeAndAfterAll {
sql(s"""DROP TABLE IF EXISTS ${ tableName }""")
}
+ test("alter table add columns with comment") {
+ sql("""create table test_add_column_with_comment(
+ | col1 string comment 'col1 comment',
+ | col2 int,
+ | col3 string)
+ | stored as carbondata""".stripMargin)
+ sql(
+ """alter table test_add_column_with_comment add columns(
+ | col4 string comment "col4 comment",
+ | col5 int,
+ | col6 string comment "")""".stripMargin)
+ val describe = sql("describe test_add_column_with_comment")
+ var count = describe.filter("col_name='col5' and comment is null").count()
+ assertResult(1)(count)
+ count = describe.filter("col_name='col4' and comment = 'col4 comment'").count()
+ assertResult(1)(count)
+ count = describe.filter("col_name='col6' and comment = ''").count()
+ assertResult(1)(count)
+ }
+
test("[CARBONDATA-3596] Fix exception when execute load data command or select sql on a table which includes complex columns after execute 'add column' command") {
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.ENABLE_VECTOR_READER, "true")