Posted to commits@carbondata.apache.org by ku...@apache.org on 2020/10/21 04:56:21 UTC

[carbondata] branch master updated: [CARBONDATA-4036]Fix special char(`) issue in create table, when column name contains ` character

This is an automated email from the ASF dual-hosted git repository.

kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 308f16c  [CARBONDATA-4036]Fix special char(`) issue in create table, when column name contains ` character
308f16c is described below

commit 308f16cafe55e3a53db563ae916bf55b06cbad36
Author: akashrn5 <ak...@gmail.com>
AuthorDate: Wed Sep 16 20:14:40 2020 +0530

    [CARBONDATA-4036]Fix special char(`) issue in create table, when column
    name contains ` character
    
    Why is this PR needed?
    When a table is created with a column name containing the special
    character `, parsing fails while creating the field object. This is
    because the ` character is used for tokenizing during parsing, so a `
    present in a column name is mistaken for a token delimiter, leading to
    creation of a wrong field object.
    
    What changes were proposed in this PR?
    When the column name contains the ` character, replace it with the !
    character just for parsing; once parsing is done and the field object
    is created, replace it back with the original character. This approach
    was chosen to avoid changes in the parsing layer, which might otherwise
    affect other layers of parsing. A standalone sketch of the approach
    follows below.
    
    This closes #3983
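
A minimal, self-contained sketch of the replace-and-restore idea described
above (BacktickEscapeSketch and all names in it are illustrative, not the
actual CarbonSpark2SqlParser code):

    object BacktickEscapeSketch {
      def main(args: Array[String]): Unit = {
        val columnName = "`a`bc`!!d`"
        // record the positions of every ` in the original name
        val backtickIndexes = columnName.zipWithIndex.collect {
          case ('`', index) => index
        }
        // replace ` with ! so the tokenizer never sees the quote character
        val parseSafeName = columnName.replaceAll("`", "!")
        // ... parseSafeName would be handed to the parser here ...
        // afterwards, restore ` only at the recorded positions, so genuine
        // ! characters in the name (like the !! above) are left untouched
        val restoredName = backtickIndexes.foldLeft(parseSafeName) {
          (name, index) => name.updated(index, '`')
        }
        assert(restoredName == columnName)
      }
    }

Restoring by recorded index, rather than replacing every ! back with `,
matters because a column name may legitimately contain ! characters of
its own.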
---
 .../spark/sql/parser/CarbonSpark2SqlParser.scala   | 58 +++++++++++++++-------
 .../testsuite/dataload/TestLoadDataGeneral.scala   |  8 +--
 2 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala b/integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala
index feb5775..2ee3c86 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala
@@ -724,6 +724,17 @@ class CarbonSpark2SqlParser extends CarbonDDLSqlParser {
     }
 
   def getFields(schema: Seq[StructField], isExternal: Boolean = false): Seq[Field] = {
+    def getScannerInput(col: StructField,
+        columnComment: String,
+        columnName: String) = {
+      if (col.dataType.catalogString == "float" && !isExternal) {
+        '`' + columnName + '`' + " double" + columnComment
+      } else {
+        '`' + columnName + '`' + ' ' + col.dataType.catalogString +
+        columnComment
+      }
+    }
+
     schema.map { col =>
       var columnComment: String = ""
       var plainComment: String = ""
@@ -731,15 +742,20 @@ class CarbonSpark2SqlParser extends CarbonDDLSqlParser {
         columnComment = " comment '" + col.getComment().get + "'"
         plainComment = col.getComment().get
       }
+      val indexesToReplace = col.name.toCharArray.zipWithIndex.map {
+        case (ch, index) => if (ch.equals('`')) index }.filter(_ != ())
       // external table use float data type
       // normal table use double data type instead of float
-      val x =
-        if (col.dataType.catalogString == "float" && !isExternal) {
-          '`' + col.name + '`' + " double" + columnComment
-        } else {
-          '`' + col.name + '`' + ' ' + col.dataType.catalogString + columnComment
-        }
-      val f: Field = anyFieldDef(new lexical.Scanner(x.toLowerCase))
+      var scannerInput = if (col.name.contains("`")) {
+        // If the column name contains the character like `, then parsing fails, as ` char is used
+        // to tokenize and it wont be able to differentiate between whether its actual character or
+        // token character. So just for parsing temporarily replace with !, then once field is
+        // prepared, just replace the original name.
+        getScannerInput(col, columnComment, col.name.replaceAll("`", "!"))
+      } else {
+        getScannerInput(col, columnComment, col.name)
+      }
+      var field: Field = anyFieldDef(new lexical.Scanner(scannerInput.toLowerCase))
       match {
         case Success(field, _) =>
           if (col.dataType.catalogString == "float" && isExternal) {
@@ -749,26 +765,32 @@ class CarbonSpark2SqlParser extends CarbonDDLSqlParser {
         case failureOrError => throw new MalformedCarbonCommandException(
           s"Unsupported data type: ${ col.dataType }")
       }
+      if (col.name.contains("`")) {
+        val actualName = indexesToReplace.foldLeft(field.column)((s, i) => s.updated(i
+          .asInstanceOf[Int], '`'))
+        field = field.copy(column = actualName, name = Some(actualName))
+        scannerInput = getScannerInput(col, columnComment, col.name)
+      }
       // the data type of the decimal type will be like decimal(10,0)
       // so checking the start of the string and taking the precision and scale.
       // resetting the data type with decimal
-      if (f.dataType.getOrElse("").startsWith("decimal")) {
+      if (field.dataType.getOrElse("").startsWith("decimal")) {
         val (precision, scale) = CommonUtil.getScaleAndPrecision(col.dataType.catalogString)
-        f.precision = precision
-        f.scale = scale
-        f.dataType = Some("decimal")
+        field.precision = precision
+        field.scale = scale
+        field.dataType = Some("decimal")
       }
-      if (f.dataType.getOrElse("").startsWith("char")) {
-        f.dataType = Some("char")
+      if (field.dataType.getOrElse("").startsWith("char")) {
+        field.dataType = Some("char")
       }
-      else if (f.dataType.getOrElse("").startsWith("float") && !isExternal) {
-        f.dataType = Some("double")
+      else if (field.dataType.getOrElse("").startsWith("float") && !isExternal) {
+        field.dataType = Some("double")
       }
-      f.rawSchema = x
+      field.rawSchema = scannerInput
       if (col.getComment().isDefined) {
-        f.columnComment = plainComment
+        field.columnComment = plainComment
       }
-      f
+      field
     }
   }
 
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
index a5a2c41..86bffbb 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
@@ -350,9 +350,10 @@ class TestLoadDataGeneral extends QueryTest with BeforeAndAfterEach {
 
   test("test table creation with special char and other commands") {
     sql("drop table if exists special_char")
-    sql("create table special_char(`i#d` string, `nam(e` string,`ci)&#@!ty` string,`a\be` int, `ag!e` float, `na^me1` Decimal(8,4)) stored as carbondata")
-    sql("insert into special_char values('1','joey','hud', 2, 2.2, 2.3456)")
-    checkAnswer(sql("select * from special_char"), Seq(Row("1","joey","hud", 2, 2.2, 2.3456)))
+    sql("create table special_char(`i#d` string, `nam(e` string,`ci)&#@!ty` string,`a\be` int, `ag!e` float, `na^me1` Decimal(8,4), ```a``bc``!!d``` int) stored as carbondata" +
+        " tblproperties('INVERTED_INDEX'='`a`bc`!!d`', 'SORT_COLUMNS'='`a`bc`!!d`')")
+    sql("insert into special_char values('1','joey','hud', 2, 2.2, 2.3456, 5)")
+    checkAnswer(sql("select * from special_char"), Seq(Row("1","joey","hud", 2, 2.2, 2.3456, 5)))
     val df = sql("describe formatted special_char").collect()
     assert(df.exists(_.get(0).toString.contains("i#d")))
     assert(df.exists(_.get(0).toString.contains("nam(e")))
@@ -360,6 +361,7 @@ class TestLoadDataGeneral extends QueryTest with BeforeAndAfterEach {
     assert(df.exists(_.get(0).toString.contains("a\be")))
     assert(df.exists(_.get(0).toString.contains("ag!e")))
     assert(df.exists(_.get(0).toString.contains("na^me1")))
+    assert(df.exists(_.get(0).toString.contains("`a`bc`!!d`")))
   }
 
   override def afterEach {