You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ak...@apache.org on 2020/09/16 07:58:14 UTC

[carbondata] branch master updated: [CARBONDATA-3984] compaction on table having range column after altering datatype from string to long string fails

This is an automated email from the ASF dual-hosted git repository.

akashrn5 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new eec8ccc  [CARBONDATA-3984] compaction on table having range column after altering datatype from string to long string fails
eec8ccc is described below

commit eec8ccc5eea88ba97aec1a813544747e4c7758e2
Author: Karan980 <ka...@gmail.com>
AuthorDate: Sun Sep 13 23:51:22 2020 +0530

    [CARBONDATA-3984] compaction on table having range column after altering datatype from string
    to long string fails
    
    Why is this PR needed?
    Fix multiple issues occurred after altering the column dataType to long_string_column.
    a) When dataType of a String column which is also provided as range column in table properties
    is altered to long_string_column. It throws following error while performing compaction on the
    table.
    VARCHAR not supported for the filter expression;
    b) When longStringColumn name is present in UpperCase then Validation check for SortColumns
    fails. As names of SortColumns are present in LowerCase.
    
    What changes were proposed in this PR?
    a) Added condition for VARCHAR datatype in all conditional expressions.
    b) Changed the long_string_columns name to lowerCase before doing validation.
    
    Does this PR introduce any user interface change?
    No
    
    Is any new testcase added?
    Yes
    
    This closes #3923
---
 .../expression/conditional/EqualToExpression.java  |  2 +-
 .../conditional/GreaterThanEqualToExpression.java  |  2 +-
 .../conditional/GreaterThanExpression.java         |  2 +-
 .../scan/expression/conditional/InExpression.java  |  2 +-
 .../conditional/LessThanEqualToExpression.java     |  2 +-
 .../expression/conditional/LessThanExpression.java |  2 +-
 .../conditional/NotEqualsExpression.java           |  2 +-
 .../expression/conditional/NotInExpression.java    |  2 +-
 .../apache/carbondata/core/util/CarbonUtil.java    |  2 +
 .../org/apache/spark/util/AlterTableUtil.scala     | 10 ++--
 .../longstring/VarcharDataTypesBasicTestCase.scala | 56 +++++++++++++++++++++-
 11 files changed, 70 insertions(+), 14 deletions(-)

diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/EqualToExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/EqualToExpression.java
index fe0d5fd..c326840 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/EqualToExpression.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/EqualToExpression.java
@@ -70,7 +70,7 @@ public class EqualToExpression extends BinaryConditionalExpression {
     DataType dataType = val1.getDataType();
     if (dataType == DataTypes.BOOLEAN) {
       result = val1.getBoolean().equals(val2.getBoolean());
-    } else if (dataType == DataTypes.STRING) {
+    } else if (dataType == DataTypes.STRING || dataType == DataTypes.VARCHAR) {
       result = val1.getString().equals(val2.getString());
     } else if (dataType == DataTypes.SHORT) {
       result = val1.getShort().equals(val2.getShort());
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/GreaterThanEqualToExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/GreaterThanEqualToExpression.java
index 64cd6c7..db67784 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/GreaterThanEqualToExpression.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/GreaterThanEqualToExpression.java
@@ -52,7 +52,7 @@ public class GreaterThanEqualToExpression extends BinaryConditionalExpression {
     DataType dataType = exprResVal1.getDataType();
     if (dataType == DataTypes.BOOLEAN) {
       result = elRes.getBoolean().compareTo(erRes.getBoolean()) >= 0;
-    } else if (dataType == DataTypes.STRING) {
+    } else if (dataType == DataTypes.STRING || dataType == DataTypes.VARCHAR) {
       result = elRes.getString().compareTo(erRes.getString()) >= 0;
     } else if (dataType == DataTypes.SHORT) {
       result = elRes.getShort() >= (erRes.getShort());
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/GreaterThanExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/GreaterThanExpression.java
index 2ca56a5..31f1b86 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/GreaterThanExpression.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/GreaterThanExpression.java
@@ -54,7 +54,7 @@ public class GreaterThanExpression extends BinaryConditionalExpression {
     DataType dataType = val1.getDataType();
     if (dataType == DataTypes.BOOLEAN) {
       result = exprLeftRes.getBoolean().compareTo(exprRightRes.getBoolean()) > 0;
-    } else if (dataType == DataTypes.STRING) {
+    } else if (dataType == DataTypes.STRING || dataType == DataTypes.VARCHAR) {
       result = exprLeftRes.getString().compareTo(exprRightRes.getString()) > 0;
     } else if (dataType == DataTypes.DOUBLE) {
       result = exprLeftRes.getDouble() > (exprRightRes.getDouble());
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/InExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/InExpression.java
index 7a1607c..681d8ba 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/InExpression.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/InExpression.java
@@ -57,7 +57,7 @@ public class InExpression extends BinaryConditionalExpression {
         DataType dataType = val.getDataType();
         if (dataType == DataTypes.BOOLEAN) {
           val = new ExpressionResult(val.getDataType(), expressionResVal.getBoolean());
-        } else if (dataType == DataTypes.STRING) {
+        } else if (dataType == DataTypes.STRING || dataType == DataTypes.VARCHAR) {
           val = new ExpressionResult(val.getDataType(), expressionResVal.getString());
         } else if (dataType == DataTypes.SHORT) {
           val = new ExpressionResult(val.getDataType(), expressionResVal.getShort());
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/LessThanEqualToExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/LessThanEqualToExpression.java
index e2b0512..53a689d 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/LessThanEqualToExpression.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/LessThanEqualToExpression.java
@@ -52,7 +52,7 @@ public class LessThanEqualToExpression extends BinaryConditionalExpression {
     DataType dataType = exprResValue1.getDataType();
     if (dataType == DataTypes.BOOLEAN) {
       result = elRes.getBoolean().compareTo(erRes.getBoolean()) <= 0;
-    } else if (dataType == DataTypes.STRING) {
+    } else if (dataType == DataTypes.STRING || dataType == DataTypes.VARCHAR) {
       result = elRes.getString().compareTo(erRes.getString()) <= 0;
     } else if (dataType == DataTypes.SHORT) {
       result = elRes.getShort() <= (erRes.getShort());
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/LessThanExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/LessThanExpression.java
index bee6dfe..671f1f1 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/LessThanExpression.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/LessThanExpression.java
@@ -56,7 +56,7 @@ public class LessThanExpression extends BinaryConditionalExpression {
     DataType dataType = val1.getDataType();
     if (dataType == DataTypes.BOOLEAN) {
       result = elRes.getBoolean().compareTo(erRes.getBoolean()) < 0;
-    } else if (dataType == DataTypes.STRING) {
+    } else if (dataType == DataTypes.STRING || dataType == DataTypes.VARCHAR) {
       result = elRes.getString().compareTo(erRes.getString()) < 0;
     } else if (dataType == DataTypes.SHORT) {
       result = elRes.getShort() < (erRes.getShort());
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotEqualsExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotEqualsExpression.java
index f393754..d0baed9 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotEqualsExpression.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotEqualsExpression.java
@@ -66,7 +66,7 @@ public class NotEqualsExpression extends BinaryConditionalExpression {
     DataType dataType = val1.getDataType();
     if (dataType == DataTypes.BOOLEAN) {
       result = !val1.getBoolean().equals(val2.getBoolean());
-    } else if (dataType == DataTypes.STRING) {
+    } else if (dataType == DataTypes.STRING || dataType == DataTypes.VARCHAR) {
       result = !val1.getString().equals(val2.getString());
     } else if (dataType == DataTypes.SHORT) {
       result = val1.getShort().shortValue() != val2.getShort().shortValue();
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotInExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotInExpression.java
index 1ce7e98..5264b89 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotInExpression.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotInExpression.java
@@ -80,7 +80,7 @@ public class NotInExpression extends BinaryConditionalExpression {
           val = new ExpressionResult(val.getDataType(), exprResVal.getBoolean());
         } else if (dataType == DataTypes.STRING) {
           val = new ExpressionResult(val.getDataType(), exprResVal.getString());
-        } else if (dataType == DataTypes.SHORT) {
+        } else if (dataType == DataTypes.SHORT || dataType == DataTypes.VARCHAR) {
           val = new ExpressionResult(val.getDataType(), exprResVal.getShort());
         } else if (dataType == DataTypes.INT) {
           val = new ExpressionResult(val.getDataType(), exprResVal.getInt());
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index 32be743..21a34b6 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -3344,6 +3344,8 @@ public final class CarbonUtil {
     // for setting long string columns
     if (!longStringColumnsString.isEmpty()) {
       String[] inputColumns = longStringColumnsString.split(",");
+      inputColumns = Arrays.stream(inputColumns).map(String::trim)
+              .map(String::toLowerCase).toArray(String[]::new);
       Set<String> longStringSet = new HashSet<>(Arrays.asList(inputColumns));
       List<org.apache.carbondata.format.ColumnSchema> varCharColumns = new ArrayList<>();
       // change data type to varchar and extract the varchar columns
diff --git a/integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala b/integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala
index be4e424..3c10571 100644
--- a/integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala
@@ -865,7 +865,7 @@ object AlterTableUtil {
   def validateLongStringColumns(longStringColumns: String,
       carbonTable: CarbonTable): Unit = {
     // don't allow duplicate column names
-    val longStringCols = longStringColumns.split(",")
+    val longStringCols = longStringColumns.split(",").map(column => column.trim.toLowerCase)
     if (longStringCols.distinct.lengthCompare(longStringCols.size) != 0) {
       val duplicateColumns = longStringCols
         .diff(longStringCols.distinct).distinct
@@ -877,14 +877,14 @@ object AlterTableUtil {
     // check if the column specified exists in table schema and must be of string data type
     val colSchemas = carbonTable.getTableInfo.getFactTable.getListOfColumns.asScala
     longStringCols.foreach { col =>
-      if (!colSchemas.exists(x => x.getColumnName.equalsIgnoreCase(col.trim))) {
-        val errorMsg = "LONG_STRING_COLUMNS column: " + col.trim +
+      if (!colSchemas.exists(x => x.getColumnName.equalsIgnoreCase(col))) {
+        val errorMsg = "LONG_STRING_COLUMNS column: " + col +
                        " does not exist in table. Please check the DDL."
         throw new MalformedCarbonCommandException(errorMsg)
-      } else if (colSchemas.exists(x => x.getColumnName.equalsIgnoreCase(col.trim) &&
+      } else if (colSchemas.exists(x => x.getColumnName.equalsIgnoreCase(col) &&
                                           !x.getDataType.toString
                                             .equalsIgnoreCase("STRING"))) {
-        val errMsg = "LONG_STRING_COLUMNS column: " + col.trim +
+        val errMsg = "LONG_STRING_COLUMNS column: " + col +
                      " is not a string datatype column"
         throw new MalformedCarbonCommandException(errMsg)
       }
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
index 706095a..8e1b114 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
@@ -17,6 +17,9 @@
 
 package org.apache.carbondata.spark.testsuite.longstring
 
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+
 import java.io.{File, PrintWriter}
 
 import org.apache.commons.lang3.RandomStringUtils
@@ -29,8 +32,8 @@ import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandExcepti
 import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.metadata.CarbonMetadata
 import org.apache.carbondata.core.metadata.datatype.DataTypes
+import org.apache.carbondata.core.statusmanager.SegmentStatusManager
 import org.apache.carbondata.core.util.CarbonProperties
-import scala.collection.mutable
 
 class VarcharDataTypesBasicTestCase extends QueryTest with BeforeAndAfterEach with BeforeAndAfterAll {
   private val longStringTable = "long_string_table"
@@ -110,6 +113,57 @@ class VarcharDataTypesBasicTestCase extends QueryTest with BeforeAndAfterEach wi
     assert(exceptionCaught.getMessage.contains("its data type is not string"))
   }
 
+  test("cannot alter sort_columns dataType to long_string_columns") {
+    val exceptionCaught = intercept[RuntimeException] {
+      sql(
+        s"""
+           | CREATE TABLE if not exists $longStringTable(
+           | id INT, NAME STRING, description STRING, address STRING, note STRING
+           | ) STORED AS carbondata
+           | TBLPROPERTIES('SORT_COLUMNS'='name, address')
+           |""".
+          stripMargin)
+      sql("ALTER TABLE long_string_table SET TBLPROPERTIES('long_String_columns'='NAME')")
+    }
+    assert(exceptionCaught.getMessage.contains("LONG_STRING_COLUMNS cannot be present in sort columns: name"))
+  }
+
+  test("check compaction after altering range column dataType to longStringColumn") {
+    val existingValue = CarbonProperties.getInstance()
+      .getProperty(CarbonCommonConstants.COMPACTION_SEGMENT_LEVEL_THRESHOLD)
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.COMPACTION_SEGMENT_LEVEL_THRESHOLD, "2,2")
+      sql(
+        s"""
+           | CREATE TABLE if not exists $longStringTable(
+           | id INT, NAME STRING, description STRING
+           | ) STORED AS carbondata
+           | TBLPROPERTIES('RANGE_COLUMN'='Name')
+           |""".
+          stripMargin)
+    sql("ALTER TABLE long_string_table SET TBLPROPERTIES('long_String_columns'='NAME')")
+    sql("insert into long_string_table select 1, 'ab', 'cool1'")
+    sql("insert into long_string_table select 2, 'abc', 'cool2'")
+    sql("ALTER TABLE long_string_table compact 'minor'")
+    val carbonTable = CarbonMetadata.getInstance().getCarbonTable(
+      CarbonCommonConstants.DATABASE_DEFAULT_NAME, "long_string_table")
+    val absoluteTableIdentifier = carbonTable.getAbsoluteTableIdentifier
+    val segmentStatusManager: SegmentStatusManager = new SegmentStatusManager(absoluteTableIdentifier)
+    val segments = segmentStatusManager.getValidAndInvalidSegments.getValidSegments.asScala.map(_.getSegmentNo).toList
+    assert(segments.contains("0.1"))
+    assert(!segments.contains("0"))
+    assert(!segments.contains("1"))
+    if (existingValue != null) {
+      CarbonProperties.getInstance()
+        .addProperty(CarbonCommonConstants.COMPACTION_SEGMENT_LEVEL_THRESHOLD,
+          existingValue)
+    } else {
+      CarbonProperties.getInstance()
+        .addProperty(CarbonCommonConstants.COMPACTION_SEGMENT_LEVEL_THRESHOLD,
+          CarbonCommonConstants.DEFAULT_SEGMENT_LEVEL_THRESHOLD)
+    }
+  }
+
   test("long string columns cannot contain duplicate columns") {
     val exceptionCaught = intercept[MalformedCarbonCommandException] {
       sql(