You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ak...@apache.org on 2020/09/16 07:58:14 UTC
[carbondata] branch master updated: [CARBONDATA-3984] compaction on
table having range column after altering datatype from string to long
string fails
This is an automated email from the ASF dual-hosted git repository.
akashrn5 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new eec8ccc [CARBONDATA-3984] compaction on table having range column after altering datatype from string to long string fails
eec8ccc is described below
commit eec8ccc5eea88ba97aec1a813544747e4c7758e2
Author: Karan980 <ka...@gmail.com>
AuthorDate: Sun Sep 13 23:51:22 2020 +0530
[CARBONDATA-3984] compaction on table having range column after altering datatype from string
to long string fails
Why is this PR needed?
Fix multiple issues occurred after altering the column dataType to long_string_column.
a) When dataType of a String column which is also provided as range column in table properties
is altered to long_string_column. It throws following error while performing compaction on the
table.
VARCHAR not supported for the filter expression;
b) When longStringColumn name is present in UpperCase then Validation check for SortColumns
fails. As names of SortColumns are present in LowerCase.
What changes were proposed in this PR?
a) Added condition for VARCHAR datatype in all conditional expressions.
b) Changed the long_string_columns name to lowerCase before doing validation.
Does this PR introduce any user interface change?
No
Is any new testcase added?
Yes
This closes #3923
---
.../expression/conditional/EqualToExpression.java | 2 +-
.../conditional/GreaterThanEqualToExpression.java | 2 +-
.../conditional/GreaterThanExpression.java | 2 +-
.../scan/expression/conditional/InExpression.java | 2 +-
.../conditional/LessThanEqualToExpression.java | 2 +-
.../expression/conditional/LessThanExpression.java | 2 +-
.../conditional/NotEqualsExpression.java | 2 +-
.../expression/conditional/NotInExpression.java | 2 +-
.../apache/carbondata/core/util/CarbonUtil.java | 2 +
.../org/apache/spark/util/AlterTableUtil.scala | 10 ++--
.../longstring/VarcharDataTypesBasicTestCase.scala | 56 +++++++++++++++++++++-
11 files changed, 70 insertions(+), 14 deletions(-)
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/EqualToExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/EqualToExpression.java
index fe0d5fd..c326840 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/EqualToExpression.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/EqualToExpression.java
@@ -70,7 +70,7 @@ public class EqualToExpression extends BinaryConditionalExpression {
DataType dataType = val1.getDataType();
if (dataType == DataTypes.BOOLEAN) {
result = val1.getBoolean().equals(val2.getBoolean());
- } else if (dataType == DataTypes.STRING) {
+ } else if (dataType == DataTypes.STRING || dataType == DataTypes.VARCHAR) {
result = val1.getString().equals(val2.getString());
} else if (dataType == DataTypes.SHORT) {
result = val1.getShort().equals(val2.getShort());
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/GreaterThanEqualToExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/GreaterThanEqualToExpression.java
index 64cd6c7..db67784 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/GreaterThanEqualToExpression.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/GreaterThanEqualToExpression.java
@@ -52,7 +52,7 @@ public class GreaterThanEqualToExpression extends BinaryConditionalExpression {
DataType dataType = exprResVal1.getDataType();
if (dataType == DataTypes.BOOLEAN) {
result = elRes.getBoolean().compareTo(erRes.getBoolean()) >= 0;
- } else if (dataType == DataTypes.STRING) {
+ } else if (dataType == DataTypes.STRING || dataType == DataTypes.VARCHAR) {
result = elRes.getString().compareTo(erRes.getString()) >= 0;
} else if (dataType == DataTypes.SHORT) {
result = elRes.getShort() >= (erRes.getShort());
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/GreaterThanExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/GreaterThanExpression.java
index 2ca56a5..31f1b86 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/GreaterThanExpression.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/GreaterThanExpression.java
@@ -54,7 +54,7 @@ public class GreaterThanExpression extends BinaryConditionalExpression {
DataType dataType = val1.getDataType();
if (dataType == DataTypes.BOOLEAN) {
result = exprLeftRes.getBoolean().compareTo(exprRightRes.getBoolean()) > 0;
- } else if (dataType == DataTypes.STRING) {
+ } else if (dataType == DataTypes.STRING || dataType == DataTypes.VARCHAR) {
result = exprLeftRes.getString().compareTo(exprRightRes.getString()) > 0;
} else if (dataType == DataTypes.DOUBLE) {
result = exprLeftRes.getDouble() > (exprRightRes.getDouble());
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/InExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/InExpression.java
index 7a1607c..681d8ba 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/InExpression.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/InExpression.java
@@ -57,7 +57,7 @@ public class InExpression extends BinaryConditionalExpression {
DataType dataType = val.getDataType();
if (dataType == DataTypes.BOOLEAN) {
val = new ExpressionResult(val.getDataType(), expressionResVal.getBoolean());
- } else if (dataType == DataTypes.STRING) {
+ } else if (dataType == DataTypes.STRING || dataType == DataTypes.VARCHAR) {
val = new ExpressionResult(val.getDataType(), expressionResVal.getString());
} else if (dataType == DataTypes.SHORT) {
val = new ExpressionResult(val.getDataType(), expressionResVal.getShort());
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/LessThanEqualToExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/LessThanEqualToExpression.java
index e2b0512..53a689d 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/LessThanEqualToExpression.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/LessThanEqualToExpression.java
@@ -52,7 +52,7 @@ public class LessThanEqualToExpression extends BinaryConditionalExpression {
DataType dataType = exprResValue1.getDataType();
if (dataType == DataTypes.BOOLEAN) {
result = elRes.getBoolean().compareTo(erRes.getBoolean()) <= 0;
- } else if (dataType == DataTypes.STRING) {
+ } else if (dataType == DataTypes.STRING || dataType == DataTypes.VARCHAR) {
result = elRes.getString().compareTo(erRes.getString()) <= 0;
} else if (dataType == DataTypes.SHORT) {
result = elRes.getShort() <= (erRes.getShort());
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/LessThanExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/LessThanExpression.java
index bee6dfe..671f1f1 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/LessThanExpression.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/LessThanExpression.java
@@ -56,7 +56,7 @@ public class LessThanExpression extends BinaryConditionalExpression {
DataType dataType = val1.getDataType();
if (dataType == DataTypes.BOOLEAN) {
result = elRes.getBoolean().compareTo(erRes.getBoolean()) < 0;
- } else if (dataType == DataTypes.STRING) {
+ } else if (dataType == DataTypes.STRING || dataType == DataTypes.VARCHAR) {
result = elRes.getString().compareTo(erRes.getString()) < 0;
} else if (dataType == DataTypes.SHORT) {
result = elRes.getShort() < (erRes.getShort());
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotEqualsExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotEqualsExpression.java
index f393754..d0baed9 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotEqualsExpression.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotEqualsExpression.java
@@ -66,7 +66,7 @@ public class NotEqualsExpression extends BinaryConditionalExpression {
DataType dataType = val1.getDataType();
if (dataType == DataTypes.BOOLEAN) {
result = !val1.getBoolean().equals(val2.getBoolean());
- } else if (dataType == DataTypes.STRING) {
+ } else if (dataType == DataTypes.STRING || dataType == DataTypes.VARCHAR) {
result = !val1.getString().equals(val2.getString());
} else if (dataType == DataTypes.SHORT) {
result = val1.getShort().shortValue() != val2.getShort().shortValue();
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotInExpression.java b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotInExpression.java
index 1ce7e98..5264b89 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotInExpression.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/NotInExpression.java
@@ -80,7 +80,7 @@ public class NotInExpression extends BinaryConditionalExpression {
val = new ExpressionResult(val.getDataType(), exprResVal.getBoolean());
} else if (dataType == DataTypes.STRING) {
val = new ExpressionResult(val.getDataType(), exprResVal.getString());
- } else if (dataType == DataTypes.SHORT) {
+ } else if (dataType == DataTypes.SHORT || dataType == DataTypes.VARCHAR) {
val = new ExpressionResult(val.getDataType(), exprResVal.getShort());
} else if (dataType == DataTypes.INT) {
val = new ExpressionResult(val.getDataType(), exprResVal.getInt());
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index 32be743..21a34b6 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -3344,6 +3344,8 @@ public final class CarbonUtil {
// for setting long string columns
if (!longStringColumnsString.isEmpty()) {
String[] inputColumns = longStringColumnsString.split(",");
+ inputColumns = Arrays.stream(inputColumns).map(String::trim)
+ .map(String::toLowerCase).toArray(String[]::new);
Set<String> longStringSet = new HashSet<>(Arrays.asList(inputColumns));
List<org.apache.carbondata.format.ColumnSchema> varCharColumns = new ArrayList<>();
// change data type to varchar and extract the varchar columns
diff --git a/integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala b/integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala
index be4e424..3c10571 100644
--- a/integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/util/AlterTableUtil.scala
@@ -865,7 +865,7 @@ object AlterTableUtil {
def validateLongStringColumns(longStringColumns: String,
carbonTable: CarbonTable): Unit = {
// don't allow duplicate column names
- val longStringCols = longStringColumns.split(",")
+ val longStringCols = longStringColumns.split(",").map(column => column.trim.toLowerCase)
if (longStringCols.distinct.lengthCompare(longStringCols.size) != 0) {
val duplicateColumns = longStringCols
.diff(longStringCols.distinct).distinct
@@ -877,14 +877,14 @@ object AlterTableUtil {
// check if the column specified exists in table schema and must be of string data type
val colSchemas = carbonTable.getTableInfo.getFactTable.getListOfColumns.asScala
longStringCols.foreach { col =>
- if (!colSchemas.exists(x => x.getColumnName.equalsIgnoreCase(col.trim))) {
- val errorMsg = "LONG_STRING_COLUMNS column: " + col.trim +
+ if (!colSchemas.exists(x => x.getColumnName.equalsIgnoreCase(col))) {
+ val errorMsg = "LONG_STRING_COLUMNS column: " + col +
" does not exist in table. Please check the DDL."
throw new MalformedCarbonCommandException(errorMsg)
- } else if (colSchemas.exists(x => x.getColumnName.equalsIgnoreCase(col.trim) &&
+ } else if (colSchemas.exists(x => x.getColumnName.equalsIgnoreCase(col) &&
!x.getDataType.toString
.equalsIgnoreCase("STRING"))) {
- val errMsg = "LONG_STRING_COLUMNS column: " + col.trim +
+ val errMsg = "LONG_STRING_COLUMNS column: " + col +
" is not a string datatype column"
throw new MalformedCarbonCommandException(errMsg)
}
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
index 706095a..8e1b114 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
@@ -17,6 +17,9 @@
package org.apache.carbondata.spark.testsuite.longstring
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+
import java.io.{File, PrintWriter}
import org.apache.commons.lang3.RandomStringUtils
@@ -29,8 +32,8 @@ import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandExcepti
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.metadata.CarbonMetadata
import org.apache.carbondata.core.metadata.datatype.DataTypes
+import org.apache.carbondata.core.statusmanager.SegmentStatusManager
import org.apache.carbondata.core.util.CarbonProperties
-import scala.collection.mutable
class VarcharDataTypesBasicTestCase extends QueryTest with BeforeAndAfterEach with BeforeAndAfterAll {
private val longStringTable = "long_string_table"
@@ -110,6 +113,57 @@ class VarcharDataTypesBasicTestCase extends QueryTest with BeforeAndAfterEach wi
assert(exceptionCaught.getMessage.contains("its data type is not string"))
}
+ test("cannot alter sort_columns dataType to long_string_columns") {
+ val exceptionCaught = intercept[RuntimeException] {
+ sql(
+ s"""
+ | CREATE TABLE if not exists $longStringTable(
+ | id INT, NAME STRING, description STRING, address STRING, note STRING
+ | ) STORED AS carbondata
+ | TBLPROPERTIES('SORT_COLUMNS'='name, address')
+ |""".
+ stripMargin)
+ sql("ALTER TABLE long_string_table SET TBLPROPERTIES('long_String_columns'='NAME')")
+ }
+ assert(exceptionCaught.getMessage.contains("LONG_STRING_COLUMNS cannot be present in sort columns: name"))
+ }
+
+ test("check compaction after altering range column dataType to longStringColumn") {
+ val existingValue = CarbonProperties.getInstance()
+ .getProperty(CarbonCommonConstants.COMPACTION_SEGMENT_LEVEL_THRESHOLD)
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.COMPACTION_SEGMENT_LEVEL_THRESHOLD, "2,2")
+ sql(
+ s"""
+ | CREATE TABLE if not exists $longStringTable(
+ | id INT, NAME STRING, description STRING
+ | ) STORED AS carbondata
+ | TBLPROPERTIES('RANGE_COLUMN'='Name')
+ |""".
+ stripMargin)
+ sql("ALTER TABLE long_string_table SET TBLPROPERTIES('long_String_columns'='NAME')")
+ sql("insert into long_string_table select 1, 'ab', 'cool1'")
+ sql("insert into long_string_table select 2, 'abc', 'cool2'")
+ sql("ALTER TABLE long_string_table compact 'minor'")
+ val carbonTable = CarbonMetadata.getInstance().getCarbonTable(
+ CarbonCommonConstants.DATABASE_DEFAULT_NAME, "long_string_table")
+ val absoluteTableIdentifier = carbonTable.getAbsoluteTableIdentifier
+ val segmentStatusManager: SegmentStatusManager = new SegmentStatusManager(absoluteTableIdentifier)
+ val segments = segmentStatusManager.getValidAndInvalidSegments.getValidSegments.asScala.map(_.getSegmentNo).toList
+ assert(segments.contains("0.1"))
+ assert(!segments.contains("0"))
+ assert(!segments.contains("1"))
+ if (existingValue != null) {
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.COMPACTION_SEGMENT_LEVEL_THRESHOLD,
+ existingValue)
+ } else {
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.COMPACTION_SEGMENT_LEVEL_THRESHOLD,
+ CarbonCommonConstants.DEFAULT_SEGMENT_LEVEL_THRESHOLD)
+ }
+ }
+
test("long string columns cannot contain duplicate columns") {
val exceptionCaught = intercept[MalformedCarbonCommandException] {
sql(