You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ak...@apache.org on 2020/01/08 04:52:47 UTC
[carbondata] branch master updated: [CARBONDATA-3642] Add column
name in error msg when string length exceed 32000
This is an automated email from the ASF dual-hosted git repository.
akashrn5 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 511dc6d [CARBONDATA-3642] Add column name in error msg when string length exceed 32000
511dc6d is described below
commit 511dc6d0c7ce9becdd64a640c26e57f8dec25129
Author: 沈洪 <yu...@alipay.com>
AuthorDate: Tue Dec 31 18:08:23 2019 +0800
[CARBONDATA-3642] Add column name in error msg when string length exceed 32000
Problem: Currently, when the string length exceeds 32k for a column, the load fails with an exception, but the exception does not
include the column name or a proper error message.
Solution: When the length exceeds 32k for a column, throw an exception with a proper error message that contains the
column name and a suggestion to use the long_string_columns table property.
This closes #3546
---
.../longstring/VarcharDataTypesBasicTestCase.scala | 2 +-
.../spark/rdd/NewCarbonDataLoadRDD.scala | 4 ++--
.../carbondata/spark/util/CarbonScalaUtil.scala | 24 ++++++++++++++++++----
.../streaming/parser/FieldConverter.scala | 5 +++--
4 files changed, 26 insertions(+), 9 deletions(-)
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
index 6c8f7a0..4c73caa 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
@@ -196,7 +196,7 @@ class VarcharDataTypesBasicTestCase extends QueryTest with BeforeAndAfterEach wi
val e = intercept[Exception] {
sql(s""" insert into testlongstring select 1, 'abc', '$longChar'""")
}
- assert(e.getMessage.contains("Dataload failed, String length cannot exceed 32000 characters"))
+ assert(e.getMessage.contains("DataLoad failure: Column description is too long"))
sql("ALTER TABLE testlongstring SET TBLPROPERTIES('long_String_columns'='description')")
sql(s""" insert into testlongstring select 1, 'ab1', '$longChar'""")
sql("drop table if exists testlongstring")
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
index 3e3b9a4..90d61f3 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
@@ -357,7 +357,7 @@ class NewRddIterator(rddIter: Iterator[Row],
val len = columns.length
var i = 0
while (i < len) {
- columns(i) = CarbonScalaUtil.getString(row.get(i), serializationNullFormat,
+ columns(i) = CarbonScalaUtil.getString(row, i, carbonLoadModel, serializationNullFormat,
complexDelimiters, timeStampFormat, dateFormat,
isVarcharType = i < isVarcharTypeMapping.size && isVarcharTypeMapping(i),
isComplexType = i < isComplexTypeMapping.size && isComplexTypeMapping(i))
@@ -431,7 +431,7 @@ class LazyRddIterator(serializer: SerializerInstance,
val row = rddIter.next()
val columns = new Array[AnyRef](row.length)
for (i <- 0 until columns.length) {
- columns(i) = CarbonScalaUtil.getString(row.get(i), serializationNullFormat,
+ columns(i) = CarbonScalaUtil.getString(row, i, carbonLoadModel, serializationNullFormat,
complexDelimiters, timeStampFormat, dateFormat,
isVarcharType = i < isVarcharTypeMapping.size && isVarcharTypeMapping(i))
}
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CarbonScalaUtil.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CarbonScalaUtil.scala
index 3f040c9..3b36bf2 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CarbonScalaUtil.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CarbonScalaUtil.scala
@@ -53,6 +53,7 @@ import org.apache.carbondata.core.util.DataTypeUtil
import org.apache.carbondata.processing.exception.DataLoadingException
import org.apache.carbondata.processing.loading.FailureCauses
import org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException
+import org.apache.carbondata.processing.loading.model.CarbonLoadModel
import org.apache.carbondata.processing.util.CarbonDataProcessorUtil
import org.apache.carbondata.streaming.parser.FieldConverter
@@ -60,7 +61,10 @@ object CarbonScalaUtil {
private val LOGGER: Logger = LogServiceFactory.getLogService(this.getClass.getCanonicalName)
- def getString(value: Any,
+ def getString(
+ row: Row,
+ idx: Int,
+ carbonLoadModel: CarbonLoadModel,
serializationNullFormat: String,
complexDelimiters: util.ArrayList[String],
timeStampFormat: SimpleDateFormat,
@@ -68,9 +72,21 @@ object CarbonScalaUtil {
isVarcharType: Boolean = false,
isComplexType: Boolean = false,
level: Int = 0): String = {
- FieldConverter.objectToString(value, serializationNullFormat, complexDelimiters,
- timeStampFormat, dateFormat, isVarcharType = isVarcharType, isComplexType = isComplexType,
- level)
+ try {
+ FieldConverter.objectToString(row.get(idx), serializationNullFormat, complexDelimiters,
+ timeStampFormat, dateFormat, isVarcharType, isComplexType, level)
+ } catch {
+ case e: Exception =>
+ if (e.getMessage.startsWith(FieldConverter.stringLengthExceedErrorMsg)) {
+ val msg = s"Column ${carbonLoadModel.getCarbonDataLoadSchema.getCarbonTable
+ .getCreateOrderColumn.get(idx).getColName} is too long," +
+ s" consider to use 'long_string_columns' table property."
+ LOGGER.error(msg, e)
+ throw new Exception(msg, e)
+ } else {
+ throw e
+ }
+ }
}
/**
diff --git a/streaming/src/main/scala/org/apache/carbondata/streaming/parser/FieldConverter.scala b/streaming/src/main/scala/org/apache/carbondata/streaming/parser/FieldConverter.scala
index 93d55cf..e74c191 100644
--- a/streaming/src/main/scala/org/apache/carbondata/streaming/parser/FieldConverter.scala
+++ b/streaming/src/main/scala/org/apache/carbondata/streaming/parser/FieldConverter.scala
@@ -24,6 +24,7 @@ import java.util
import org.apache.carbondata.core.constants.CarbonCommonConstants
object FieldConverter {
+ val stringLengthExceedErrorMsg = "Dataload failed, String length cannot exceed "
/**
* Return a String representation of the input value
@@ -50,8 +51,8 @@ object FieldConverter {
value match {
case s: String => if (!isVarcharType && !isComplexType &&
s.length > CarbonCommonConstants.MAX_CHARS_PER_COLUMN_DEFAULT) {
- throw new Exception("Dataload failed, String length cannot exceed " +
- CarbonCommonConstants.MAX_CHARS_PER_COLUMN_DEFAULT + " characters")
+ throw new IllegalArgumentException(stringLengthExceedErrorMsg +
+ CarbonCommonConstants.MAX_CHARS_PER_COLUMN_DEFAULT + " characters")
} else {
s
}