You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ma...@apache.org on 2017/11/30 15:47:15 UTC
carbondata git commit: [CARBONDATA-1816][Bad Records] Changing
BAD_RECORDS_ACTION default action to FAIL
Repository: carbondata
Updated Branches:
refs/heads/master 3a3e85f62 -> 6ae1f1b61
[CARBONDATA-1816][Bad Records] Changing BAD_RECORDS_ACTION default action to FAIL
Currently, the default action is FORCE, which by default allows the user to load bad records as well. Changing the default action of BAD_RECORDS_ACTION to FAIL will alert the user if there are any bad records in the data being loaded.
This closes #1574
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6ae1f1b6
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6ae1f1b6
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6ae1f1b6
Branch: refs/heads/master
Commit: 6ae1f1b61cd4ac21f94f40fd76cdfd78648d117f
Parents: 3a3e85f
Author: dhatchayani <dh...@gmail.com>
Authored: Mon Nov 27 15:15:17 2017 +0530
Committer: manishgupta88 <to...@gmail.com>
Committed: Thu Nov 30 21:19:44 2017 +0530
----------------------------------------------------------------------
.../carbondata/core/constants/CarbonCommonConstants.java | 6 +++++-
.../cluster/sdv/generated/AlterTableTestCase.scala | 3 ++-
.../badrecordloger/BadRecordEmptyDataTest.scala | 11 +++++++++++
.../dataload/TestLoadDataWithDiffTimestampFormat.scala | 11 +++++++++++
.../TestLoadDataWithHiveSyntaxDefaultFormat.scala | 11 ++++++++++-
.../DateDataTypeDirectDictionaryTest.scala | 9 +++++++++
.../scala/org/apache/spark/util/CarbonCommandSuite.scala | 10 ++++++++++
7 files changed, 58 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ae1f1b6/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
index 429ac7f..a264583 100644
--- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
+++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
@@ -1273,10 +1273,14 @@ public final class CarbonCommonConstants {
"spark.sql.sources.schemaStringLengthThreshold";
public static final int SPARK_SCHEMA_STRING_LENGTH_THRESHOLD_DEFAULT = 4000;
+
@CarbonProperty
public static final String CARBON_BAD_RECORDS_ACTION = "carbon.bad.records.action";
- public static final String CARBON_BAD_RECORDS_ACTION_DEFAULT = "FORCE";
+ /**
+ * FAIL action will fail the load in case of bad records in loading data
+ */
+ public static final String CARBON_BAD_RECORDS_ACTION_DEFAULT = "FAIL";
public static final String ENABLE_HIVE_SCHEMA_META_STORE = "spark.carbon.hive.schema.store";
http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ae1f1b6/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/AlterTableTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/AlterTableTestCase.scala b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/AlterTableTestCase.scala
index 51ddd20..b1a0f34 100644
--- a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/AlterTableTestCase.scala
+++ b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/AlterTableTestCase.scala
@@ -23,6 +23,7 @@ import org.apache.spark.sql.common.util._
import org.apache.spark.sql.test.TestQueryExecutor
import org.scalatest.BeforeAndAfterAll
+import org.apache.carbondata.common.constants.LoggerAction
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties
@@ -1067,7 +1068,7 @@ class AlterTableTestCase extends QueryTest with BeforeAndAfterAll {
val p3 = prop.getProperty("carbon.horizontal.delete.compaction.threshold", CarbonCommonConstants.DEFAULT_DELETE_DELTAFILE_COUNT_THRESHOLD_IUD_COMPACTION)
val p4 = prop.getProperty("carbon.compaction.level.threshold", CarbonCommonConstants.DEFAULT_SEGMENT_LEVEL_THRESHOLD)
val p5 = prop.getProperty("carbon.enable.auto.load.merge", CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE)
- val p6 = prop.getProperty("carbon.bad.records.action", CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION_DEFAULT)
+ val p6 = prop.getProperty("carbon.bad.records.action", LoggerAction.FORCE.name())
override protected def beforeAll() {
// Adding new properties
http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ae1f1b6/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordEmptyDataTest.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordEmptyDataTest.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordEmptyDataTest.scala
index 4e5af3d..4c6cc21 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordEmptyDataTest.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordEmptyDataTest.scala
@@ -26,6 +26,8 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.spark.sql.test.util.QueryTest
+import org.apache.carbondata.common.constants.LoggerAction
+
/**
* Test Class for detailed query on timestamp datatypes
*
@@ -34,6 +36,9 @@ import org.apache.spark.sql.test.util.QueryTest
class BadRecordEmptyDataTest extends QueryTest with BeforeAndAfterAll {
var hiveContext: HiveContext = _
+ val bad_records_action = CarbonProperties.getInstance()
+ .getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION)
+
override def beforeAll {
try {
sql("drop table IF EXISTS emptyColumnValues")
@@ -42,6 +47,8 @@ class BadRecordEmptyDataTest extends QueryTest with BeforeAndAfterAll {
sql("drop table IF EXISTS empty_timestamp_false")
sql("drop table IF EXISTS dataloadOptionTests")
sql("drop table IF EXISTS bigtab")
+ CarbonProperties.getInstance().addProperty(
+ CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, LoggerAction.FORCE.name())
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
new File("./target/test/badRecords")
@@ -175,6 +182,10 @@ class BadRecordEmptyDataTest extends QueryTest with BeforeAndAfterAll {
sql("drop table IF EXISTS empty_timestamp_false")
sql("drop table IF EXISTS dataloadOptionTests")
sql("drop table IF EXISTS bigtab")
+
+ CarbonProperties.getInstance().addProperty(
+ CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
+ bad_records_action)
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ae1f1b6/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithDiffTimestampFormat.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithDiffTimestampFormat.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithDiffTimestampFormat.scala
index 906f05a..1b0da78 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithDiffTimestampFormat.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithDiffTimestampFormat.scala
@@ -28,8 +28,16 @@ import org.apache.carbondata.core.util.CarbonProperties
import org.apache.carbondata.spark.exception.MalformedCarbonCommandException
import org.apache.spark.sql.test.util.QueryTest
+import org.apache.carbondata.common.constants.LoggerAction
+
class TestLoadDataWithDiffTimestampFormat extends QueryTest with BeforeAndAfterAll {
+ val bad_records_action = CarbonProperties.getInstance()
+ .getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION)
+
override def beforeAll {
+ CarbonProperties.getInstance().addProperty(
+ CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, LoggerAction.FORCE.name())
+
sql("DROP TABLE IF EXISTS t3")
sql("""
CREATE TABLE IF NOT EXISTS t3
@@ -131,6 +139,9 @@ class TestLoadDataWithDiffTimestampFormat extends QueryTest with BeforeAndAfterA
}
override def afterAll {
+ CarbonProperties.getInstance().addProperty(
+ CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
+ bad_records_action)
sql("DROP TABLE IF EXISTS t3")
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ae1f1b6/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxDefaultFormat.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxDefaultFormat.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxDefaultFormat.scala
index c79871f..c29e517 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxDefaultFormat.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxDefaultFormat.scala
@@ -25,13 +25,19 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.spark.sql.test.util.QueryTest
+import org.apache.carbondata.common.constants.LoggerAction
+
/**
* Test Class for data loading with hive syntax and old syntax
*
*/
class TestLoadDataWithHiveSyntaxDefaultFormat extends QueryTest with BeforeAndAfterAll {
+ val bad_records_action = CarbonProperties.getInstance()
+ .getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION)
override def beforeAll {
+ CarbonProperties.getInstance().addProperty(
+ CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, LoggerAction.FORCE.name())
sql("drop table if exists escapechar1")
sql("drop table if exists escapechar2")
sql("drop table if exists escapechar3")
@@ -709,5 +715,8 @@ class TestLoadDataWithHiveSyntaxDefaultFormat extends QueryTest with BeforeAndAf
sql("drop table if exists hivetable1")
sql("drop table if exists comment_test")
sql("drop table if exists double_test")
- }
+
+ CarbonProperties.getInstance().addProperty(
+ CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
+ bad_records_action) }
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ae1f1b6/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryTest.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryTest.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryTest.scala
index 697b495..ad4d31d 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryTest.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryTest.scala
@@ -26,6 +26,8 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.spark.sql.test.util.QueryTest
+import org.apache.carbondata.common.constants.LoggerAction
+
/**
* Test Class for detailed query on timestamp datatypes
*
@@ -33,10 +35,14 @@ import org.apache.spark.sql.test.util.QueryTest
*/
class DateDataTypeDirectDictionaryTest extends QueryTest with BeforeAndAfterAll {
var hiveContext: HiveContext = _
+ val bad_records_action = CarbonProperties.getInstance()
+ .getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION)
override def beforeAll {
try {
CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "true")
+ CarbonProperties.getInstance().addProperty(
+ CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, LoggerAction.FORCE.name())
sql("drop table if exists directDictionaryTable ")
sql(
"CREATE TABLE if not exists directDictionaryTable (empno int,doj date, " +
@@ -145,6 +151,9 @@ class DateDataTypeDirectDictionaryTest extends QueryTest with BeforeAndAfterAll
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
+ CarbonProperties.getInstance().addProperty(
+ CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
+ bad_records_action)
CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "false")
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ae1f1b6/integration/spark2/src/test/scala/org/apache/spark/util/CarbonCommandSuite.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/test/scala/org/apache/spark/util/CarbonCommandSuite.scala b/integration/spark2/src/test/scala/org/apache/spark/util/CarbonCommandSuite.scala
index 1b528f9..7729534 100644
--- a/integration/spark2/src/test/scala/org/apache/spark/util/CarbonCommandSuite.scala
+++ b/integration/spark2/src/test/scala/org/apache/spark/util/CarbonCommandSuite.scala
@@ -25,12 +25,16 @@ import org.apache.spark.sql.common.util.Spark2QueryTest
import org.scalatest.BeforeAndAfterAll
import org.apache.carbondata.api.CarbonStore
+import org.apache.carbondata.common.constants.LoggerAction
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.metadata.CarbonMetadata
import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil}
class CarbonCommandSuite extends Spark2QueryTest with BeforeAndAfterAll {
+ val bad_records_action = CarbonProperties.getInstance()
+ .getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION)
+
protected def createAndLoadInputTable(inputTableName: String, inputPath: String): Unit = {
sql(
s"""
@@ -83,6 +87,8 @@ class CarbonCommandSuite extends Spark2QueryTest with BeforeAndAfterAll {
}
override def beforeAll(): Unit = {
+ CarbonProperties.getInstance().addProperty(
+ CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, LoggerAction.FORCE.name())
dropTable("csv_table")
dropTable("carbon_table")
dropTable("carbon_table2")
@@ -93,6 +99,10 @@ class CarbonCommandSuite extends Spark2QueryTest with BeforeAndAfterAll {
override def afterAll(): Unit = {
dropTable("csv_table")
dropTable("carbon_table")
+ CarbonProperties.getInstance().addProperty(
+ CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
+ bad_records_action)
+
}
private lazy val location =