You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ma...@apache.org on 2017/11/30 15:47:15 UTC

carbondata git commit: [CARBONDATA-1816][Bad Records] Changing BAD_RECORDS_ACTION default action to FAIL

Repository: carbondata
Updated Branches:
  refs/heads/master 3a3e85f62 -> 6ae1f1b61


[CARBONDATA-1816][Bad Records] Changing BAD_RECORDS_ACTION default action to FAIL

Currently, the default action is FORCE, which by default allows the user to load bad records as well. Changing the default value of BAD_RECORDS_ACTION to FAIL makes the load fail, and thereby alerts the user, whenever the data being loaded contains bad records.

This closes #1574


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6ae1f1b6
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6ae1f1b6
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6ae1f1b6

Branch: refs/heads/master
Commit: 6ae1f1b61cd4ac21f94f40fd76cdfd78648d117f
Parents: 3a3e85f
Author: dhatchayani <dh...@gmail.com>
Authored: Mon Nov 27 15:15:17 2017 +0530
Committer: manishgupta88 <to...@gmail.com>
Committed: Thu Nov 30 21:19:44 2017 +0530

----------------------------------------------------------------------
 .../carbondata/core/constants/CarbonCommonConstants.java |  6 +++++-
 .../cluster/sdv/generated/AlterTableTestCase.scala       |  3 ++-
 .../badrecordloger/BadRecordEmptyDataTest.scala          | 11 +++++++++++
 .../dataload/TestLoadDataWithDiffTimestampFormat.scala   | 11 +++++++++++
 .../TestLoadDataWithHiveSyntaxDefaultFormat.scala        | 11 ++++++++++-
 .../DateDataTypeDirectDictionaryTest.scala               |  9 +++++++++
 .../scala/org/apache/spark/util/CarbonCommandSuite.scala | 10 ++++++++++
 7 files changed, 58 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ae1f1b6/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
index 429ac7f..a264583 100644
--- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
+++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
@@ -1273,10 +1273,14 @@ public final class CarbonCommonConstants {
       "spark.sql.sources.schemaStringLengthThreshold";
 
   public static final int SPARK_SCHEMA_STRING_LENGTH_THRESHOLD_DEFAULT = 4000;
+
   @CarbonProperty
   public static final String CARBON_BAD_RECORDS_ACTION = "carbon.bad.records.action";
 
-  public static final String CARBON_BAD_RECORDS_ACTION_DEFAULT = "FORCE";
+  /**
+   * FAIL action will fail the load in case of bad records in loading data
+   */
+  public static final String CARBON_BAD_RECORDS_ACTION_DEFAULT = "FAIL";
 
   public static final String ENABLE_HIVE_SCHEMA_META_STORE = "spark.carbon.hive.schema.store";
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ae1f1b6/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/AlterTableTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/AlterTableTestCase.scala b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/AlterTableTestCase.scala
index 51ddd20..b1a0f34 100644
--- a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/AlterTableTestCase.scala
+++ b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/AlterTableTestCase.scala
@@ -23,6 +23,7 @@ import org.apache.spark.sql.common.util._
 import org.apache.spark.sql.test.TestQueryExecutor
 import org.scalatest.BeforeAndAfterAll
 
+import org.apache.carbondata.common.constants.LoggerAction
 import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.util.CarbonProperties
 
@@ -1067,7 +1068,7 @@ class AlterTableTestCase extends QueryTest with BeforeAndAfterAll {
   val p3 = prop.getProperty("carbon.horizontal.delete.compaction.threshold", CarbonCommonConstants.DEFAULT_DELETE_DELTAFILE_COUNT_THRESHOLD_IUD_COMPACTION)
   val p4 = prop.getProperty("carbon.compaction.level.threshold", CarbonCommonConstants.DEFAULT_SEGMENT_LEVEL_THRESHOLD)
   val p5 = prop.getProperty("carbon.enable.auto.load.merge", CarbonCommonConstants.DEFAULT_ENABLE_AUTO_LOAD_MERGE)
-  val p6 = prop.getProperty("carbon.bad.records.action", CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION_DEFAULT)
+  val p6 = prop.getProperty("carbon.bad.records.action", LoggerAction.FORCE.name())
 
   override protected def beforeAll() {
     // Adding new properties

http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ae1f1b6/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordEmptyDataTest.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordEmptyDataTest.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordEmptyDataTest.scala
index 4e5af3d..4c6cc21 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordEmptyDataTest.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/badrecordloger/BadRecordEmptyDataTest.scala
@@ -26,6 +26,8 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.util.CarbonProperties
 import org.apache.spark.sql.test.util.QueryTest
 
+import org.apache.carbondata.common.constants.LoggerAction
+
 /**
  * Test Class for detailed query on timestamp datatypes
  *
@@ -34,6 +36,9 @@ import org.apache.spark.sql.test.util.QueryTest
 class BadRecordEmptyDataTest extends QueryTest with BeforeAndAfterAll {
   var hiveContext: HiveContext = _
 
+  val bad_records_action = CarbonProperties.getInstance()
+    .getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION)
+
   override def beforeAll {
     try {
       sql("drop table IF EXISTS emptyColumnValues")
@@ -42,6 +47,8 @@ class BadRecordEmptyDataTest extends QueryTest with BeforeAndAfterAll {
       sql("drop table IF EXISTS empty_timestamp_false")
       sql("drop table IF EXISTS dataloadOptionTests")
       sql("drop table IF EXISTS bigtab")
+      CarbonProperties.getInstance().addProperty(
+        CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, LoggerAction.FORCE.name())
       CarbonProperties.getInstance()
         .addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
           new File("./target/test/badRecords")
@@ -175,6 +182,10 @@ class BadRecordEmptyDataTest extends QueryTest with BeforeAndAfterAll {
     sql("drop table IF EXISTS empty_timestamp_false")
     sql("drop table IF EXISTS dataloadOptionTests")
     sql("drop table IF EXISTS bigtab")
+
+    CarbonProperties.getInstance().addProperty(
+      CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
+      bad_records_action)
     CarbonProperties.getInstance()
       .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ae1f1b6/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithDiffTimestampFormat.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithDiffTimestampFormat.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithDiffTimestampFormat.scala
index 906f05a..1b0da78 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithDiffTimestampFormat.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithDiffTimestampFormat.scala
@@ -28,8 +28,16 @@ import org.apache.carbondata.core.util.CarbonProperties
 import org.apache.carbondata.spark.exception.MalformedCarbonCommandException
 import org.apache.spark.sql.test.util.QueryTest
 
+import org.apache.carbondata.common.constants.LoggerAction
+
 class TestLoadDataWithDiffTimestampFormat extends QueryTest with BeforeAndAfterAll {
+  val bad_records_action = CarbonProperties.getInstance()
+    .getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION)
+
   override def beforeAll {
+    CarbonProperties.getInstance().addProperty(
+      CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, LoggerAction.FORCE.name())
+
     sql("DROP TABLE IF EXISTS t3")
     sql("""
            CREATE TABLE IF NOT EXISTS t3
@@ -131,6 +139,9 @@ class TestLoadDataWithDiffTimestampFormat extends QueryTest with BeforeAndAfterA
   }
 
   override def afterAll {
+    CarbonProperties.getInstance().addProperty(
+      CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
+      bad_records_action)
     sql("DROP TABLE IF EXISTS t3")
   }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ae1f1b6/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxDefaultFormat.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxDefaultFormat.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxDefaultFormat.scala
index c79871f..c29e517 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxDefaultFormat.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxDefaultFormat.scala
@@ -25,13 +25,19 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.util.CarbonProperties
 import org.apache.spark.sql.test.util.QueryTest
 
+import org.apache.carbondata.common.constants.LoggerAction
+
 /**
   * Test Class for data loading with hive syntax and old syntax
   *
   */
 class TestLoadDataWithHiveSyntaxDefaultFormat extends QueryTest with BeforeAndAfterAll {
+  val bad_records_action = CarbonProperties.getInstance()
+    .getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION)
 
   override def beforeAll {
+    CarbonProperties.getInstance().addProperty(
+      CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, LoggerAction.FORCE.name())
     sql("drop table if exists escapechar1")
     sql("drop table if exists escapechar2")
     sql("drop table if exists escapechar3")
@@ -709,5 +715,8 @@ class TestLoadDataWithHiveSyntaxDefaultFormat extends QueryTest with BeforeAndAf
     sql("drop table if exists hivetable1")
     sql("drop table if exists comment_test")
     sql("drop table if exists double_test")
-  }
+
+    CarbonProperties.getInstance().addProperty(
+      CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
+      bad_records_action)  }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ae1f1b6/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryTest.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryTest.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryTest.scala
index 697b495..ad4d31d 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryTest.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/DateDataTypeDirectDictionaryTest.scala
@@ -26,6 +26,8 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.util.CarbonProperties
 import org.apache.spark.sql.test.util.QueryTest
 
+import org.apache.carbondata.common.constants.LoggerAction
+
 /**
   * Test Class for detailed query on timestamp datatypes
   *
@@ -33,10 +35,14 @@ import org.apache.spark.sql.test.util.QueryTest
   */
 class DateDataTypeDirectDictionaryTest extends QueryTest with BeforeAndAfterAll {
   var hiveContext: HiveContext = _
+  val bad_records_action = CarbonProperties.getInstance()
+    .getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION)
 
   override def beforeAll {
     try {
       CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "true")
+      CarbonProperties.getInstance().addProperty(
+        CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, LoggerAction.FORCE.name())
       sql("drop table if exists directDictionaryTable ")
       sql(
         "CREATE TABLE if not exists directDictionaryTable (empno int,doj date, " +
@@ -145,6 +151,9 @@ class DateDataTypeDirectDictionaryTest extends QueryTest with BeforeAndAfterAll
     CarbonProperties.getInstance()
       .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
         CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
+    CarbonProperties.getInstance().addProperty(
+      CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
+      bad_records_action)
     CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "false")
   }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ae1f1b6/integration/spark2/src/test/scala/org/apache/spark/util/CarbonCommandSuite.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/test/scala/org/apache/spark/util/CarbonCommandSuite.scala b/integration/spark2/src/test/scala/org/apache/spark/util/CarbonCommandSuite.scala
index 1b528f9..7729534 100644
--- a/integration/spark2/src/test/scala/org/apache/spark/util/CarbonCommandSuite.scala
+++ b/integration/spark2/src/test/scala/org/apache/spark/util/CarbonCommandSuite.scala
@@ -25,12 +25,16 @@ import org.apache.spark.sql.common.util.Spark2QueryTest
 import org.scalatest.BeforeAndAfterAll
 
 import org.apache.carbondata.api.CarbonStore
+import org.apache.carbondata.common.constants.LoggerAction
 import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.metadata.CarbonMetadata
 import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil}
 
 class CarbonCommandSuite extends Spark2QueryTest with BeforeAndAfterAll {
 
+  val bad_records_action = CarbonProperties.getInstance()
+    .getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION)
+
   protected def createAndLoadInputTable(inputTableName: String, inputPath: String): Unit = {
     sql(
       s"""
@@ -83,6 +87,8 @@ class CarbonCommandSuite extends Spark2QueryTest with BeforeAndAfterAll {
   }
 
   override def beforeAll(): Unit = {
+    CarbonProperties.getInstance().addProperty(
+      CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, LoggerAction.FORCE.name())
     dropTable("csv_table")
     dropTable("carbon_table")
     dropTable("carbon_table2")
@@ -93,6 +99,10 @@ class CarbonCommandSuite extends Spark2QueryTest with BeforeAndAfterAll {
   override def afterAll(): Unit = {
     dropTable("csv_table")
     dropTable("carbon_table")
+    CarbonProperties.getInstance().addProperty(
+      CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
+      bad_records_action)
+
   }
 
   private lazy val location =