You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by gv...@apache.org on 2017/03/20 15:03:55 UTC
[1/2] incubator-carbondata git commit: [CARBONDATA-794] Numeric
dimension column value should be validated for the bad record
Repository: incubator-carbondata
Updated Branches:
refs/heads/master d6d5eca30 -> a1b8afaff
[CARBONDATA-794] Numeric dimension column value should be validated for the bad record
Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/3bc9152e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/3bc9152e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/3bc9152e
Branch: refs/heads/master
Commit: 3bc9152e51b8771c68586c4e5e638be1c8acbd81
Parents: d6d5eca
Author: mohammadshahidkhan <mo...@gmail.com>
Authored: Sun Mar 19 23:21:40 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Mon Mar 20 20:27:45 2017 +0530
----------------------------------------------------------------------
.../src/test/resources/badrecords/dummy.csv | 4 +
.../NumericDimensionBadRecordTest.scala | 161 +++++++++++++++++++
.../impl/DictionaryFieldConverterImpl.java | 20 ++-
.../converter/impl/FieldEncoderFactory.java | 2 +-
.../converter/impl/RowConverterImpl.java | 2 +-
5 files changed, 184 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/3bc9152e/integration/spark-common-test/src/test/resources/badrecords/dummy.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/badrecords/dummy.csv b/integration/spark-common-test/src/test/resources/badrecords/dummy.csv
new file mode 100644
index 0000000..39bf37f
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/badrecords/dummy.csv
@@ -0,0 +1,4 @@
+name,dob,weight
+\N,\N,1
+,,xfds
+"","",""
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/3bc9152e/integration/spark2/src/test/scala/org/apache/spark/carbondata/datatype/NumericDimensionBadRecordTest.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/test/scala/org/apache/spark/carbondata/datatype/NumericDimensionBadRecordTest.scala b/integration/spark2/src/test/scala/org/apache/spark/carbondata/datatype/NumericDimensionBadRecordTest.scala
new file mode 100644
index 0000000..1301f5d
--- /dev/null
+++ b/integration/spark2/src/test/scala/org/apache/spark/carbondata/datatype/NumericDimensionBadRecordTest.scala
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.badrecordloger
+
+import java.io.File
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.common.util.QueryTest
+import org.apache.spark.sql.hive.HiveContext
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+/**
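+ * Test class verifying bad-record handling for numeric dimension columns.
+ * Loads badrecords/dummy.csv into tables whose 'weight' column is a DICTIONARY_INCLUDE
+ * column of each data type and checks the row count left by BAD_RECORDS_ACTION=IGNORE.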
+ */
+class NumericDimensionBadRecordTest extends QueryTest with BeforeAndAfterAll {
+ var hiveContext: HiveContext = _
+
+ override def beforeAll {
+ try {
+ sql("drop table IF EXISTS intDataType")
+ sql("drop table IF EXISTS longDataType")
+ sql("drop table IF EXISTS doubleDataType")
+ sql("drop table IF EXISTS floatDataType")
+ sql("drop table IF EXISTS bigDecimalDataType")
+ sql("drop table IF EXISTS stringDataType")
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
+ new File("./target/test/badRecords")
+ .getCanonicalPath)
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd")
+ var csvFilePath = ""
+
+ // 1. bad record int DataType dimension
+ sql("create table intDataType(name String, dob timestamp, weight int)" +
+ " STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('DICTIONARY_INCLUDE'='weight')")
+ csvFilePath = s"$resourcesPath/badrecords/dummy.csv"
+ sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table intDataType options " +
+ "('BAD_RECORDS_LOGGER_ENABLE'='true','BAD_RECORDS_ACTION'='IGNORE')");
+ // 2. bad record long DataType dimension
+ sql("create table longDataType(name String, dob timestamp, weight long)" +
+ " STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('DICTIONARY_INCLUDE'='weight')")
+ csvFilePath = s"$resourcesPath/badrecords/dummy.csv"
+ sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table longDataType options " +
+ "('BAD_RECORDS_LOGGER_ENABLE'='true','BAD_RECORDS_ACTION'='IGNORE')");
+ // 3. bad record double DataType dimension
+ sql("create table doubleDataType(name String, dob timestamp, weight double)" +
+ " STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('DICTIONARY_INCLUDE'='weight')")
+ csvFilePath = s"$resourcesPath/badrecords/dummy.csv"
+ sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table doubleDataType options " +
+ "('BAD_RECORDS_LOGGER_ENABLE'='true','BAD_RECORDS_ACTION'='IGNORE')");
+
+ // 4. bad record float DataType dimension
+ sql("create table floatDataType(name String, dob timestamp, weight float)" +
+ " STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('DICTIONARY_INCLUDE'='weight')")
+ csvFilePath = s"$resourcesPath/badrecords/dummy.csv"
+ sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table floatDataType options " +
+ "('BAD_RECORDS_LOGGER_ENABLE'='true','BAD_RECORDS_ACTION'='IGNORE')");
+ // 5. bad record decimal DataType dimension
+ sql("create table bigDecimalDataType(name String, dob timestamp, weight decimal(3,1))" +
+ " STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('DICTIONARY_INCLUDE'='weight')")
+ csvFilePath = s"$resourcesPath/badrecords/dummy.csv"
+ sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table bigDecimalDataType options " +
+ "('BAD_RECORDS_LOGGER_ENABLE'='true','BAD_RECORDS_ACTION'='IGNORE')");
+
+ // 6. bad record string DataType dimension
+ sql("create table stringDataType(name String, dob timestamp, weight String)" +
+ " STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('DICTIONARY_INCLUDE'='weight')")
+ csvFilePath = s"$resourcesPath/badrecords/dummy.csv"
+ sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table stringDataType options " +
+ "('BAD_RECORDS_LOGGER_ENABLE'='true','BAD_RECORDS_ACTION'='IGNORE')");
+
+ } catch {
+ case x: Throwable => {
+ System.out.println(x.getMessage)
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
+ }
+ }
+ }
+
+ test("select count(*) from intDataType") {
+ checkAnswer(
+ sql("select count(*) from intDataType"),
+ Seq(Row(2)
+ )
+ )
+ }
+
+ test("select count(*) from longDataType") {
+ checkAnswer(
+ sql("select count(*) from longDataType"),
+ Seq(Row(2)
+ )
+ )
+ }
+
+
+ test("select count(*) from doubleDataType") {
+ checkAnswer(
+ sql("select count(*) from doubleDataType"),
+ Seq(Row(2)
+ )
+ )
+ }
+
+ test("select count(*) from floatDataType") {
+ checkAnswer(
+ sql("select count(*) from floatDataType"),
+ Seq(Row(2)
+ )
+ )
+ }
+
+ test("select count(*) from bigDecimalDataType") {
+ checkAnswer(
+ sql("select count(*) from bigDecimalDataType"),
+ Seq(Row(2)
+ )
+ )
+ }
+
+ test("select count(*) from stringDataType") {
+ checkAnswer(
+ sql("select count(*) from stringDataType"),
+ Seq(Row(3)
+ )
+ )
+ }
+
+ override def afterAll {
+ sql("drop table IF EXISTS intDataType")
+ sql("drop table IF EXISTS longDataType")
+ sql("drop table IF EXISTS doubleDataType")
+ sql("drop table IF EXISTS floatDataType")
+ sql("drop table IF EXISTS bigDecimalDataType")
+ sql("drop table IF EXISTS stringDataType")
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/3bc9152e/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/DictionaryFieldConverterImpl.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/DictionaryFieldConverterImpl.java b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/DictionaryFieldConverterImpl.java
index ae5cadb..62073e2 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/DictionaryFieldConverterImpl.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/DictionaryFieldConverterImpl.java
@@ -55,14 +55,17 @@ public class DictionaryFieldConverterImpl extends AbstractDictionaryFieldConvert
private DictionaryMessage dictionaryMessage;
+ private boolean isEmptyBadRecord;
+
public DictionaryFieldConverterImpl(DataField dataField,
Cache<DictionaryColumnUniqueIdentifier, Dictionary> cache,
CarbonTableIdentifier carbonTableIdentifier, String nullFormat, int index,
DictionaryClient client, boolean useOnePass, String storePath, boolean tableInitialize,
- Map<Object, Integer> localCache) throws IOException {
+ Map<Object, Integer> localCache, boolean isEmptyBadRecord) throws IOException {
this.index = index;
this.carbonDimension = (CarbonDimension) dataField.getColumn();
this.nullFormat = nullFormat;
+ this.isEmptyBadRecord = isEmptyBadRecord;
DictionaryColumnUniqueIdentifier identifier =
new DictionaryColumnUniqueIdentifier(carbonTableIdentifier,
dataField.getColumn().getColumnIdentifier(), dataField.getColumn().getDataType());
@@ -94,8 +97,19 @@ public class DictionaryFieldConverterImpl extends AbstractDictionaryFieldConvert
@Override public void convert(CarbonRow row, BadRecordLogHolder logHolder)
throws CarbonDataLoadingException {
try {
- String parsedValue = DataTypeUtil.parseValue(row.getString(index), carbonDimension);
- if (null == parsedValue || parsedValue.equals(nullFormat)) {
+ String dimensionValue = row.getString(index);
+ if (dimensionValue == null || dimensionValue.equals(nullFormat)) {
+ dimensionValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL;
+ }
+ String parsedValue = DataTypeUtil.parseValue(dimensionValue, carbonDimension);
+ if (null == parsedValue) {
+ if ((dimensionValue.length() > 0) || (dimensionValue.length() == 0 && isEmptyBadRecord)) {
+ String dataType = carbonDimension.getDataType().getName();
+ logHolder.setReason(
+ "The value " + " \"" + dimensionValue + "\"" + " with column name " + carbonDimension
+ .getColName() + " and column data type " + dataType + " is not a valid "
+ + dataType + " type.");
+ }
row.update(CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY, index);
} else {
row.update(dictionaryGenerator.getOrGenerateKey(parsedValue), index);
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/3bc9152e/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/FieldEncoderFactory.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/FieldEncoderFactory.java b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/FieldEncoderFactory.java
index 660f256..158f3f0 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/FieldEncoderFactory.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/FieldEncoderFactory.java
@@ -75,7 +75,7 @@ public class FieldEncoderFactory {
} else if (dataField.getColumn().hasEncoding(Encoding.DICTIONARY) &&
!dataField.getColumn().isComplex()) {
return new DictionaryFieldConverterImpl(dataField, cache, carbonTableIdentifier, nullFormat,
- index, client, useOnePass, storePath, tableInitialize, localCache);
+ index, client, useOnePass, storePath, tableInitialize, localCache, isEmptyBadRecord);
} else if (dataField.getColumn().isComplex()) {
return new ComplexFieldConverterImpl(
createComplexType(dataField, cache, carbonTableIdentifier,
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/3bc9152e/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/RowConverterImpl.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/RowConverterImpl.java b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/RowConverterImpl.java
index 3ba7bdf..2471314 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/RowConverterImpl.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/RowConverterImpl.java
@@ -155,7 +155,7 @@ public class RowConverterImpl implements RowConverter {
fieldConverters[i].convert(row, logHolder);
if (!logHolder.isLogged() && logHolder.isBadRecordNotAdded()) {
if (badRecordLogger.isDataLoadFail()) {
- String error = "Data load failed due to bad bad record: " + logHolder.getReason();
+ String error = "Data load failed due to bad record: " + logHolder.getReason();
throw new CarbonDataLoadingException(error);
}
badRecordLogger.addBadRecordsToBuilder(copy.getData(), logHolder.getReason());
[2/2] incubator-carbondata git commit: [CARBONDATA-794] Numeric
dimension column value should be validated for bad record. This closes #673
Posted by gv...@apache.org.
[CARBONDATA-794] Numeric dimension column value should be validated for bad record. This closes #673
Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/a1b8afaf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/a1b8afaf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/a1b8afaf
Branch: refs/heads/master
Commit: a1b8afaff1e7982037391adcdebae089ca8099ca
Parents: d6d5eca 3bc9152
Author: Venkata Ramana G <ra...@huawei.com>
Authored: Mon Mar 20 20:33:32 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Mon Mar 20 20:33:32 2017 +0530
----------------------------------------------------------------------
.../src/test/resources/badrecords/dummy.csv | 4 +
.../NumericDimensionBadRecordTest.scala | 161 +++++++++++++++++++
.../impl/DictionaryFieldConverterImpl.java | 20 ++-
.../converter/impl/FieldEncoderFactory.java | 2 +-
.../converter/impl/RowConverterImpl.java | 2 +-
5 files changed, 184 insertions(+), 5 deletions(-)
----------------------------------------------------------------------