You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2018/08/09 18:25:53 UTC
[09/47] carbondata git commit: [CARBONDATA-2798] Fix
Dictionary_Include for ComplexDataType
[CARBONDATA-2798] Fix Dictionary_Include for ComplexDataType
Problem1:
Select filter queries throw a BufferUnderflowException because cardinality is filled even for non-dictionary columns.
Solution:
Check whether a complex column has the DICTIONARY encoding, and fill cardinality only for such columns.
Problem2:
Loading into a transactional table throws a NullPointerException if the CSV file header is not proper.
Solution:
Throw a CarbonDataLoadingException if the CSV file header is not proper.
This closes #2578
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/2846eddb
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/2846eddb
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/2846eddb
Branch: refs/heads/branch-1.4
Commit: 2846eddb9156276e8bf97d225fd5597d26c0cafb
Parents: d4acf03
Author: Indhumathi27 <in...@gmail.com>
Authored: Mon Jul 30 14:18:44 2018 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Thu Aug 9 23:38:51 2018 +0530
----------------------------------------------------------------------
.../src/test/resources/nontransactional1.csv | 2 ++
.../complexType/TestComplexDataType.scala | 7 +++++
.../TestNonTransactionalCarbonTable.scala | 30 +++++++++++++++++++
.../processing/datatypes/ArrayDataType.java | 24 +++++++++++++--
.../processing/datatypes/StructDataType.java | 31 +++++++++++++++-----
.../converter/impl/FieldEncoderFactory.java | 6 ++--
.../processing/loading/model/LoadOption.java | 4 ++-
7 files changed, 91 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2846eddb/integration/spark-common-test/src/test/resources/nontransactional1.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/nontransactional1.csv b/integration/spark-common-test/src/test/resources/nontransactional1.csv
new file mode 100644
index 0000000..ac9ec54
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/nontransactional1.csv
@@ -0,0 +1,2 @@
+arvind, 33, 6.2
+bill, 35, 7.3
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2846eddb/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
index 1451f7b..1ad7889 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
@@ -971,6 +971,13 @@ class TestComplexDataType extends QueryTest with BeforeAndAfterAll {
"('dictionary_include'='b')")
sql("insert into test values(1,2) ")
checkAnswer(sql("select b[0] from test"),Seq(Row(2)))
+ sql("DROP TABLE IF EXISTS test")
+ sql(
+ "create table test(intval array<array<int>>,str array<array<string>>, bool " +
+ "array<array<boolean>>, sint array<array<short>>, big array<array<bigint>>) stored by " +
+ "'carbondata' tblproperties('dictionary_include'='bool,sint,big')")
+ sql("insert into test values(1,'ab',true,22,33)")
+ checkExistence(sql("select * from test"), true, "33")
}
test("date with struct and array") {
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2846eddb/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
index 8a1d465..b92d41d 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
@@ -52,6 +52,7 @@ import org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory
import org.apache.carbondata.core.metadata.ColumnarFormatVersion
import org.apache.carbondata.core.metadata.datatype.DataTypes
import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil, DataFileFooterConverterV3}
+import org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException
import org.apache.carbondata.sdk.file._
@@ -350,6 +351,35 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll {
cleanTestData()
}
+ test(" test csv fileheader for transactional table") {
+ FileUtils.deleteDirectory(new File(writerPath))
+ buildTestDataWithSameUUID(3, false, null, List("name"))
+ assert(new File(writerPath).exists())
+
+ sql("DROP TABLE IF EXISTS sdkOutputTable")
+
+ sql(
+ s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION
+ |'$writerPath' """.stripMargin)
+
+ checkAnswer(sql("SELECT name,name FROM sdkOutputTable"), Seq(
+ Row("robot0", "robot0"),
+ Row("robot1", "robot1"),
+ Row("robot2", "robot2")))
+ //load csvfile without fileheader
+ var exception = intercept[CarbonDataLoadingException] {
+ sql(s"""load data inpath '$resourcesPath/nontransactional1.csv' into table sdkOutputTable""").show(200,false)
+ }
+ assert(exception.getMessage()
+ .contains("CSV header in input file is not proper. Column names in schema and csv header are not the same."))
+
+ sql("DROP TABLE sdkOutputTable")
+ // drop table should not delete the files
+ assert(new File(writerPath).exists())
+ cleanTestData()
+ }
+
+
test("test count star with multiple loads files with same schema and UUID") {
FileUtils.deleteDirectory(new File(writerPath))
buildTestDataWithSameUUID(3, false, null, List("name"))
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2846eddb/processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java b/processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java
index 60972e8..0a1eba8 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java
@@ -63,6 +63,11 @@ public class ArrayDataType implements GenericDataType<ArrayObject> {
private int outputArrayIndex;
/**
+ * Dictionary column
+ */
+ private boolean isDictionaryColumn;
+
+ /**
* current data counter
*/
private int dataCounter;
@@ -88,6 +93,21 @@ public class ArrayDataType implements GenericDataType<ArrayObject> {
this.columnId = columnId;
}
+ /**
+ * constructor
+ * @param name
+ * @param parentname
+ * @param columnId
+ * @param isDictionaryColumn
+ */
+ public ArrayDataType(String name, String parentname, String columnId,
+ Boolean isDictionaryColumn) {
+ this.name = name;
+ this.parentname = parentname;
+ this.columnId = columnId;
+ this.isDictionaryColumn = isDictionaryColumn;
+ }
+
/*
* to add child dimensions
*/
@@ -153,7 +173,7 @@ public class ArrayDataType implements GenericDataType<ArrayObject> {
}
@Override public boolean getIsColumnDictionary() {
- return true;
+ return isDictionaryColumn;
}
@Override public void writeByteArray(ArrayObject input, DataOutputStream dataOutputStream,
@@ -172,7 +192,7 @@ public class ArrayDataType implements GenericDataType<ArrayObject> {
@Override
public void fillCardinality(List<Integer> dimCardWithComplex) {
- if (children.getIsColumnDictionary()) {
+ if (this.getIsColumnDictionary()) {
dimCardWithComplex.add(0);
children.fillCardinality(dimCardWithComplex);
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2846eddb/processing/src/main/java/org/apache/carbondata/processing/datatypes/StructDataType.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/datatypes/StructDataType.java b/processing/src/main/java/org/apache/carbondata/processing/datatypes/StructDataType.java
index af95de6..31f2234 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/datatypes/StructDataType.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/datatypes/StructDataType.java
@@ -57,6 +57,12 @@ public class StructDataType implements GenericDataType<StructObject> {
* output array index
*/
private int outputArrayIndex;
+
+ /**
+ * Dictionary column
+ */
+ private boolean isDictionaryColumn;
+
/**
* data counter
*/
@@ -82,6 +88,21 @@ public class StructDataType implements GenericDataType<StructObject> {
this.columnId = columnId;
}
+ /**
+ * constructor
+ * @param name
+ * @param parentname
+ * @param columnId
+ * @param isDictionaryColumn
+ */
+ public StructDataType(String name, String parentname, String columnId,
+ Boolean isDictionaryColumn) {
+ this.name = name;
+ this.parentname = parentname;
+ this.columnId = columnId;
+ this.isDictionaryColumn = isDictionaryColumn;
+ }
+
/*
* add child dimensions
*/
@@ -153,7 +174,7 @@ public class StructDataType implements GenericDataType<StructObject> {
}
@Override public boolean getIsColumnDictionary() {
- return true;
+ return isDictionaryColumn;
}
@Override public void writeByteArray(StructObject input, DataOutputStream dataOutputStream,
@@ -178,13 +199,7 @@ public class StructDataType implements GenericDataType<StructObject> {
@Override
public void fillCardinality(List<Integer> dimCardWithComplex) {
- boolean isDictionaryColumn = false;
- for (GenericDataType child : children) {
- if (child.getIsColumnDictionary()) {
- isDictionaryColumn = true;
- }
- }
- if (isDictionaryColumn) {
+ if (this.getIsColumnDictionary()) {
dimCardWithComplex.add(0);
for (int i = 0; i < children.size(); i++) {
children.get(i).fillCardinality(dimCardWithComplex);
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2846eddb/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/FieldEncoderFactory.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/FieldEncoderFactory.java b/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/FieldEncoderFactory.java
index 39c12a9..e9d2b02 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/FieldEncoderFactory.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/FieldEncoderFactory.java
@@ -144,7 +144,8 @@ public class FieldEncoderFactory {
((CarbonDimension) carbonColumn).getListOfChildDimensions();
// Create array parser with complex delimiter
ArrayDataType arrayDataType =
- new ArrayDataType(carbonColumn.getColName(), parentName, carbonColumn.getColumnId());
+ new ArrayDataType(carbonColumn.getColName(), parentName, carbonColumn.getColumnId(),
+ carbonColumn.hasEncoding(Encoding.DICTIONARY));
for (CarbonDimension dimension : listOfChildDimensions) {
arrayDataType.addChildren(
createComplexType(dimension, carbonColumn.getColName(), absoluteTableIdentifier,
@@ -156,7 +157,8 @@ public class FieldEncoderFactory {
((CarbonDimension) carbonColumn).getListOfChildDimensions();
// Create struct parser with complex delimiter
StructDataType structDataType =
- new StructDataType(carbonColumn.getColName(), parentName, carbonColumn.getColumnId());
+ new StructDataType(carbonColumn.getColName(), parentName, carbonColumn.getColumnId(),
+ carbonColumn.hasEncoding(Encoding.DICTIONARY));
for (CarbonDimension dimension : dimensions) {
structDataType.addChildren(
createComplexType(dimension, carbonColumn.getColName(), absoluteTableIdentifier,
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2846eddb/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java b/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
index 9733816..98cd90d 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
@@ -236,7 +236,9 @@ public class LoadOption {
}
}
- if (carbonLoadModel.isCarbonTransactionalTable() && !CarbonDataProcessorUtil
+ // In SDK flow, hadoopConf will always be null,
+ // hence FileHeader check is not required for nontransactional table
+ if (hadoopConf != null && !CarbonDataProcessorUtil
.isHeaderValid(carbonLoadModel.getTableName(), csvColumns,
carbonLoadModel.getCarbonDataLoadSchema(), staticPartitionCols)) {
if (csvFile == null) {