Posted to commits@carbondata.apache.org by ra...@apache.org on 2018/08/09 18:25:53 UTC

[09/47] carbondata git commit: [CARBONDATA-2798] Fix Dictionary_Include for ComplexDataType

[CARBONDATA-2798] Fix Dictionary_Include for ComplexDataType

Problem 1:
Select queries with a filter throw a BufferUnderflowException because cardinality is filled even for non-dictionary complex columns.
Solution:
Check whether a complex column has the DICTIONARY encoding and fill cardinality only for such columns.
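A minimal standalone sketch of the idea (simplified stand-in names; the actual change is in ArrayDataType/StructDataType in the diff below, where isDictionaryColumn is set from carbonColumn.hasEncoding(Encoding.DICTIONARY) in FieldEncoderFactory):

    import java.util.ArrayList;
    import java.util.List;

    // Simplified stand-in for a complex (array/struct) column in the loading path.
    class ComplexColumnSketch {
      private final boolean isDictionaryColumn;
      private final List<ComplexColumnSketch> children = new ArrayList<>();

      ComplexColumnSketch(boolean isDictionaryColumn) {
        this.isDictionaryColumn = isDictionaryColumn;
      }

      void addChild(ComplexColumnSketch child) {
        children.add(child);
      }

      boolean getIsColumnDictionary() {
        // Previously this returned true unconditionally for complex columns,
        // so cardinality was filled even for no-dictionary columns.
        return isDictionaryColumn;
      }

      void fillCardinality(List<Integer> dimCardWithComplex) {
        // Fill cardinality only when the column itself is dictionary encoded.
        if (getIsColumnDictionary()) {
          dimCardWithComplex.add(0);
          for (ComplexColumnSketch child : children) {
            child.fillCardinality(dimCardWithComplex);
          }
        }
      }
    }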

Problem 2:
Data load into a transactional table throws a NullPointerException when the CSV file header is not valid.
Solution:
Throw a CarbonDataLoadingException when the CSV file header is not valid.
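A hedged sketch of the header check: fail the load with a clear CarbonDataLoadingException instead of hitting a NullPointerException later. Only CarbonDataLoadingException is from the project; the method and parameter names here are illustrative, not the actual LoadOption API (the real check uses CarbonDataProcessorUtil.isHeaderValid, as in the LoadOption.java hunk below).

    import org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException;

    // Illustrative sketch only: validate the CSV header against the table schema.
    class CsvHeaderCheckSketch {
      static void validateHeader(Object hadoopConf, boolean headerMatchesSchema)
          throws CarbonDataLoadingException {
        // In the SDK flow hadoopConf is always null, so the file-header check is
        // skipped for non-transactional tables (see the LoadOption change below).
        if (hadoopConf != null && !headerMatchesSchema) {
          throw new CarbonDataLoadingException(
              "CSV header in input file is not proper. "
                  + "Column names in schema and csv header are not the same.");
        }
      }
    }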

This closes #2578


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/2846eddb
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/2846eddb
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/2846eddb

Branch: refs/heads/branch-1.4
Commit: 2846eddb9156276e8bf97d225fd5597d26c0cafb
Parents: d4acf03
Author: Indhumathi27 <in...@gmail.com>
Authored: Mon Jul 30 14:18:44 2018 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Thu Aug 9 23:38:51 2018 +0530

----------------------------------------------------------------------
 .../src/test/resources/nontransactional1.csv    |  2 ++
 .../complexType/TestComplexDataType.scala       |  7 +++++
 .../TestNonTransactionalCarbonTable.scala       | 30 +++++++++++++++++++
 .../processing/datatypes/ArrayDataType.java     | 24 +++++++++++++--
 .../processing/datatypes/StructDataType.java    | 31 +++++++++++++++-----
 .../converter/impl/FieldEncoderFactory.java     |  6 ++--
 .../processing/loading/model/LoadOption.java    |  4 ++-
 7 files changed, 91 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/2846eddb/integration/spark-common-test/src/test/resources/nontransactional1.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/nontransactional1.csv b/integration/spark-common-test/src/test/resources/nontransactional1.csv
new file mode 100644
index 0000000..ac9ec54
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/nontransactional1.csv
@@ -0,0 +1,2 @@
+arvind, 33, 6.2
+bill, 35, 7.3
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/carbondata/blob/2846eddb/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
index 1451f7b..1ad7889 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
@@ -971,6 +971,13 @@ class TestComplexDataType extends QueryTest with BeforeAndAfterAll {
       "('dictionary_include'='b')")
     sql("insert into test values(1,2) ")
     checkAnswer(sql("select b[0] from test"),Seq(Row(2)))
+    sql("DROP TABLE IF EXISTS test")
+    sql(
+      "create table test(intval array<array<int>>,str array<array<string>>, bool " +
+      "array<array<boolean>>, sint array<array<short>>, big array<array<bigint>>)  stored by " +
+      "'carbondata' tblproperties('dictionary_include'='bool,sint,big')")
+    sql("insert into test values(1,'ab',true,22,33)")
+    checkExistence(sql("select * from test"), true, "33")
   }
 
   test("date with struct and array") {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/2846eddb/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
index 8a1d465..b92d41d 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
@@ -52,6 +52,7 @@ import org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory
 import org.apache.carbondata.core.metadata.ColumnarFormatVersion
 import org.apache.carbondata.core.metadata.datatype.DataTypes
 import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil, DataFileFooterConverterV3}
+import org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException
 import org.apache.carbondata.sdk.file._
 
 
@@ -350,6 +351,35 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll {
     cleanTestData()
   }
 
+  test(" test csv fileheader for transactional table") {
+    FileUtils.deleteDirectory(new File(writerPath))
+    buildTestDataWithSameUUID(3, false, null, List("name"))
+    assert(new File(writerPath).exists())
+
+    sql("DROP TABLE IF EXISTS sdkOutputTable")
+
+    sql(
+      s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION
+         |'$writerPath' """.stripMargin)
+
+    checkAnswer(sql("SELECT name,name FROM sdkOutputTable"), Seq(
+      Row("robot0", "robot0"),
+      Row("robot1", "robot1"),
+      Row("robot2", "robot2")))
+    //load csvfile without fileheader
+    var exception = intercept[CarbonDataLoadingException] {
+      sql(s"""load data inpath '$resourcesPath/nontransactional1.csv' into table sdkOutputTable""").show(200,false)
+    }
+    assert(exception.getMessage()
+      .contains("CSV header in input file is not proper. Column names in schema and csv header are not the same."))
+
+    sql("DROP TABLE sdkOutputTable")
+    // drop table should not delete the files
+    assert(new File(writerPath).exists())
+    cleanTestData()
+  }
+
+
   test("test count star with multiple loads files with same schema and UUID") {
     FileUtils.deleteDirectory(new File(writerPath))
     buildTestDataWithSameUUID(3, false, null, List("name"))

http://git-wip-us.apache.org/repos/asf/carbondata/blob/2846eddb/processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java b/processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java
index 60972e8..0a1eba8 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java
@@ -63,6 +63,11 @@ public class ArrayDataType implements GenericDataType<ArrayObject> {
   private int outputArrayIndex;
 
   /**
+   * Dictionary column
+   */
+  private boolean isDictionaryColumn;
+
+  /**
    * current data counter
    */
   private int dataCounter;
@@ -88,6 +93,21 @@ public class ArrayDataType implements GenericDataType<ArrayObject> {
     this.columnId = columnId;
   }
 
+  /**
+   * constructor
+   * @param name
+   * @param parentname
+   * @param columnId
+   * @param isDictionaryColumn
+   */
+  public ArrayDataType(String name, String parentname, String columnId,
+      Boolean isDictionaryColumn) {
+    this.name = name;
+    this.parentname = parentname;
+    this.columnId = columnId;
+    this.isDictionaryColumn = isDictionaryColumn;
+  }
+
   /*
    * to add child dimensions
    */
@@ -153,7 +173,7 @@ public class ArrayDataType implements GenericDataType<ArrayObject> {
   }
 
   @Override public boolean getIsColumnDictionary() {
-    return true;
+    return isDictionaryColumn;
   }
 
   @Override public void writeByteArray(ArrayObject input, DataOutputStream dataOutputStream,
@@ -172,7 +192,7 @@ public class ArrayDataType implements GenericDataType<ArrayObject> {
 
   @Override
   public void fillCardinality(List<Integer> dimCardWithComplex) {
-    if (children.getIsColumnDictionary()) {
+    if (this.getIsColumnDictionary()) {
       dimCardWithComplex.add(0);
       children.fillCardinality(dimCardWithComplex);
     }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/2846eddb/processing/src/main/java/org/apache/carbondata/processing/datatypes/StructDataType.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/datatypes/StructDataType.java b/processing/src/main/java/org/apache/carbondata/processing/datatypes/StructDataType.java
index af95de6..31f2234 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/datatypes/StructDataType.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/datatypes/StructDataType.java
@@ -57,6 +57,12 @@ public class StructDataType implements GenericDataType<StructObject> {
    * output array index
    */
   private int outputArrayIndex;
+
+  /**
+   * Dictionary column
+   */
+  private boolean isDictionaryColumn;
+
   /**
    * data counter
    */
@@ -82,6 +88,21 @@ public class StructDataType implements GenericDataType<StructObject> {
     this.columnId = columnId;
   }
 
+  /**
+   * constructor
+   * @param name
+   * @param parentname
+   * @param columnId
+   * @param isDictionaryColumn
+   */
+  public StructDataType(String name, String parentname, String columnId,
+      Boolean isDictionaryColumn) {
+    this.name = name;
+    this.parentname = parentname;
+    this.columnId = columnId;
+    this.isDictionaryColumn = isDictionaryColumn;
+  }
+
   /*
    * add child dimensions
    */
@@ -153,7 +174,7 @@ public class StructDataType implements GenericDataType<StructObject> {
   }
 
   @Override public boolean getIsColumnDictionary() {
-    return true;
+    return isDictionaryColumn;
   }
 
   @Override public void writeByteArray(StructObject input, DataOutputStream dataOutputStream,
@@ -178,13 +199,7 @@ public class StructDataType implements GenericDataType<StructObject> {
 
   @Override
   public void fillCardinality(List<Integer> dimCardWithComplex) {
-    boolean isDictionaryColumn = false;
-    for (GenericDataType child : children) {
-      if (child.getIsColumnDictionary()) {
-        isDictionaryColumn = true;
-      }
-    }
-    if (isDictionaryColumn) {
+    if (this.getIsColumnDictionary()) {
       dimCardWithComplex.add(0);
       for (int i = 0; i < children.size(); i++) {
         children.get(i).fillCardinality(dimCardWithComplex);

http://git-wip-us.apache.org/repos/asf/carbondata/blob/2846eddb/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/FieldEncoderFactory.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/FieldEncoderFactory.java b/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/FieldEncoderFactory.java
index 39c12a9..e9d2b02 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/FieldEncoderFactory.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/FieldEncoderFactory.java
@@ -144,7 +144,8 @@ public class FieldEncoderFactory {
           ((CarbonDimension) carbonColumn).getListOfChildDimensions();
       // Create array parser with complex delimiter
       ArrayDataType arrayDataType =
-          new ArrayDataType(carbonColumn.getColName(), parentName, carbonColumn.getColumnId());
+          new ArrayDataType(carbonColumn.getColName(), parentName, carbonColumn.getColumnId(),
+              carbonColumn.hasEncoding(Encoding.DICTIONARY));
       for (CarbonDimension dimension : listOfChildDimensions) {
         arrayDataType.addChildren(
             createComplexType(dimension, carbonColumn.getColName(), absoluteTableIdentifier,
@@ -156,7 +157,8 @@ public class FieldEncoderFactory {
           ((CarbonDimension) carbonColumn).getListOfChildDimensions();
       // Create struct parser with complex delimiter
       StructDataType structDataType =
-          new StructDataType(carbonColumn.getColName(), parentName, carbonColumn.getColumnId());
+          new StructDataType(carbonColumn.getColName(), parentName, carbonColumn.getColumnId(),
+              carbonColumn.hasEncoding(Encoding.DICTIONARY));
       for (CarbonDimension dimension : dimensions) {
         structDataType.addChildren(
             createComplexType(dimension, carbonColumn.getColName(), absoluteTableIdentifier,

http://git-wip-us.apache.org/repos/asf/carbondata/blob/2846eddb/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java b/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
index 9733816..98cd90d 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
@@ -236,7 +236,9 @@ public class LoadOption {
       }
     }
 
-    if (carbonLoadModel.isCarbonTransactionalTable() && !CarbonDataProcessorUtil
+    // In SDK flow, hadoopConf will always be null,
+    // hence FileHeader check is not required for nontransactional table
+    if (hadoopConf != null && !CarbonDataProcessorUtil
         .isHeaderValid(carbonLoadModel.getTableName(), csvColumns,
             carbonLoadModel.getCarbonDataLoadSchema(), staticPartitionCols)) {
       if (csvFile == null) {