Posted to commits@carbondata.apache.org by ak...@apache.org on 2020/05/11 13:21:50 UTC

[carbondata] branch master updated: [CARBONDATA-3793] Data load with partition columns fails with InvalidLoadOptionException when load option 'header' is set to 'true'

This is an automated email from the ASF dual-hosted git repository.

akashrn5 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 3a1d80f  [CARBONDATA-3793] Data load with partition columns fails with InvalidLoadOptionException when load option 'header' is set to 'true'
3a1d80f is described below

commit 3a1d80fdc736e201fdbecaea921d7113428cb467
Author: Venu Reddy <k....@gmail.com>
AuthorDate: Tue May 5 22:00:49 2020 +0530

    [CARBONDATA-3793] Data load with partition columns fails with InvalidLoadOptionException when
    load option 'header' is set to 'true'
    
    Why is this PR needed?
    A data load into a table with partition columns fails with InvalidLoadOptionException when
    the load option 'header' is set to 'true'.
    
    What changes were proposed in this PR?
    In the SparkCarbonTableFormat.prepareWrite() method, after adding the 'fileheader' option
    with the header columns to optionsFinal, the 'header' option must also be set to 'false' in
    optionsFinal, so that the generated file header no longer conflicts with the user-supplied
    'header'='true' during load-option validation.
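
    For context, the sketch below shows the kind of load-option validation that
    raises this exception. It is a hedged illustration: only the 'header' and
    'fileheader' option names and the InvalidLoadOptionException type come from
    this commit; the object name, method, message texts, and sample header are
    assumptions, not the project's actual source.

        // Sketch: 'header'='true' declares that the CSV's first line is the
        // header, so a 'fileheader' option must not be present at the same time.
        object LoadOptionCheck {
          class InvalidLoadOptionException(msg: String) extends Exception(msg)

          def validate(userOptions: Map[String, String], fileHeader: String): Unit = {
            userOptions.get("header").foreach { header =>
              if (!header.equalsIgnoreCase("true") && !header.equalsIgnoreCase("false")) {
                throw new InvalidLoadOptionException(
                  "'header' option should be either 'true' or 'false'")
              }
              if (header.toBoolean && fileHeader.nonEmpty) {
                throw new InvalidLoadOptionException(
                  "'fileheader' option is not allowed when 'header' is 'true'")
              }
            }
          }

          def main(args: Array[String]): Unit = {
            val generatedHeader = "empname,designation,empno" // like prepareWrite builds
            try {
              // Before the fix: the user's 'header'='true' was still in effect
              // when the partition load generated a 'fileheader', so the load
              // was rejected.
              validate(Map("header" -> "true"), generatedHeader)
            } catch {
              case e: InvalidLoadOptionException => println(s"rejected: ${e.getMessage}")
            }
            // After the fix: 'header' is forced to 'false' next to the generated
            // 'fileheader', so the same load passes validation.
            validate(Map("header" -> "false"), generatedHeader)
            println("accepted: 'header'='false' with generated 'fileheader'")
          }
        }

    The one-line fix in the first hunk below applies this at the source: right
    after generating the 'fileheader' for a partition load, it forces 'header'
    to 'false' in optionsFinal, the option map handed to
    CarbonLoadModelBuilder.build.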
    
    This closes #3745
---
 .../datasources/SparkCarbonTableFormat.scala       |  2 +-
 .../StandardPartitionTableQueryTestCase.scala      | 24 ++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkCarbonTableFormat.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkCarbonTableFormat.scala
index 6ba1702..0c8ab14 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkCarbonTableFormat.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkCarbonTableFormat.scala
@@ -125,9 +125,9 @@ with Serializable {
     optionsFinal.put(
       "fileheader",
       dataSchema.fields.map(_.name.toLowerCase).mkString(",") + "," + partitionStr)
+    optionsFinal.put("header", "false")
     val optionsLocal = new mutable.HashMap[String, String]()
     optionsLocal ++= options
-    optionsLocal += (("header", "false"))
     new CarbonLoadModelBuilder(table).build(
       optionsLocal.toMap.asJava,
       optionsFinal,
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala
index adef7ef..2229716 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala
@@ -77,6 +77,30 @@ class StandardPartitionTableQueryTestCase extends QueryTest with BeforeAndAfterA
 
   }
 
+  test("test load on partition table with load header option set to true") {
+    sql("drop table if exists partitionone")
+    sql(
+      """
+        | CREATE TABLE partitionone (empname String, designation String, doj Timestamp,
+        |  workgroupcategory int, workgroupcategoryname String, deptno int, deptname String,
+        |  projectcode int, projectjoindate Timestamp, projectenddate Date,attendance int,
+        |  utilization int,salary int)
+        | PARTITIONED BY (empno int)
+        | STORED AS carbondata
+      """.stripMargin)
+    sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE partitionone OPTIONS('DELIMITER'= ',','HEADER'='true','QUOTECHAR'= '"')""")
+
+    val frame = sql(
+      "select empno, empname, designation, doj, workgroupcategory, workgroupcategoryname, deptno," +
+      " deptname, projectcode, projectjoindate, projectenddate, attendance, utilization, salary " +
+      "from partitionone where empno=11 order by empno")
+    verifyPartitionInfo(frame, Seq("empno=11"))
+
+    checkAnswer(frame,
+      sql("select  empno, empname, designation, doj, workgroupcategory, workgroupcategoryname, deptno, deptname, projectcode, projectjoindate, projectenddate, attendance, utilization, salary from originTable where empno=11 order by empno"))
+    sql("drop table if exists partitionone")
+  }
+
   test("create partition table by dataframe") {
     sql("select * from originTable")
       .write