You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ak...@apache.org on 2020/05/11 13:21:50 UTC
[carbondata] branch master updated: [CARBONDATA-3793]Data load with
partition columns fail with InvalidLoadOptionException when load option
'header' is set to 'true'
This is an automated email from the ASF dual-hosted git repository.
akashrn5 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 3a1d80f [CARBONDATA-3793]Data load with partition columns fail with InvalidLoadOptionException when load option 'header' is set to 'true'
3a1d80f is described below
commit 3a1d80fdc736e201fdbecaea921d7113428cb467
Author: Venu Reddy <k....@gmail.com>
AuthorDate: Tue May 5 22:00:49 2020 +0530
[CARBONDATA-3793]Data load with partition columns fail with InvalidLoadOptionException when
load option 'header' is set to 'true'
Why is this PR needed?
Data load with partition columns fails with an InvalidLoadOptionException when the load option 'header'
is set to 'true'.
What changes were proposed in this PR?
In the SparkCarbonTableFormat.prepareWrite() method, after adding the 'fileheader' option with the header
columns to optionsFinal, the 'header' option must be set to 'false'.
This closes #3745
---
.../datasources/SparkCarbonTableFormat.scala | 2 +-
.../StandardPartitionTableQueryTestCase.scala | 24 ++++++++++++++++++++++
2 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkCarbonTableFormat.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkCarbonTableFormat.scala
index 6ba1702..0c8ab14 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkCarbonTableFormat.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkCarbonTableFormat.scala
@@ -125,9 +125,9 @@ with Serializable {
optionsFinal.put(
"fileheader",
dataSchema.fields.map(_.name.toLowerCase).mkString(",") + "," + partitionStr)
+ optionsFinal.put("header", "false")
val optionsLocal = new mutable.HashMap[String, String]()
optionsLocal ++= options
- optionsLocal += (("header", "false"))
new CarbonLoadModelBuilder(table).build(
optionsLocal.toMap.asJava,
optionsFinal,
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala
index adef7ef..2229716 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala
@@ -77,6 +77,30 @@ class StandardPartitionTableQueryTestCase extends QueryTest with BeforeAndAfterA
}
+ test("test load on partition table with load header option set to true") {
+ sql("drop table if exists partitionone")
+ sql(
+ """
+ | CREATE TABLE partitionone (empname String, designation String, doj Timestamp,
+ | workgroupcategory int, workgroupcategoryname String, deptno int, deptname String,
+ | projectcode int, projectjoindate Timestamp, projectenddate Date,attendance int,
+ | utilization int,salary int)
+ | PARTITIONED BY (empno int)
+ | STORED AS carbondata
+ """.stripMargin)
+ sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE partitionone OPTIONS('DELIMITER'= ',','HEADER'='true','QUOTECHAR'= '"')""")
+
+ val frame = sql(
+ "select empno, empname, designation, doj, workgroupcategory, workgroupcategoryname, deptno," +
+ " deptname, projectcode, projectjoindate, projectenddate, attendance, utilization, salary " +
+ "from partitionone where empno=11 order by empno")
+ verifyPartitionInfo(frame, Seq("empno=11"))
+
+ checkAnswer(frame,
+ sql("select empno, empname, designation, doj, workgroupcategory, workgroupcategoryname, deptno, deptname, projectcode, projectjoindate, projectenddate, attendance, utilization, salary from originTable where empno=11 order by empno"))
+ sql("drop table if exists partitionone")
+ }
+
test("create partition table by dataframe") {
sql("select * from originTable")
.write