You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ch...@apache.org on 2016/08/27 00:44:58 UTC

[1/2] incubator-carbondata git commit: fix load data with first line is null

Repository: incubator-carbondata
Updated Branches:
  refs/heads/master 518b1325b -> 5d1a177e7


Fix data loading when the first line of the CSV file is blank

add test data


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/166d410e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/166d410e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/166d410e

Branch: refs/heads/master
Commit: 166d410e6579680a15392062dd53945468b00550
Parents: 518b132
Author: foryou2030 <fo...@126.com>
Authored: Tue Aug 16 20:08:19 2016 +0800
Committer: chenliang613 <ch...@apache.org>
Committed: Sat Aug 27 08:43:50 2016 +0800

----------------------------------------------------------------------
 .../spark/csv/CarbonCsvRelation.scala           |  3 ++-
 .../test/resources/dataWithNullFirstLine.csv    | 11 ++++++++++
 .../dataload/TestLoadDataWithBlankLine.scala    | 21 ++++++++++++++++++--
 3 files changed, 32 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/166d410e/integration/spark/src/main/scala/org/apache/carbondata/spark/csv/CarbonCsvRelation.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/carbondata/spark/csv/CarbonCsvRelation.scala b/integration/spark/src/main/scala/org/apache/carbondata/spark/csv/CarbonCsvRelation.scala
index 8f86d35..ae527ff 100644
--- a/integration/spark/src/main/scala/org/apache/carbondata/spark/csv/CarbonCsvRelation.scala
+++ b/integration/spark/src/main/scala/org/apache/carbondata/spark/csv/CarbonCsvRelation.scala
@@ -25,6 +25,7 @@ import com.databricks.spark.csv.newapi.CarbonTextFile
 import com.databricks.spark.csv.util._
 import com.databricks.spark.sql.readers._
 import org.apache.commons.csv._
+import org.apache.commons.lang3.StringUtils
 import org.apache.hadoop.fs.Path
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql._
@@ -173,7 +174,7 @@ case class CarbonCsvRelation protected[spark] (
       csv.first()
     } else {
       csv.take(MAX_COMMENT_LINES_IN_HEADER)
-        .find(! _.startsWith(comment.toString))
+        .find(x => !StringUtils.isEmpty(x) && !x.startsWith(comment.toString))
         .getOrElse(sys.error(s"No uncommented header line in " +
           s"first $MAX_COMMENT_LINES_IN_HEADER lines"))
     }

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/166d410e/integration/spark/src/test/resources/dataWithNullFirstLine.csv
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/resources/dataWithNullFirstLine.csv b/integration/spark/src/test/resources/dataWithNullFirstLine.csv
new file mode 100644
index 0000000..975f683
--- /dev/null
+++ b/integration/spark/src/test/resources/dataWithNullFirstLine.csv
@@ -0,0 +1,11 @@
+
+11,arvind,SE,17-01-2007,1,developer,10,network,928478,17-02-2007,29-11-2016,96,96.2,5040.56
+12,krithin,SSE,29-05-2008,1,developer,11,protocol,928378,29-06-2008,30-12-2016,85,95.1,7124.21
+13,madhan,TPL,07-07-2009,2,tester,10,network,928478,07-08-2009,30-12-2016,88,99,9054.235
+14,anandh,SA,29-12-2010,3,manager,11,protocol,928278,29-01-2011,29-06-2016,77,92.2,11248.25
+15,ayushi,SSA,09-11-2011,1,developer,12,security,928375,09-12-2011,29-05-2016,99,91.5,13245.48
+16,pramod,SE,14-10-2012,1,developer,13,configManagement,928478,14-11-2012,29-12-2016,86,93,5040.56
+17,gawrav,PL,22-09-2013,2,tester,12,security,928778,22-10-2013,15-11-2016,78,97.45,9574.24
+18,sibi,TL,15-08-2014,2,tester,14,Learning,928176,15-09-2014,29-05-2016,84,98.23,7245.25
+19,shivani,PL,12-05-2015,1,developer,10,network,928977,12-06-2015,12-11-2016,88,91.678,11254.24
+20,bill,PM,01-12-2015,3,manager,14,Learning,928479,01-01-2016,30-11-2016,75,94.22,13547.25

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/166d410e/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataWithBlankLine.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataWithBlankLine.scala b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataWithBlankLine.scala
index b3e1f27..163c11d 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataWithBlankLine.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataWithBlankLine.scala
@@ -30,20 +30,37 @@ import org.apache.spark.sql.Row
   */
 class TestLoadDataWithBlankLine extends QueryTest with BeforeAndAfterAll {
   override def beforeAll {
+    sql("drop table if exists carbontable")
     sql("CREATE TABLE carbontable (empno int, empname String, designation String, " +
       "doj String, workgroupcategory int, workgroupcategoryname String, deptno int, " +
       "deptname String, projectcode int, projectjoindate String, projectenddate String, " +
       "attendance int,utilization int,salary int) " +
         "STORED BY 'org.apache.carbondata.format'")
     sql("LOAD DATA LOCAL INPATH './src/test/resources/datawithblanklines.csv' INTO TABLE" +
-        " carbontable OPTIONS('DELIMITER'= ',')");
+        " carbontable OPTIONS('DELIMITER'= ',')")
+
+    sql("drop table if exists carbontable2")
+    sql("CREATE TABLE carbontable2 (empno int, empname String, designation String, " +
+      "doj String, workgroupcategory int, workgroupcategoryname String, deptno int, " +
+      "deptname String, projectcode int, projectjoindate String, projectenddate String, " +
+      "attendance int,utilization int,salary int) " +
+      "STORED BY 'org.apache.carbondata.format'")
   }
   test("test carbon table data loading when there are  blank lines in data") {
     checkAnswer(sql("select count(*) from carbontable"),
       Seq(Row(18)))
   }
 
+  test("test carbon table data loading when the first line is blank") {
+    sql("LOAD DATA LOCAL INPATH './src/test/resources/dataWithNullFirstLine.csv' INTO TABLE " +
+      "carbontable2 OPTIONS('DELIMITER'= ',','FILEHEADER'='empno,empname,designation,doj,workgroupcategory,workgroupcategoryname,deptno,deptname,projectcode,projectjoindate,projectenddate,attendance,utilization,salary')")
+
+    checkAnswer(sql("select count(*) from carbontable2"),
+      Seq(Row(11)))
+  }
+
   override def afterAll {
-    sql("drop table carbontable")
+    sql("drop table if exists carbontable")
+    sql("drop table if exists carbontable2")
   }
 }


[2/2] incubator-carbondata git commit: [CARBONDATA-158] fix load data with first line is null This closes #76

Posted by ch...@apache.org.
[CARBONDATA-158] Fix data loading when the first line of the CSV file is blank. This closes #76


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/5d1a177e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/5d1a177e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/5d1a177e

Branch: refs/heads/master
Commit: 5d1a177e7c04f88d0115a2aea549706e008aca39
Parents: 518b132 166d410
Author: chenliang613 <ch...@apache.org>
Authored: Sat Aug 27 08:44:42 2016 +0800
Committer: chenliang613 <ch...@apache.org>
Committed: Sat Aug 27 08:44:42 2016 +0800

----------------------------------------------------------------------
 .../spark/csv/CarbonCsvRelation.scala           |  3 ++-
 .../test/resources/dataWithNullFirstLine.csv    | 11 ++++++++++
 .../dataload/TestLoadDataWithBlankLine.scala    | 21 ++++++++++++++++++--
 3 files changed, 32 insertions(+), 3 deletions(-)
----------------------------------------------------------------------