You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2016/02/02 19:41:09 UTC

spark git commit: [SPARK-13114][SQL] Add a test for tokens more than the fields in schema

Repository: spark
Updated Branches:
  refs/heads/master 29d92181d -> b93830126


[SPARK-13114][SQL] Add a test for tokens more than the fields in schema

https://issues.apache.org/jira/browse/SPARK-13114

This PR adds a test for CSV rows that contain more tokens than there are fields in the schema.

Author: hyukjinkwon <gu...@gmail.com>

Closes #11020 from HyukjinKwon/SPARK-13114.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b9383012
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b9383012
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b9383012

Branch: refs/heads/master
Commit: b93830126cc59a26e2cfb5d7b3c17f9cfbf85988
Parents: 29d9218
Author: hyukjinkwon <gu...@gmail.com>
Authored: Tue Feb 2 10:41:06 2016 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Tue Feb 2 10:41:06 2016 -0800

----------------------------------------------------------------------
 sql/core/src/test/resources/cars-malformed.csv          |  6 ++++++
 .../spark/sql/execution/datasources/csv/CSVSuite.scala  | 12 ++++++++++++
 2 files changed, 18 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b9383012/sql/core/src/test/resources/cars-malformed.csv
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/cars-malformed.csv b/sql/core/src/test/resources/cars-malformed.csv
new file mode 100644
index 0000000..cfa378c
--- /dev/null
+++ b/sql/core/src/test/resources/cars-malformed.csv
@@ -0,0 +1,6 @@
+~ All the rows here are malformed: each has more tokens than the schema (header) has fields.
+year,make,model,comment,blank
+"2012","Tesla","S","No comment",,null,null
+
+1997,Ford,E350,"Go get one now they are going fast",,null,null
+2015,Chevy,,,,

http://git-wip-us.apache.org/repos/asf/spark/blob/b9383012/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index a79566b..fa4f137 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -28,6 +28,7 @@ import org.apache.spark.sql.types._
 
 class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
   private val carsFile = "cars.csv"
+  private val carsMalformedFile = "cars-malformed.csv"
   private val carsFile8859 = "cars_iso-8859-1.csv"
   private val carsTsvFile = "cars.tsv"
   private val carsAltFile = "cars-alternative.csv"
@@ -191,6 +192,17 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
     assert(exception.getMessage.contains("Malformed line in FAILFAST mode: 2015,Chevy,Volt"))
   }
 
+  test("test for tokens more than the fields in the schema") {
+    val cars = sqlContext
+      .read
+      .format("csv")
+      .option("header", "false")
+      .option("comment", "~")
+      .load(testFile(carsMalformedFile))
+
+    verifyCars(cars, withHeader = false, checkTypes = false)
+  }
+
   test("test with null quote character") {
     val cars = sqlContext.read
       .format("csv")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org