Posted to commits@spark.apache.org by rx...@apache.org on 2016/04/02 07:51:50 UTC

spark git commit: [MINOR][SQL] Fix comment style and correct several style nits in CSV data source

Repository: spark
Updated Branches:
  refs/heads/master f41415441 -> d7982a3a9


[MINOR][SQL] Fix comment style and correct several style nits in CSV data source

## What changes were proposed in this pull request?

While trying to create a PR (which turned out not to be an issue in the end), I corrected some style nits along the way.

So, I removed the original changes and kept only the coding style corrections.

- According to the [scala-style-guide#documentation-style](https://github.com/databricks/scala-style-guide#documentation-style), the ScalaDoc comment style is discouraged in favor of the Javadoc style:

>```scala
>/** This is a correct one-liner, short description. */
>
>/**
>  * This is correct multi-line JavaDoc comment. And
>  * this is my second line, and if I keep typing, this would be
>  * my third line.
>  */
>
>/** In Spark, we don't use the ScalaDoc style so this
>   * is not correct.
>   */
>```

- Double newlines between consecutive methods were removed. According to [scala-style-guide#blank-lines-vertical-whitespace](https://github.com/databricks/scala-style-guide#blank-lines-vertical-whitespace), a single blank line appears (a short sketch follows the quoted rule):

>Between consecutive members (or initializers) of a class: fields, constructors, methods, nested classes, static initializers, instance initializers.
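
For illustration, a minimal sketch of the rule after the change (the class and methods here are made up, not from the patch):

```scala
class CsvExample {
  private val delimiter = ","

  // A single blank line separates consecutive members.
  def parse(line: String): Array[String] = line.split(delimiter)

  def render(fields: Seq[String]): String = fields.mkString(delimiter)
}
```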

- Remove useless parentheses in tests.
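
A standalone sketch of the kind of parentheses that were dropped (identifiers here are made up; the real tests use ScalaTest's `===`, plain `==` is used so the snippet runs on its own):

```scala
val input = List("This is a string", "Small", "")

// Before: the parentheses around "\n" are redundant.
val before = input.mkString("\n") ++ ("\n")

// After: the same expression without the extra parentheses.
val after = input.mkString("\n") ++ "\n"

assert(before == after)
```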

- Use `mapPartitions` instead of `mapPartitionsWithIndex()`, since the partition index was not used.
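
For illustration, a minimal self-contained sketch of the simplification (the object, app name, and example data are made up, not from the patch): when the partition index is never used, `mapPartitions` expresses the same per-partition transformation without the unused parameter.

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

object MapPartitionsSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("sketch").setMaster("local[*]"))
    val lines: RDD[String] = sc.parallelize(Seq(" a,b ", " c,d "))

    // Before: mapPartitionsWithIndex exposes a partition index that is never used.
    val withIndex = lines.mapPartitionsWithIndex(
      (split, iter) => iter.map(_.trim), preservesPartitioning = true)

    // After: the same per-partition transformation, without the unused index.
    val withoutIndex = lines.mapPartitions(_.map(_.trim))

    // Both produce identical results.
    assert(withIndex.collect().sameElements(withoutIndex.collect()))
    sc.stop()
  }
}
```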

## How was this patch tested?

Existing unit tests were run, and `dev/run_tests` was used for the style checks.

Author: hyukjinkwon <gu...@gmail.com>

Closes #12109 from HyukjinKwon/SPARK-14271.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d7982a3a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d7982a3a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d7982a3a

Branch: refs/heads/master
Commit: d7982a3a9aa804e7e3a2004335e7f314867a5f8a
Parents: f414154
Author: hyukjinkwon <gu...@gmail.com>
Authored: Fri Apr 1 22:51:47 2016 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Fri Apr 1 22:51:47 2016 -0700

----------------------------------------------------------------------
 .../execution/datasources/csv/CSVParser.scala   | 80 ++++++++++----------
 .../execution/datasources/csv/CSVRelation.scala |  6 +-
 .../datasources/csv/DefaultSource.scala         |  1 -
 .../datasources/csv/CSVParserSuite.scala        | 10 +--
 4 files changed, 48 insertions(+), 49 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/d7982a3a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
index 7cf1b4c..5570b2c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
@@ -25,11 +25,11 @@ import com.univocity.parsers.csv.{CsvParser, CsvParserSettings, CsvWriter, CsvWr
 import org.apache.spark.internal.Logging
 
 /**
-  * Read and parse CSV-like input
-  *
-  * @param params Parameters object
-  * @param headers headers for the columns
-  */
+ * Read and parse CSV-like input
+ *
+ * @param params Parameters object
+ * @param headers headers for the columns
+ */
 private[sql] abstract class CsvReader(params: CSVOptions, headers: Seq[String]) {
 
   protected lazy val parser: CsvParser = {
@@ -54,11 +54,11 @@ private[sql] abstract class CsvReader(params: CSVOptions, headers: Seq[String])
 }
 
 /**
-  * Converts a sequence of string to CSV string
-  *
-  * @param params Parameters object for configuration
-  * @param headers headers for columns
-  */
+ * Converts a sequence of string to CSV string
+ *
+ * @param params Parameters object for configuration
+ * @param headers headers for columns
+ */
 private[sql] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) extends Logging {
   private val writerSettings = new CsvWriterSettings
   private val format = writerSettings.getFormat
@@ -90,18 +90,18 @@ private[sql] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) exten
 }
 
 /**
-  * Parser for parsing a line at a time. Not efficient for bulk data.
-  *
-  * @param params Parameters object
-  */
+ * Parser for parsing a line at a time. Not efficient for bulk data.
+ *
+ * @param params Parameters object
+ */
 private[sql] class LineCsvReader(params: CSVOptions)
   extends CsvReader(params, null) {
   /**
-    * parse a line
-    *
-    * @param line a String with no newline at the end
-    * @return array of strings where each string is a field in the CSV record
-    */
+   * parse a line
+   *
+   * @param line a String with no newline at the end
+   * @return array of strings where each string is a field in the CSV record
+   */
   def parseLine(line: String): Array[String] = {
     parser.beginParsing(new StringReader(line))
     val parsed = parser.parseNext()
@@ -111,12 +111,12 @@ private[sql] class LineCsvReader(params: CSVOptions)
 }
 
 /**
-  * Parser for parsing lines in bulk. Use this when efficiency is desired.
-  *
-  * @param iter iterator over lines in the file
-  * @param params Parameters object
-  * @param headers headers for the columns
-  */
+ * Parser for parsing lines in bulk. Use this when efficiency is desired.
+ *
+ * @param iter iterator over lines in the file
+ * @param params Parameters object
+ * @param headers headers for the columns
+ */
 private[sql] class BulkCsvReader(
     iter: Iterator[String],
     params: CSVOptions,
@@ -128,9 +128,9 @@ private[sql] class BulkCsvReader(
   private var nextRecord = parser.parseNext()
 
   /**
-    * get the next parsed line.
-    * @return array of strings where each string is a field in the CSV record
-    */
+   * get the next parsed line.
+   * @return array of strings where each string is a field in the CSV record
+   */
   override def next(): Array[String] = {
     val curRecord = nextRecord
     if(curRecord != null) {
@@ -146,11 +146,11 @@ private[sql] class BulkCsvReader(
 }
 
 /**
-  * A Reader that "reads" from a sequence of lines. Spark's textFile method removes newlines at
-  * end of each line Univocity parser requires a Reader that provides access to the data to be
-  * parsed and needs the newlines to be present
-  * @param iter iterator over RDD[String]
-  */
+ * A Reader that "reads" from a sequence of lines. Spark's textFile method removes newlines at
+ * end of each line Univocity parser requires a Reader that provides access to the data to be
+ * parsed and needs the newlines to be present
+ * @param iter iterator over RDD[String]
+ */
 private class StringIteratorReader(val iter: Iterator[String]) extends java.io.Reader {
 
   private var next: Long = 0
@@ -159,9 +159,9 @@ private class StringIteratorReader(val iter: Iterator[String]) extends java.io.R
   private var str: String = null   // current string from iter
 
   /**
-    * fetch next string from iter, if done with current one
-    * pretend there is a new line at the end of every string we get from from iter
-    */
+   * fetch next string from iter, if done with current one
+   * pretend there is a new line at the end of every string we get from from iter
+   */
   private def refill(): Unit = {
     if (length == next) {
       if (iter.hasNext) {
@@ -175,8 +175,8 @@ private class StringIteratorReader(val iter: Iterator[String]) extends java.io.R
   }
 
   /**
-    * read the next character, if at end of string pretend there is a new line
-    */
+   * read the next character, if at end of string pretend there is a new line
+   */
   override def read(): Int = {
     refill()
     if (next >= length) {
@@ -189,8 +189,8 @@ private class StringIteratorReader(val iter: Iterator[String]) extends java.io.R
   }
 
   /**
-    * read from str into cbuf
-    */
+   * read from str into cbuf
+   */
   override def read(cbuf: Array[Char], off: Int, len: Int): Int = {
     refill()
     var n = 0

http://git-wip-us.apache.org/repos/asf/spark/blob/d7982a3a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
index b47328a..54fb03b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
@@ -42,12 +42,12 @@ object CSVRelation extends Logging {
       firstLine: String,
       params: CSVOptions): RDD[Array[String]] = {
     // If header is set, make sure firstLine is materialized before sending to executors.
-    file.mapPartitionsWithIndex({
-      case (split, iter) => new BulkCsvReader(
+    file.mapPartitions { iter =>
+      new BulkCsvReader(
         if (params.headerFlag) iter.filterNot(_ == firstLine) else iter,
         params,
         headers = header)
-    }, true)
+    }
   }
 
   def csvParser(

http://git-wip-us.apache.org/repos/asf/spark/blob/d7982a3a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala
index 6b6add4..c0d6f6f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala
@@ -164,7 +164,6 @@ class DefaultSource extends FileFormat with DataSourceRegister {
     }
   }
 
-
   private def baseRdd(
       sqlContext: SQLContext,
       options: CSVOptions,

http://git-wip-us.apache.org/repos/asf/spark/blob/d7982a3a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala
index c0c38c6..dc54883 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala
@@ -46,7 +46,7 @@ class CSVParserSuite extends SparkFunSuite {
     var numRead = 0
       var n = 0
       do { // try to fill cbuf
-      var off = 0
+        var off = 0
         var len = cbuf.length
         n = reader.read(cbuf, off, len)
 
@@ -81,7 +81,7 @@ class CSVParserSuite extends SparkFunSuite {
   test("Regular case") {
     val input = List("This is a string", "This is another string", "Small", "", "\"quoted\"")
     val read = readAll(input.toIterator)
-    assert(read === input.mkString("\n") ++ ("\n"))
+    assert(read === input.mkString("\n") ++ "\n")
   }
 
   test("Empty iter") {
@@ -93,12 +93,12 @@ class CSVParserSuite extends SparkFunSuite {
   test("Embedded new line") {
     val input = List("This is a string", "This is another string", "Small\n", "", "\"quoted\"")
     val read = readAll(input.toIterator)
-    assert(read === input.mkString("\n") ++ ("\n"))
+    assert(read === input.mkString("\n") ++ "\n")
   }
 
   test("Buffer Regular case") {
     val input = List("This is a string", "This is another string", "Small", "", "\"quoted\"")
-    val output = input.mkString("\n") ++ ("\n")
+    val output = input.mkString("\n") ++ "\n"
     for(i <- 1 to output.length + 5) {
       val read = readBufAll(input.toIterator, i)
       assert(read === output)
@@ -116,7 +116,7 @@ class CSVParserSuite extends SparkFunSuite {
 
   test("Buffer Embedded new line") {
     val input = List("This is a string", "This is another string", "Small\n", "", "\"quoted\"")
-    val output = input.mkString("\n") ++ ("\n")
+    val output = input.mkString("\n") ++ "\n"
     for(i <- 1 to output.length + 5) {
       val read = readBufAll(input.toIterator, 1)
       assert(read === output)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org