You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2016/01/26 09:51:10 UTC
spark git commit: [SQL][MINOR] A few minor tweaks to CSV reader.
Repository: spark
Updated Branches:
refs/heads/master 27c910f7f -> d54cfed5a
[SQL][MINOR] A few minor tweaks to CSV reader.
This pull request fixes a few minor coding style issues in the CSV data source that I noticed while reviewing the original change post hoc.
Author: Reynold Xin <rx...@databricks.com>
Closes #10919 from rxin/csv-minor.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d54cfed5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d54cfed5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d54cfed5
Branch: refs/heads/master
Commit: d54cfed5a6953a9ce2b9de2f31ee2d673cb5cc62
Parents: 27c910f
Author: Reynold Xin <rx...@databricks.com>
Authored: Tue Jan 26 00:51:08 2016 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Tue Jan 26 00:51:08 2016 -0800
----------------------------------------------------------------------
.../datasources/csv/CSVInferSchema.scala | 21 ++++++++------------
.../execution/datasources/csv/CSVRelation.scala | 2 +-
2 files changed, 9 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/d54cfed5/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
index 0aa4539..ace8cd7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
@@ -30,16 +30,15 @@ import org.apache.spark.sql.catalyst.analysis.HiveTypeCoercion
import org.apache.spark.sql.types._
-private[sql] object CSVInferSchema {
+private[csv] object CSVInferSchema {
/**
* Similar to the JSON schema inference
* 1. Infer type of each row
* 2. Merge row types to find common type
* 3. Replace any null types with string type
- * TODO(hossein): Can we reuse JSON schema inference? [SPARK-12670]
*/
- def apply(
+ def infer(
tokenRdd: RDD[Array[String]],
header: Array[String],
nullValue: String = ""): StructType = {
@@ -65,10 +64,7 @@ private[sql] object CSVInferSchema {
rowSoFar
}
- private[csv] def mergeRowTypes(
- first: Array[DataType],
- second: Array[DataType]): Array[DataType] = {
-
+ def mergeRowTypes(first: Array[DataType], second: Array[DataType]): Array[DataType] = {
first.zipAll(second, NullType, NullType).map { case ((a, b)) =>
val tpe = findTightestCommonType(a, b).getOrElse(StringType)
tpe match {
@@ -82,8 +78,7 @@ private[sql] object CSVInferSchema {
* Infer type of string field. Given known type Double, and a string "1", there is no
* point checking if it is an Int, as the final type must be Double or higher.
*/
- private[csv] def inferField(
- typeSoFar: DataType, field: String, nullValue: String = ""): DataType = {
+ def inferField(typeSoFar: DataType, field: String, nullValue: String = ""): DataType = {
if (field == null || field.isEmpty || field == nullValue) {
typeSoFar
} else {
@@ -155,7 +150,8 @@ private[sql] object CSVInferSchema {
}
}
-object CSVTypeCast {
+
+private[csv] object CSVTypeCast {
/**
* Casts given string datum to specified type.
@@ -167,7 +163,7 @@ object CSVTypeCast {
* @param datum string value
* @param castType SparkSQL type
*/
- private[csv] def castTo(
+ def castTo(
datum: String,
castType: DataType,
nullable: Boolean = true,
@@ -201,10 +197,9 @@ object CSVTypeCast {
* Helper method that converts string representation of a character to actual character.
* It handles some Java escaped strings and throws exception if given string is longer than one
* character.
- *
*/
@throws[IllegalArgumentException]
- private[csv] def toChar(str: String): Char = {
+ def toChar(str: String): Char = {
if (str.charAt(0) == '\\') {
str.charAt(1)
match {
http://git-wip-us.apache.org/repos/asf/spark/blob/d54cfed5/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
index 5959f7c..dc449fe 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
@@ -139,7 +139,7 @@ private[csv] class CSVRelation(
val parsedRdd = tokenRdd(header, paths)
if (params.inferSchemaFlag) {
- CSVInferSchema(parsedRdd, header, params.nullValue)
+ CSVInferSchema.infer(parsedRdd, header, params.nullValue)
} else {
// By default fields are assumed to be StringType
val schemaFields = header.map { fieldName =>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org