You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2016/02/19 23:46:59 UTC
spark git commit: [SPARK-13261][SQL] Expose maxCharactersPerColumn as
a user configurable option
Repository: spark
Updated Branches:
refs/heads/master dbb08cdd5 -> 14844118b
[SPARK-13261][SQL] Expose maxCharactersPerColumn as a user configurable option
This patch exposes `maxCharactersPerColumn` (option key `maxCharsPerColumn`) and `maxColumns` as user-configurable options in the CSV data source.
Author: Hossein <ho...@databricks.com>
Closes #11147 from falaki/SPARK-13261.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/14844118
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/14844118
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/14844118
Branch: refs/heads/master
Commit: 14844118b596a93dbc28b442a7ea2b58fa4df648
Parents: dbb08cd
Author: Hossein <ho...@databricks.com>
Authored: Fri Feb 19 14:46:56 2016 -0800
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Fri Feb 19 14:46:56 2016 -0800
----------------------------------------------------------------------
.../sql/execution/datasources/csv/CSVOptions.scala | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/14844118/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
index 709dacc..bea8e97 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
@@ -36,6 +36,19 @@ private[sql] class CSVOptions(
}
}
+ private def getInt(paramName: String, default: Int): Int = {
+ val paramValue = parameters.get(paramName)
+ paramValue match {
+ case None => default
+ case Some(value) => try {
+ value.toInt
+ } catch {
+ case e: NumberFormatException =>
+ throw new RuntimeException(s"$paramName should be an integer. Found $value")
+ }
+ }
+ }
+
private def getBool(paramName: String, default: Boolean = false): Boolean = {
val param = parameters.getOrElse(paramName, default.toString)
if (param.toLowerCase == "true") {
@@ -81,9 +94,9 @@ private[sql] class CSVOptions(
name.map(CompressionCodecs.getCodecClassName)
}
- val maxColumns = 20480
+ val maxColumns = getInt("maxColumns", 20480)
- val maxCharsPerColumn = 100000
+ val maxCharsPerColumn = getInt("maxCharsPerColumn", 1000000)
val inputBufferSize = 128
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org