You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2016/02/19 23:46:59 UTC

spark git commit: [SPARK-13261][SQL] Expose maxCharactersPerColumn as a user configurable option

Repository: spark
Updated Branches:
  refs/heads/master dbb08cdd5 -> 14844118b


[SPARK-13261][SQL] Expose maxCharactersPerColumn as a user configurable option

This patch exposes `maxCharsPerColumn` and `maxColumns` as user-configurable options in the CSV data source.

Author: Hossein <ho...@databricks.com>

Closes #11147 from falaki/SPARK-13261.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/14844118
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/14844118
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/14844118

Branch: refs/heads/master
Commit: 14844118b596a93dbc28b442a7ea2b58fa4df648
Parents: dbb08cd
Author: Hossein <ho...@databricks.com>
Authored: Fri Feb 19 14:46:56 2016 -0800
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Fri Feb 19 14:46:56 2016 -0800

----------------------------------------------------------------------
 .../sql/execution/datasources/csv/CSVOptions.scala | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/14844118/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
index 709dacc..bea8e97 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
@@ -36,6 +36,19 @@ private[sql] class CSVOptions(
     }
   }
 
+  private def getInt(paramName: String, default: Int): Int = {
+    val paramValue = parameters.get(paramName)
+    paramValue match {
+      case None => default
+      case Some(value) => try {
+        value.toInt
+      } catch {
+        case e: NumberFormatException =>
+          throw new RuntimeException(s"$paramName should be an integer. Found $value")
+      }
+    }
+  }
+
   private def getBool(paramName: String, default: Boolean = false): Boolean = {
     val param = parameters.getOrElse(paramName, default.toString)
     if (param.toLowerCase == "true") {
@@ -81,9 +94,9 @@ private[sql] class CSVOptions(
     name.map(CompressionCodecs.getCodecClassName)
   }
 
-  val maxColumns = 20480
+  val maxColumns = getInt("maxColumns", 20480)
 
-  val maxCharsPerColumn = 100000
+  val maxCharsPerColumn = getInt("maxCharsPerColumn", 1000000)
 
   val inputBufferSize = 128
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org