You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/10/02 22:25:37 UTC

spark git commit: [SPARK-22176][SQL] Fix overflow issue in Dataset.show

Repository: spark
Updated Branches:
  refs/heads/master 4329eb2e7 -> fa225da74


[SPARK-22176][SQL] Fix overflow issue in Dataset.show

## What changes were proposed in this pull request?
This pr fixed an overflow issue below in `Dataset.show`:
```
scala> Seq((1, 2), (3, 4)).toDF("a", "b").show(Int.MaxValue)
org.apache.spark.sql.AnalysisException: The limit expression must be equal to or greater than 0, but got -2147483648;;
GlobalLimit -2147483648
+- LocalLimit -2147483648
   +- Project [_1#27218 AS a#27221, _2#27219 AS b#27222]
      +- LocalRelation [_1#27218, _2#27219]

  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:41)
  at org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:89)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.org$apache$spark$sql$catalyst$analysis$CheckAnalysis$$checkLimitClause(CheckAnalysis.scala:70)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:234)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:80)
  at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:127)
```

## How was this patch tested?
Added tests in `DataFrameSuite`.

Author: Takeshi Yamamuro <ya...@apache.org>

Closes #19401 from maropu/MaxValueInShowString.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fa225da7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fa225da7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fa225da7

Branch: refs/heads/master
Commit: fa225da7463e384529da14706e44f4a09772e5c1
Parents: 4329eb2
Author: Takeshi Yamamuro <ya...@apache.org>
Authored: Mon Oct 2 15:25:33 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Mon Oct 2 15:25:33 2017 -0700

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/sql/Dataset.scala   |  2 +-
 .../scala/org/apache/spark/sql/DataFrameSuite.scala     | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/fa225da7/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index f2a76a5..b70dfc0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -237,7 +237,7 @@ class Dataset[T] private[sql](
    */
   private[sql] def showString(
       _numRows: Int, truncate: Int = 20, vertical: Boolean = false): String = {
-    val numRows = _numRows.max(0)
+    val numRows = _numRows.max(0).min(Int.MaxValue - 1)
     val takeResult = toDF().take(numRows + 1)
     val hasMoreData = takeResult.length > numRows
     val data = takeResult.take(numRows)

http://git-wip-us.apache.org/repos/asf/spark/blob/fa225da7/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 672deea..dd8f54b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -1045,6 +1045,18 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
     assert(testData.select($"*").showString(0) === expectedAnswer)
   }
 
+  test("showString(Int.MaxValue)") {
+    val df = Seq((1, 2), (3, 4)).toDF("a", "b")
+    val expectedAnswer = """+---+---+
+                           ||  a|  b|
+                           |+---+---+
+                           ||  1|  2|
+                           ||  3|  4|
+                           |+---+---+
+                           |""".stripMargin
+    assert(df.showString(Int.MaxValue) === expectedAnswer)
+  }
+
   test("showString(0), vertical = true") {
     val expectedAnswer = "(0 rows)\n"
     assert(testData.select($"*").showString(0, vertical = true) === expectedAnswer)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org