Posted to commits@spark.apache.org by li...@apache.org on 2017/09/01 20:48:53 UTC

spark git commit: [SPARK-21884][SPARK-21477][BACKPORT-2.2][SQL] Mark LocalTableScanExec's input data transient

Repository: spark
Updated Branches:
  refs/heads/branch-2.2 14054ffc5 -> 50f86e1fe


[SPARK-21884][SPARK-21477][BACKPORT-2.2][SQL] Mark LocalTableScanExec's input data transient

This PR backports https://github.com/apache/spark/pull/18686 to branch-2.2 to resolve the issue reported in https://github.com/apache/spark/pull/19094

---

## What changes were proposed in this pull request?
This PR marks the `rows` parameter and the `unsafeRows` lazy val of `LocalTableScanExec` as transient, so these objects are not serialized unnecessarily when the plan itself is serialized.
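
As a minimal, self-contained sketch of the mechanism (using a hypothetical `Node` class, not the Spark code itself): a `@transient` case-class parameter is skipped by Java serialization, and a `@transient lazy val` is neither serialized nor computed on the remote side unless it is accessed there.

```scala
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

// Hypothetical stand-in for LocalTableScanExec: @transient excludes
// `rows` from Java serialization, and the @transient lazy val is
// neither serialized nor eagerly recomputed after deserialization.
case class Node(@transient rows: Seq[Int]) extends Serializable {
  @transient lazy val doubled: Array[Int] = rows.map(_ * 2).toArray
}

object TransientDemo extends App {
  val n = Node(1 to 5)
  n.doubled                                   // computed on the "driver" side

  val buf = new ByteArrayOutputStream()
  new ObjectOutputStream(buf).writeObject(n)  // rows and doubled are skipped
  val copy = new ObjectInputStream(new ByteArrayInputStream(buf.toByteArray))
    .readObject().asInstanceOf[Node]

  assert(copy.rows == null)                   // transient field comes back null
}
```

As in Spark's case, the transient lazy val is only safe to touch on the original instance; accessing `copy.doubled` would fail because `rows` is null after deserialization, which is fine because executors never need it.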

## How was this patch tested?
N/A

Author: gatorsmile <ga...@gmail.com>

Closes #19101 from gatorsmile/backport-21477.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/50f86e1f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/50f86e1f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/50f86e1f

Branch: refs/heads/branch-2.2
Commit: 50f86e1fe2aad67e4472b24d910ea519b9ad746f
Parents: 14054ff
Author: gatorsmile <ga...@gmail.com>
Authored: Fri Sep 1 13:48:50 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Fri Sep 1 13:48:50 2017 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/execution/LocalTableScanExec.scala  | 4 ++--
 .../spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala | 8 ++++++++
 2 files changed, 10 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/50f86e1f/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala
index 19c68c1..514ad70 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala
@@ -28,12 +28,12 @@ import org.apache.spark.sql.execution.metric.SQLMetrics
  */
 case class LocalTableScanExec(
     output: Seq[Attribute],
-    rows: Seq[InternalRow]) extends LeafExecNode {
+    @transient rows: Seq[InternalRow]) extends LeafExecNode {
 
   override lazy val metrics = Map(
     "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
 
-  private lazy val unsafeRows: Array[InternalRow] = {
+  @transient private lazy val unsafeRows: Array[InternalRow] = {
     if (rows.isEmpty) {
       Array.empty
     } else {

http://git-wip-us.apache.org/repos/asf/spark/blob/50f86e1f/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala
index 58c3105..6c66902 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala
@@ -117,4 +117,12 @@ class OptimizeMetadataOnlyQuerySuite extends QueryTest with SharedSQLContext {
     "select partcol1, max(partcol2) from srcpart where partcol1 = 0 group by rollup (partcol1)",
     "select partcol2 from (select partcol2 from srcpart where partcol1 = 0 union all " +
       "select partcol2 from srcpart where partcol1 = 1) t group by partcol2")
+
+  test("SPARK-21884 Fix StackOverflowError on MetadataOnlyQuery") {
+    withTable("t_1000") {
+      sql("CREATE TABLE t_1000 (a INT, p INT) USING PARQUET PARTITIONED BY (p)")
+      (1 to 1000).foreach(p => sql(s"ALTER TABLE t_1000 ADD PARTITION (p=$p)"))
+      sql("SELECT COUNT(DISTINCT p) FROM t_1000").collect()
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org