You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/09/01 20:48:53 UTC
spark git commit: [SPARK-21884][SPARK-21477][BACKPORT-2.2][SQL] Mark
LocalTableScanExec's input data transient
Repository: spark
Updated Branches:
refs/heads/branch-2.2 14054ffc5 -> 50f86e1fe
[SPARK-21884][SPARK-21477][BACKPORT-2.2][SQL] Mark LocalTableScanExec's input data transient
This PR backports https://github.com/apache/spark/pull/18686 to resolve the issue reported in https://github.com/apache/spark/pull/19094
---
## What changes were proposed in this pull request?
This PR marks the `rows` parameter and the `unsafeRows` field of LocalTableScanExec as transient. This avoids serializing unneeded objects.
## How was this patch tested?
N/A
Author: gatorsmile <ga...@gmail.com>
Closes #19101 from gatorsmile/backport-21477.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/50f86e1f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/50f86e1f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/50f86e1f
Branch: refs/heads/branch-2.2
Commit: 50f86e1fe2aad67e4472b24d910ea519b9ad746f
Parents: 14054ff
Author: gatorsmile <ga...@gmail.com>
Authored: Fri Sep 1 13:48:50 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Fri Sep 1 13:48:50 2017 -0700
----------------------------------------------------------------------
.../org/apache/spark/sql/execution/LocalTableScanExec.scala | 4 ++--
.../spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala | 8 ++++++++
2 files changed, 10 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/50f86e1f/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala
index 19c68c1..514ad70 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala
@@ -28,12 +28,12 @@ import org.apache.spark.sql.execution.metric.SQLMetrics
*/
case class LocalTableScanExec(
output: Seq[Attribute],
- rows: Seq[InternalRow]) extends LeafExecNode {
+ @transient rows: Seq[InternalRow]) extends LeafExecNode {
override lazy val metrics = Map(
"numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
- private lazy val unsafeRows: Array[InternalRow] = {
+ @transient private lazy val unsafeRows: Array[InternalRow] = {
if (rows.isEmpty) {
Array.empty
} else {
http://git-wip-us.apache.org/repos/asf/spark/blob/50f86e1f/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala
index 58c3105..6c66902 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala
@@ -117,4 +117,12 @@ class OptimizeMetadataOnlyQuerySuite extends QueryTest with SharedSQLContext {
"select partcol1, max(partcol2) from srcpart where partcol1 = 0 group by rollup (partcol1)",
"select partcol2 from (select partcol2 from srcpart where partcol1 = 0 union all " +
"select partcol2 from srcpart where partcol1 = 1) t group by partcol2")
+
+ test("SPARK-21884 Fix StackOverflowError on MetadataOnlyQuery") {
+ withTable("t_1000") {
+ sql("CREATE TABLE t_1000 (a INT, p INT) USING PARQUET PARTITIONED BY (p)")
+ (1 to 1000).foreach(p => sql(s"ALTER TABLE t_1000 ADD PARTITION (p=$p)"))
+ sql("SELECT COUNT(DISTINCT p) FROM t_1000").collect()
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org