You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by da...@apache.org on 2016/04/24 06:15:37 UTC
spark git commit: [SPARK-14838] [SQL] Set default size for ObjectType
to avoid failure when estimating sizeInBytes in ObjectProducer
Repository: spark
Updated Branches:
refs/heads/master 1b7eab74e -> ba5e0b87a
[SPARK-14838] [SQL] Set default size for ObjectType to avoid failure when estimating sizeInBytes in ObjectProducer
## What changes were proposed in this pull request?
We have logical plans that produce domain objects which are `ObjectType`. As we can't estimate the size of `ObjectType`, we throw an `UnsupportedOperationException` if trying to do that. We should set a default size for `ObjectType` to avoid this failure.
## How was this patch tested?
`DatasetSuite`.
Author: Liang-Chi Hsieh <si...@tw.ibm.com>
Closes #12599 from viirya/skip-broadcast-objectproducer.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ba5e0b87
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ba5e0b87
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ba5e0b87
Branch: refs/heads/master
Commit: ba5e0b87a043e46e9599695c82d90e7572185aa5
Parents: 1b7eab7
Author: Liang-Chi Hsieh <si...@tw.ibm.com>
Authored: Sat Apr 23 21:15:31 2016 -0700
Committer: Davies Liu <da...@gmail.com>
Committed: Sat Apr 23 21:15:31 2016 -0700
----------------------------------------------------------------------
.../org/apache/spark/sql/types/ObjectType.scala | 3 +--
.../org/apache/spark/sql/DatasetSuite.scala | 23 ++++++++++++++++++++
2 files changed, 24 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/ba5e0b87/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala
index b7b1acc..c741a2d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala
@@ -37,8 +37,7 @@ private[sql] object ObjectType extends AbstractDataType {
* outside of the execution engine.
*/
private[sql] case class ObjectType(cls: Class[_]) extends DataType {
- override def defaultSize: Int =
- throw new UnsupportedOperationException("No size estimation available for objects.")
+ override def defaultSize: Int = 4096
def asNullable: DataType = this
http://git-wip-us.apache.org/repos/asf/spark/blob/ba5e0b87/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index a6e3bd3..eee21ac 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -630,6 +630,29 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
// Make sure the generated code for this plan can compile and execute.
checkDataset(wideDF.map(_.getLong(0)), 0L until 10 : _*)
}
+
+ test("SPARK-14838: estimating sizeInBytes in operators with ObjectProducer shouldn't fail") {
+ val dataset = Seq(
+ (0, 3, 54f),
+ (0, 4, 44f),
+ (0, 5, 42f),
+ (1, 3, 39f),
+ (1, 5, 33f),
+ (1, 4, 26f),
+ (2, 3, 51f),
+ (2, 5, 45f),
+ (2, 4, 30f)
+ ).toDF("user", "item", "rating")
+
+ val actual = dataset
+ .select("user", "item")
+ .as[(Int, Int)]
+ .groupByKey(_._1)
+ .mapGroups { case (src, ids) => (src, ids.map(_._2).toArray) }
+ .toDF("id", "actual")
+
+ dataset.join(actual, dataset("user") === actual("id")).collect()
+ }
}
case class OtherTuple(_1: String, _2: Int)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org