You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by da...@apache.org on 2016/09/30 16:58:01 UTC
spark git commit: [SPARK-17738] [SQL] fix ARRAY/MAP in columnar cache
Repository: spark
Updated Branches:
refs/heads/master 8e491af52 -> f327e1686
[SPARK-17738] [SQL] fix ARRAY/MAP in columnar cache
## What changes were proposed in this pull request?
The actualSize() of array and map is different from the actual size, the header is Int, rather than Long.
## How was this patch tested?
The flaky test should be fixed.
Author: Davies Liu <da...@databricks.com>
Closes #15305 from davies/fix_MAP.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f327e168
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f327e168
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f327e168
Branch: refs/heads/master
Commit: f327e16863371076dbd2a7f22c8895ae07f8274b
Parents: 8e491af
Author: Davies Liu <da...@databricks.com>
Authored: Fri Sep 30 09:59:12 2016 -0700
Committer: Davies Liu <da...@gmail.com>
Committed: Fri Sep 30 09:59:12 2016 -0700
----------------------------------------------------------------------
.../org/apache/spark/sql/execution/columnar/ColumnType.scala | 8 ++++----
.../spark/sql/execution/columnar/ColumnTypeSuite.scala | 6 +++---
2 files changed, 7 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/f327e168/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
index fa9619e..d27d8c3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
@@ -589,7 +589,7 @@ private[columnar] case class STRUCT(dataType: StructType)
private[columnar] case class ARRAY(dataType: ArrayType)
extends ColumnType[UnsafeArrayData] with DirectCopyColumnType[UnsafeArrayData] {
- override def defaultSize: Int = 16
+ override def defaultSize: Int = 28
override def setField(row: MutableRow, ordinal: Int, value: UnsafeArrayData): Unit = {
row.update(ordinal, value)
@@ -601,7 +601,7 @@ private[columnar] case class ARRAY(dataType: ArrayType)
override def actualSize(row: InternalRow, ordinal: Int): Int = {
val unsafeArray = getField(row, ordinal)
- 8 + unsafeArray.getSizeInBytes
+ 4 + unsafeArray.getSizeInBytes
}
override def append(value: UnsafeArrayData, buffer: ByteBuffer): Unit = {
@@ -628,7 +628,7 @@ private[columnar] case class ARRAY(dataType: ArrayType)
private[columnar] case class MAP(dataType: MapType)
extends ColumnType[UnsafeMapData] with DirectCopyColumnType[UnsafeMapData] {
- override def defaultSize: Int = 32
+ override def defaultSize: Int = 68
override def setField(row: MutableRow, ordinal: Int, value: UnsafeMapData): Unit = {
row.update(ordinal, value)
@@ -640,7 +640,7 @@ private[columnar] case class MAP(dataType: MapType)
override def actualSize(row: InternalRow, ordinal: Int): Int = {
val unsafeMap = getField(row, ordinal)
- 8 + unsafeMap.getSizeInBytes
+ 4 + unsafeMap.getSizeInBytes
}
override def append(value: UnsafeMapData, buffer: ByteBuffer): Unit = {
http://git-wip-us.apache.org/repos/asf/spark/blob/f327e168/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala
index 0b93c63..805b566 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala
@@ -38,7 +38,7 @@ class ColumnTypeSuite extends SparkFunSuite with Logging {
val checks = Map(
NULL -> 0, BOOLEAN -> 1, BYTE -> 1, SHORT -> 2, INT -> 4, LONG -> 8,
FLOAT -> 4, DOUBLE -> 8, COMPACT_DECIMAL(15, 10) -> 8, LARGE_DECIMAL(20, 10) -> 12,
- STRING -> 8, BINARY -> 16, STRUCT_TYPE -> 20, ARRAY_TYPE -> 16, MAP_TYPE -> 32)
+ STRING -> 8, BINARY -> 16, STRUCT_TYPE -> 20, ARRAY_TYPE -> 28, MAP_TYPE -> 68)
checks.foreach { case (columnType, expectedSize) =>
assertResult(expectedSize, s"Wrong defaultSize for $columnType") {
@@ -73,8 +73,8 @@ class ColumnTypeSuite extends SparkFunSuite with Logging {
checkActualSize(BINARY, Array.fill[Byte](4)(0.toByte), 4 + 4)
checkActualSize(COMPACT_DECIMAL(15, 10), Decimal(0, 15, 10), 8)
checkActualSize(LARGE_DECIMAL(20, 10), Decimal(0, 20, 10), 5)
- checkActualSize(ARRAY_TYPE, Array[Any](1), 8 + 8 + 8 + 8)
- checkActualSize(MAP_TYPE, Map(1 -> "a"), 8 + (8 + 8 + 8 + 8) + (8 + 8 + 8 + 8))
+ checkActualSize(ARRAY_TYPE, Array[Any](1), 4 + 8 + 8 + 8)
+ checkActualSize(MAP_TYPE, Map(1 -> "a"), 4 + (8 + 8 + 8 + 8) + (8 + 8 + 8 + 8))
checkActualSize(STRUCT_TYPE, Row("hello"), 28)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org