You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by da...@apache.org on 2016/09/30 16:58:01 UTC

spark git commit: [SPARK-17738] [SQL] fix ARRAY/MAP in columnar cache

Repository: spark
Updated Branches:
  refs/heads/master 8e491af52 -> f327e1686


[SPARK-17738] [SQL] fix ARRAY/MAP in columnar cache

## What changes were proposed in this pull request?

The actualSize() of array and map differed from the true serialized size: the size header written to the buffer is an Int (4 bytes), not a Long (8 bytes).

## How was this patch tested?

The flaky test should be fixed.

Author: Davies Liu <da...@databricks.com>

Closes #15305 from davies/fix_MAP.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f327e168
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f327e168
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f327e168

Branch: refs/heads/master
Commit: f327e16863371076dbd2a7f22c8895ae07f8274b
Parents: 8e491af
Author: Davies Liu <da...@databricks.com>
Authored: Fri Sep 30 09:59:12 2016 -0700
Committer: Davies Liu <da...@gmail.com>
Committed: Fri Sep 30 09:59:12 2016 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/execution/columnar/ColumnType.scala | 8 ++++----
 .../spark/sql/execution/columnar/ColumnTypeSuite.scala       | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/f327e168/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
index fa9619e..d27d8c3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
@@ -589,7 +589,7 @@ private[columnar] case class STRUCT(dataType: StructType)
 private[columnar] case class ARRAY(dataType: ArrayType)
   extends ColumnType[UnsafeArrayData] with DirectCopyColumnType[UnsafeArrayData] {
 
-  override def defaultSize: Int = 16
+  override def defaultSize: Int = 28
 
   override def setField(row: MutableRow, ordinal: Int, value: UnsafeArrayData): Unit = {
     row.update(ordinal, value)
@@ -601,7 +601,7 @@ private[columnar] case class ARRAY(dataType: ArrayType)
 
   override def actualSize(row: InternalRow, ordinal: Int): Int = {
     val unsafeArray = getField(row, ordinal)
-    8 + unsafeArray.getSizeInBytes
+    4 + unsafeArray.getSizeInBytes
   }
 
   override def append(value: UnsafeArrayData, buffer: ByteBuffer): Unit = {
@@ -628,7 +628,7 @@ private[columnar] case class ARRAY(dataType: ArrayType)
 private[columnar] case class MAP(dataType: MapType)
   extends ColumnType[UnsafeMapData] with DirectCopyColumnType[UnsafeMapData] {
 
-  override def defaultSize: Int = 32
+  override def defaultSize: Int = 68
 
   override def setField(row: MutableRow, ordinal: Int, value: UnsafeMapData): Unit = {
     row.update(ordinal, value)
@@ -640,7 +640,7 @@ private[columnar] case class MAP(dataType: MapType)
 
   override def actualSize(row: InternalRow, ordinal: Int): Int = {
     val unsafeMap = getField(row, ordinal)
-    8 + unsafeMap.getSizeInBytes
+    4 + unsafeMap.getSizeInBytes
   }
 
   override def append(value: UnsafeMapData, buffer: ByteBuffer): Unit = {

http://git-wip-us.apache.org/repos/asf/spark/blob/f327e168/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala
index 0b93c63..805b566 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala
@@ -38,7 +38,7 @@ class ColumnTypeSuite extends SparkFunSuite with Logging {
     val checks = Map(
       NULL -> 0, BOOLEAN -> 1, BYTE -> 1, SHORT -> 2, INT -> 4, LONG -> 8,
       FLOAT -> 4, DOUBLE -> 8, COMPACT_DECIMAL(15, 10) -> 8, LARGE_DECIMAL(20, 10) -> 12,
-      STRING -> 8, BINARY -> 16, STRUCT_TYPE -> 20, ARRAY_TYPE -> 16, MAP_TYPE -> 32)
+      STRING -> 8, BINARY -> 16, STRUCT_TYPE -> 20, ARRAY_TYPE -> 28, MAP_TYPE -> 68)
 
     checks.foreach { case (columnType, expectedSize) =>
       assertResult(expectedSize, s"Wrong defaultSize for $columnType") {
@@ -73,8 +73,8 @@ class ColumnTypeSuite extends SparkFunSuite with Logging {
     checkActualSize(BINARY, Array.fill[Byte](4)(0.toByte), 4 + 4)
     checkActualSize(COMPACT_DECIMAL(15, 10), Decimal(0, 15, 10), 8)
     checkActualSize(LARGE_DECIMAL(20, 10), Decimal(0, 20, 10), 5)
-    checkActualSize(ARRAY_TYPE, Array[Any](1), 8 + 8 + 8 + 8)
-    checkActualSize(MAP_TYPE, Map(1 -> "a"), 8 + (8 + 8 + 8 + 8) + (8 + 8 + 8 + 8))
+    checkActualSize(ARRAY_TYPE, Array[Any](1), 4 + 8 + 8 + 8)
+    checkActualSize(MAP_TYPE, Map(1 -> "a"), 4 + (8 + 8 + 8 + 8) + (8 + 8 + 8 + 8))
     checkActualSize(STRUCT_TYPE, Row("hello"), 28)
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org