You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2021/08/10 17:31:29 UTC

[spark] branch branch-3.2 updated: [SPARK-36464][CORE] Fix Underlying Size Variable Initialization in ChunkedByteBufferOutputStream for Writing Over 2GB Data

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new c230ca9  [SPARK-36464][CORE] Fix Underlying Size Variable Initialization in ChunkedByteBufferOutputStream for Writing Over 2GB Data
c230ca9 is described below

commit c230ca95e66cacad4ecd381321a98e7f65c87d9d
Author: Kazuyuki Tanimura <kt...@apple.com>
AuthorDate: Tue Aug 10 10:29:54 2021 -0700

    [SPARK-36464][CORE] Fix Underlying Size Variable Initialization in ChunkedByteBufferOutputStream for Writing Over 2GB Data
    
    ### What changes were proposed in this pull request?
    The `size` method of `ChunkedByteBufferOutputStream` returns a `Long` value; however, the underlying `_size` variable is initialized as `Int`.
    That causes an overflow and returns a negative size when over 2GB data is written into `ChunkedByteBufferOutputStream`
    
    This PR proposes to change the underlying `_size` variable from `Int` to `Long` at the initialization
    
    ### Why are the changes needed?
    Because the `size` method of `ChunkedByteBufferOutputStream` incorrectly returns a negative value when over 2GB of data is written.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Passed existing tests
    ```
    build/sbt "core/testOnly *ChunkedByteBufferOutputStreamSuite"
    ```
    Also added a new unit test
    ```
    build/sbt "core/testOnly *ChunkedByteBufferOutputStreamSuite -- -z SPARK-36464"
    ```
    
    Closes #33690 from kazuyukitanimura/SPARK-36464.
    
    Authored-by: Kazuyuki Tanimura <kt...@apple.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
    (cherry picked from commit c888bad6a12b45f3eda8d898bdd90405985ee05c)
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../apache/spark/util/io/ChunkedByteBufferOutputStream.scala   |  3 ++-
 .../spark/util/io/ChunkedByteBufferOutputStreamSuite.scala     | 10 ++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStream.scala b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStream.scala
index a625b32..34d3665 100644
--- a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStream.scala
+++ b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStream.scala
@@ -48,7 +48,7 @@ private[spark] class ChunkedByteBufferOutputStream(
    * This can also never be 0.
    */
   private[this] var position = chunkSize
-  private[this] var _size = 0
+  private[this] var _size = 0L
   private[this] var closed: Boolean = false
 
   def size: Long = _size
@@ -120,4 +120,5 @@ private[spark] class ChunkedByteBufferOutputStream(
       new ChunkedByteBuffer(ret)
     }
   }
+
 }
diff --git a/core/src/test/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStreamSuite.scala b/core/src/test/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStreamSuite.scala
index 8696174..29443e2 100644
--- a/core/src/test/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStreamSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStreamSuite.scala
@@ -119,4 +119,14 @@ class ChunkedByteBufferOutputStreamSuite extends SparkFunSuite {
     assert(arrays(1).toSeq === ref.slice(10, 20))
     assert(arrays(2).toSeq === ref.slice(20, 30))
   }
+
+  test("SPARK-36464: size returns correct positive number even with over 2GB data") {
+    val ref = new Array[Byte](1024 * 1024 * 1024)
+    val o = new ChunkedByteBufferOutputStream(1024 * 1024, ByteBuffer.allocate)
+    o.write(ref)
+    o.write(ref)
+    o.close()
+    assert(o.size > 0L) // make sure it is not overflowing
+    assert(o.size == ref.length.toLong * 2)
+  }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org