You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2021/08/10 17:34:03 UTC
[spark] branch branch-3.1 updated: [SPARK-36464][CORE] Fix
Underlying Size Variable Initialization in ChunkedByteBufferOutputStream
for Writing Over 2GB Data
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 2deb395 [SPARK-36464][CORE] Fix Underlying Size Variable Initialization in ChunkedByteBufferOutputStream for Writing Over 2GB Data
2deb395 is described below
commit 2deb3954f969ff8c6845b2c07496b692ef0fd3f8
Author: Kazuyuki Tanimura <kt...@apple.com>
AuthorDate: Tue Aug 10 10:29:54 2021 -0700
[SPARK-36464][CORE] Fix Underlying Size Variable Initialization in ChunkedByteBufferOutputStream for Writing Over 2GB Data
### What changes were proposed in this pull request?
The `size` method of `ChunkedByteBufferOutputStream` returns a `Long` value; however, the underlying `_size` variable is initialized as `Int`.
That causes an overflow, so `size` returns a negative value when more than 2GB of data is written into `ChunkedByteBufferOutputStream`.
This PR changes the initialization of the underlying `_size` variable from `Int` to `Long`.
### Why are the changes needed?
Because the `size` method of `ChunkedByteBufferOutputStream` incorrectly returns a negative value when more than 2GB of data is written.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Passed existing tests
```
build/sbt "core/testOnly *ChunkedByteBufferOutputStreamSuite"
```
Also added a new unit test
```
build/sbt "core/testOnly *ChunkedByteBufferOutputStreamSuite -- -z SPARK-36464"
```
Closes #33690 from kazuyukitanimura/SPARK-36464.
Authored-by: Kazuyuki Tanimura <kt...@apple.com>
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
(cherry picked from commit c888bad6a12b45f3eda8d898bdd90405985ee05c)
Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
.../apache/spark/util/io/ChunkedByteBufferOutputStream.scala | 3 ++-
.../spark/util/io/ChunkedByteBufferOutputStreamSuite.scala | 10 ++++++++++
2 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStream.scala b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStream.scala
index a625b32..34d3665 100644
--- a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStream.scala
+++ b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStream.scala
@@ -48,7 +48,7 @@ private[spark] class ChunkedByteBufferOutputStream(
* This can also never be 0.
*/
private[this] var position = chunkSize
- private[this] var _size = 0
+ private[this] var _size = 0L
private[this] var closed: Boolean = false
def size: Long = _size
@@ -120,4 +120,5 @@ private[spark] class ChunkedByteBufferOutputStream(
new ChunkedByteBuffer(ret)
}
}
+
}
diff --git a/core/src/test/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStreamSuite.scala b/core/src/test/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStreamSuite.scala
index 8696174..29443e2 100644
--- a/core/src/test/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStreamSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/io/ChunkedByteBufferOutputStreamSuite.scala
@@ -119,4 +119,14 @@ class ChunkedByteBufferOutputStreamSuite extends SparkFunSuite {
assert(arrays(1).toSeq === ref.slice(10, 20))
assert(arrays(2).toSeq === ref.slice(20, 30))
}
+
+ test("SPARK-36464: size returns correct positive number even with over 2GB data") {
+ val ref = new Array[Byte](1024 * 1024 * 1024)
+ val o = new ChunkedByteBufferOutputStream(1024 * 1024, ByteBuffer.allocate)
+ o.write(ref)
+ o.write(ref)
+ o.close()
+ assert(o.size > 0L) // make sure it is not overflowing
+ assert(o.size == ref.length.toLong * 2)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org