You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uniffle.apache.org by ro...@apache.org on 2022/11/15 03:32:10 UTC
[incubator-uniffle] branch master updated: [Bug] Fix potenial bug when the index reading offset is greater than data length (#320)
This is an automated email from the ASF dual-hosted git repository.
roryqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git
The following commit(s) were added to refs/heads/master by this push:
new eae2621b [Bug] Fix potenial bug when the index reading offset is greater than data length (#320)
eae2621b is described below
commit eae2621b33a51013348a3060ed42e9eb1388c59b
Author: Junfan Zhang <zu...@apache.org>
AuthorDate: Tue Nov 15 11:32:04 2022 +0800
[Bug] Fix potenial bug when the index reading offset is greater than data length (#320)
### What changes were proposed in this pull request?
Fix potenial bug when the index reading offset equals to data length
### Why are the changes needed?
If the reading length == data length in `FixedSizeSegmentSplitter`, it should involve this block. And if its length > data length. it should abort it.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
UTs
---
.../common/segment/FixedSizeSegmentSplitter.java | 28 +++++++++++++++-------
.../segment/FixedSizeSegmentSplitterTest.java | 18 ++++++++++++++
2 files changed, 37 insertions(+), 9 deletions(-)
diff --git a/common/src/main/java/org/apache/uniffle/common/segment/FixedSizeSegmentSplitter.java b/common/src/main/java/org/apache/uniffle/common/segment/FixedSizeSegmentSplitter.java
index 79e59b62..e42f10f1 100644
--- a/common/src/main/java/org/apache/uniffle/common/segment/FixedSizeSegmentSplitter.java
+++ b/common/src/main/java/org/apache/uniffle/common/segment/FixedSizeSegmentSplitter.java
@@ -22,13 +22,17 @@ import java.nio.ByteBuffer;
import java.util.List;
import com.google.common.collect.Lists;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import org.apache.uniffle.common.BufferSegment;
import org.apache.uniffle.common.ShuffleDataSegment;
import org.apache.uniffle.common.ShuffleIndexResult;
import org.apache.uniffle.common.exception.RssException;
+import org.apache.uniffle.common.util.Constants;
public class FixedSizeSegmentSplitter implements SegmentSplitter {
+ private static final Logger LOGGER = LoggerFactory.getLogger(FixedSizeSegmentSplitter.class);
private int readBufferSize;
@@ -58,12 +62,13 @@ public class FixedSizeSegmentSplitter implements SegmentSplitter {
while (byteBuffer.hasRemaining()) {
try {
- long offset = byteBuffer.getLong();
- int length = byteBuffer.getInt();
- int uncompressLength = byteBuffer.getInt();
- long crc = byteBuffer.getLong();
- long blockId = byteBuffer.getLong();
- long taskAttemptId = byteBuffer.getLong();
+ final long offset = byteBuffer.getLong();
+ final int length = byteBuffer.getInt();
+ final int uncompressLength = byteBuffer.getInt();
+ final long crc = byteBuffer.getLong();
+ final long blockId = byteBuffer.getLong();
+ final long taskAttemptId = byteBuffer.getLong();
+
// The index file is written, read and parsed sequentially, so these parsed index segments
// index a continuous shuffle data in the corresponding data file and the first segment's
// offset field is the offset of these shuffle data in the data file.
@@ -71,16 +76,21 @@ public class FixedSizeSegmentSplitter implements SegmentSplitter {
fileOffset = offset;
}
- bufferSegments.add(new BufferSegment(blockId, bufferOffset, length, uncompressLength, crc, taskAttemptId));
- bufferOffset += length;
totalLength += length;
// If ShuffleServer is flushing the file at this time, the length in the index file record may be greater
// than the length in the actual data file, and it needs to be returned at this time to avoid EOFException
- if (dataFileLen != -1 && totalLength >= dataFileLen) {
+ if (dataFileLen != -1 && totalLength > dataFileLen) {
+ long mask = (1L << Constants.PARTITION_ID_MAX_LENGTH) - 1;
+ LOGGER.warn("Abort inconsistent data, the data length: {}(bytes) recorded in index file is greater than "
+ + "the real data file length: {}(bytes). Partition id: {}. This should not happen.",
+ totalLength, dataFileLen, Math.toIntExact((blockId >> Constants.TASK_ATTEMPT_ID_MAX_LENGTH) & mask));
break;
}
+ bufferSegments.add(new BufferSegment(blockId, bufferOffset, length, uncompressLength, crc, taskAttemptId));
+ bufferOffset += length;
+
if (bufferOffset >= readBufferSize) {
ShuffleDataSegment sds = new ShuffleDataSegment(fileOffset, bufferOffset, bufferSegments);
dataFileSegments.add(sds);
diff --git a/common/src/test/java/org/apache/uniffle/common/segment/FixedSizeSegmentSplitterTest.java b/common/src/test/java/org/apache/uniffle/common/segment/FixedSizeSegmentSplitterTest.java
index 9282b801..5655288c 100644
--- a/common/src/test/java/org/apache/uniffle/common/segment/FixedSizeSegmentSplitterTest.java
+++ b/common/src/test/java/org/apache/uniffle/common/segment/FixedSizeSegmentSplitterTest.java
@@ -22,6 +22,8 @@ import java.util.List;
import org.apache.commons.lang3.tuple.Pair;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
import org.apache.uniffle.common.ShuffleDataSegment;
import org.apache.uniffle.common.ShuffleIndexResult;
@@ -33,6 +35,22 @@ import static org.junit.jupiter.api.Assertions.fail;
public class FixedSizeSegmentSplitterTest {
+ @ParameterizedTest
+ @ValueSource(ints = {48, 49, 57})
+ public void testAvoidEOFException(int dataLength) {
+ SegmentSplitter splitter = new FixedSizeSegmentSplitter(1000);
+ byte[] data = generateData(
+ Pair.of(32, 0),
+ Pair.of(16, 0),
+ Pair.of(10, 0)
+ );
+
+ List<ShuffleDataSegment> shuffleDataSegments = splitter.split(new ShuffleIndexResult(data, dataLength));
+ assertEquals(1, shuffleDataSegments.size());
+ assertEquals(0, shuffleDataSegments.get(0).getOffset());
+ assertEquals(48, shuffleDataSegments.get(0).getLength());
+ }
+
@Test
public void testSplit() {
SegmentSplitter splitter = new FixedSizeSegmentSplitter(100);