Posted to commits@hive.apache.org by se...@apache.org on 2018/02/21 20:29:01 UTC
hive git commit: HIVE-18757 : LLAP IO for text fails for empty files (Sergey Shelukhin, reviewed by Jason Dere)
Repository: hive
Updated Branches:
refs/heads/master 3f2838388 -> c544dae19
HIVE-18757 : LLAP IO for text fails for empty files (Sergey Shelukhin, reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c544dae1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c544dae1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c544dae1
Branch: refs/heads/master
Commit: c544dae19c43c98d1489f0db9551c30f5f8f8985
Parents: 3f28383
Author: sergey <se...@apache.org>
Authored: Wed Feb 21 12:11:18 2018 -0800
Committer: sergey <se...@apache.org>
Committed: Wed Feb 21 12:28:58 2018 -0800
----------------------------------------------------------------------
.../llap/io/encoded/SerDeEncodedDataReader.java | 24 ++++++++++++--------
1 file changed, 15 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c544dae1/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
index 2626f3e..166abf7 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
@@ -102,6 +102,7 @@ import org.apache.orc.impl.StreamName;
import org.apache.tez.common.CallableWithNdc;
import org.apache.tez.common.counters.TezCounters;
+import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
public class SerDeEncodedDataReader extends CallableWithNdc<Void>
@@ -848,7 +849,9 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
try {
if (slice != null) {
// If we had a cache range already, we expect a single matching disk slice.
+ // Given that there's cached data we expect there to be some disk data.
Vectors vectors = currentFileRead.readNextSlice();
+ assert vectors != null;
if (!vectors.isSupported()) {
// Not in VRB mode - the new cache data is ready, we should use it.
CacheWriter cacheWriter = currentFileRead.getCacheWriter();
@@ -864,8 +867,9 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
++stripeIxRef.value;
} else {
// All the data comes from disk. The reader may have split it into multiple slices.
+ // It is also possible there's no data in the file.
Vectors vectors = currentFileRead.readNextSlice();
- assert vectors != null;
+ if (vectors == null) return true;
result = true;
if (!vectors.isSupported()) {
// Not in VRB mode - the new cache data is (partially) ready, we should use it.
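The two hunks above carry the core of the fix. When a cached range already exists (slice != null), disk data must also exist, so a null slice is an internal error and the assert is kept; on the disk-only path an empty file legitimately produces no slice, so null now means "no data" rather than tripping an assert. A minimal sketch of that distinction, with stand-in types rather than the actual Hive API:

    interface Vectors {}
    interface SliceSource { Vectors readNextSlice() throws java.io.IOException; }

    // Simplified mirror of the control flow above; names are illustrative.
    boolean processOneSlice(SliceSource currentFileRead, boolean hasCachedRange)
        throws java.io.IOException {
      Vectors vectors = currentFileRead.readNextSlice();
      if (hasCachedRange) {
        assert vectors != null; // Cached data implies the file has disk data.
      } else if (vectors == null) {
        return true; // Empty file: nothing to read, but not an error.
      }
      // ... consume vectors ...
      return true;
    }
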
@@ -1255,7 +1259,7 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
private int rowsPerSlice = 0;
private long currentKnownTornStart;
private long lastStartOffset = Long.MIN_VALUE, firstStartOffset = Long.MIN_VALUE;
- private boolean hasUnsplittableData = false;
+ private boolean hasAnyData = false;
private final EncodingWriter writer;
private final boolean maySplitTheSplit;
private final int targetSliceRowCount;
@@ -1263,6 +1267,7 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
public FileReaderYieldReturn(ReaderWithOffsets offsetReader, FileSplit split, EncodingWriter writer,
boolean maySplitTheSplit, int targetSliceRowCount) {
+ Preconditions.checkNotNull(offsetReader);
this.offsetReader = offsetReader;
currentKnownTornStart = split.getStart();
this.writer = writer;
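The Preconditions check above makes the null contract explicit: the reader may not be null at construction, so a null offsetReader later can only mean the reader was closed. Guava's Preconditions.checkNotNull throws NullPointerException immediately and returns its argument; the same fail-fast idiom on a hypothetical class:

    import com.google.common.base.Preconditions;

    class ReaderHolder {
      private final java.io.Reader reader;

      ReaderHolder(java.io.Reader reader) {
        // Fails at construction time rather than on first use; the field
        // therefore can never start out null.
        this.reader = Preconditions.checkNotNull(reader);
      }
    }
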
@@ -1276,10 +1281,12 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
}
public Vectors readNextSlice() throws IOException {
- if (offsetReader == null) return null;
+ if (offsetReader == null) {
+ return null; // This means the reader has already been closed.
+ }
try {
while (offsetReader.next()) {
- hasUnsplittableData = true;
+ hasAnyData = true;
Writable value = offsetReader.getCurrentRow();
lastStartOffset = offsetReader.getCurrentRowStartOffset();
if (firstStartOffset == Long.MIN_VALUE) {
@@ -1310,12 +1317,11 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
}
}
try {
- Vectors result = null;
- if (rowsPerSlice > 0 || (!maySplitTheSplit && hasUnsplittableData)) {
+ if (rowsPerSlice > 0 || (!maySplitTheSplit && hasAnyData)) {
long fileOffset = -1;
if (!offsetReader.hasOffsets()) {
// The reader doesn't support offsets. We adjust offsets to match future splits.
- // If cached split was starting at row start, that row would be skipped, so +1
+ // If cached split was starting at row start, that row would be skipped, so +1 byte.
firstStartOffset = split.getStart() + 1;
// Last row starting at the end of the split would be read.
lastStartOffset = split.getStart() + split.getLength();
@@ -1333,11 +1339,11 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
currentKnownTornStart, firstStartOffset, lastStartOffset, fileOffset);
// Close the writer to finalize the metadata.
writer.close();
- result = new Vectors(writer.extractCurrentVrbs());
+ return new Vectors(writer.extractCurrentVrbs());
} else {
writer.close();
+ return null; // There's no more data.
}
- return result;
} finally {
closeOffsetReader();
}