You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2018/02/21 20:29:01 UTC
hive git commit: HIVE-18757 : LLAP IO for text fails for empty files (Sergey Shelukhin, reviewed by Jason Dere)

Repository: hive
Updated Branches:
  refs/heads/master 3f2838388 -> c544dae19


HIVE-18757 : LLAP IO for text fails for empty files (Sergey Shelukhin, reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c544dae1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c544dae1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c544dae1

Branch: refs/heads/master
Commit: c544dae19c43c98d1489f0db9551c30f5f8f8985
Parents: 3f28383
Author: sergey <se...@apache.org>
Authored: Wed Feb 21 12:11:18 2018 -0800
Committer: sergey <se...@apache.org>
Committed: Wed Feb 21 12:28:58 2018 -0800

----------------------------------------------------------------------
 .../llap/io/encoded/SerDeEncodedDataReader.java | 24 ++++++++++++--------
 1 file changed, 15 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/c544dae1/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
index 2626f3e..166abf7 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
@@ -102,6 +102,7 @@ import org.apache.orc.impl.StreamName;
 import org.apache.tez.common.CallableWithNdc;
 import org.apache.tez.common.counters.TezCounters;
 
+import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 
 public class SerDeEncodedDataReader extends CallableWithNdc<Void>
@@ -848,7 +849,9 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
     try {
       if (slice != null) {
         // If we had a cache range already, we expect a single matching disk slice.
+        // Given that there's cached data we expect there to be some disk data.
         Vectors vectors = currentFileRead.readNextSlice();
+        assert vectors != null;
         if (!vectors.isSupported()) {
           // Not in VRB mode - the new cache data is ready, we should use it.
           CacheWriter cacheWriter = currentFileRead.getCacheWriter();
@@ -864,8 +867,9 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
         ++stripeIxRef.value;
       } else {
         // All the data comes from disk. The reader may have split it into multiple slices.
+        // It is also possible there's no data in the file.
         Vectors vectors = currentFileRead.readNextSlice();
-        assert vectors != null;
+        if (vectors == null) return true;
         result = true;
         if (!vectors.isSupported()) {
           // Not in VRB mode - the new cache data is (partially) ready, we should use it.
@@ -1255,7 +1259,7 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
     private int rowsPerSlice = 0;
     private long currentKnownTornStart;
     private long lastStartOffset = Long.MIN_VALUE, firstStartOffset = Long.MIN_VALUE;
-    private boolean hasUnsplittableData = false;
+    private boolean hasAnyData = false;
     private final EncodingWriter writer;
     private final boolean maySplitTheSplit;
     private final int targetSliceRowCount;
@@ -1263,6 +1267,7 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
 
     public FileReaderYieldReturn(ReaderWithOffsets offsetReader, FileSplit split, EncodingWriter writer,
         boolean maySplitTheSplit, int targetSliceRowCount) {
+      Preconditions.checkNotNull(offsetReader);
       this.offsetReader = offsetReader;
       currentKnownTornStart = split.getStart();
       this.writer = writer;
@@ -1276,10 +1281,12 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
     }
 
     public Vectors readNextSlice() throws IOException {
-      if (offsetReader == null) return null;
+      if (offsetReader == null) {
+        return null; // This means the reader has already been closed.
+      }
       try {
         while (offsetReader.next()) {
-          hasUnsplittableData = true;
+          hasAnyData = true;
           Writable value = offsetReader.getCurrentRow();
           lastStartOffset = offsetReader.getCurrentRowStartOffset();
           if (firstStartOffset == Long.MIN_VALUE) {
@@ -1310,12 +1317,11 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
           }
         }
         try {
-          Vectors result = null;
-          if (rowsPerSlice > 0 || (!maySplitTheSplit && hasUnsplittableData)) {
+          if (rowsPerSlice > 0 || (!maySplitTheSplit && hasAnyData)) {
             long fileOffset = -1;
             if (!offsetReader.hasOffsets()) {
               // The reader doesn't support offsets. We adjust offsets to match future splits.
-              // If cached split was starting at row start, that row would be skipped, so +1
+              // If cached split was starting at row start, that row would be skipped, so +1 byte.
               firstStartOffset = split.getStart() + 1;
               // Last row starting at the end of the split would be read.
               lastStartOffset = split.getStart() + split.getLength();
@@ -1333,11 +1339,11 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
                 currentKnownTornStart, firstStartOffset, lastStartOffset, fileOffset);
             // Close the writer to finalize the metadata.
             writer.close();
-            result = new Vectors(writer.extractCurrentVrbs());
+            return new Vectors(writer.extractCurrentVrbs());
           } else {
             writer.close();
+            return null; // There's no more data.
           }
-          return result;
         } finally {
           closeOffsetReader();
         }