You are viewing a plain text version of this content. The canonical link for it is here.
Posted to gitbox@hive.apache.org by GitBox <gi...@apache.org> on 2020/04/29 13:07:43 UTC

[GitHub] [hive] pgaref commented on a change in pull request #998: HIVE-22769 Fix SkippingTextInputFormat

pgaref commented on a change in pull request #998:
URL: https://github.com/apache/hive/pull/998#discussion_r417299400



##########
File path: llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/PassThruOffsetReader.java
##########
@@ -20,23 +20,81 @@
 import java.io.IOException;
 
 import org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.ReaderWithOffsets;
+import org.apache.hadoop.hive.ql.exec.FooterBuffer;
+import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
 
 @SuppressWarnings("rawtypes") class PassThruOffsetReader implements ReaderWithOffsets {
   protected final RecordReader sourceReader;
   protected final Object key;
   protected final Writable value;
+  protected final JobConf jobConf;
+  protected final int skipHeaderCnt;
+  protected final int skipFooterCnt;
+  private transient FooterBuffer footerBuffer;
+  private transient boolean initialized = false;
 
-  PassThruOffsetReader(RecordReader sourceReader) {
+  PassThruOffsetReader(RecordReader sourceReader, JobConf jobConf, int headerCnt, int footerCnt) {
     this.sourceReader = sourceReader;
-    key = sourceReader.createKey();
-    value = (Writable)sourceReader.createValue();
+    this.key = sourceReader.createKey();
+    this.value = (Writable)sourceReader.createValue();
+    this.jobConf = jobConf;
+    this.skipHeaderCnt = headerCnt;
+    this.skipFooterCnt = footerCnt;
   }
 
   @Override
   public boolean next() throws IOException {
-    return sourceReader.next(key, value);
+    try {
+      boolean opNotEOF = true;
+      /**
+       * Start reading a new file.
+       * If file contains header, skip header lines before reading the records.
+       * If file contains footer, use FooterBuffer to cache and remove footer
+       * records at the end of the file.
+       */
+      if (!initialized) {
+        // Skip header lines.
+        opNotEOF = Utilities.skipHeader(sourceReader, skipFooterCnt, key, value);
+
+        // Initialize footer buffer.
+        if (opNotEOF && skipFooterCnt > 0) {
+          footerBuffer = new FooterBuffer();
+          opNotEOF = footerBuffer.initializeBuffer(jobConf, sourceReader, skipFooterCnt, (WritableComparable) key, value);
+        }
+        this.initialized = true;
+      }
+
+      if (opNotEOF && footerBuffer == null) {
+        /**
+         * When file doesn't end after skipping header line
+         * and there are NO footer lines, read normally.
+         */
+        opNotEOF = sourceReader.next(key, value);
+      }
+
+      if (opNotEOF && footerBuffer != null) {
+        /**
+         * When file doesn't end after skipping header line
+         * and there ARE footer lines, update footerBuffer
+         */
+        opNotEOF = footerBuffer.updateBuffer(jobConf, sourceReader, (WritableComparable) key, value);
+      }
+
+      if (opNotEOF) {
+        // A record was read; the end of the file has not been reached yet
+        return true;
+      } else {
+        // Done reading
+        close();

Review comment:
       You are right: EncodedDataReader already takes care of closing the reader, so I am removing the close() call from here.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org
For additional commands, e-mail: gitbox-help@hive.apache.org