You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by yc...@apache.org on 2016/12/08 15:09:29 UTC
hive git commit: HIVE-15359: skip.footer.line.count doesnt work
properly for certain situations (Yongzhi Chen, reviewed by Aihua Xu)
Repository: hive
Updated Branches:
refs/heads/master b920fa735 -> b4980afd3
HIVE-15359: skip.footer.line.count doesnt work properly for certain situations (Yongzhi Chen, reviewed by Aihua Xu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b4980afd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b4980afd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b4980afd
Branch: refs/heads/master
Commit: b4980afd3500b7367a82dc1fc5cd01fe942c53d1
Parents: b920fa7
Author: Yongzhi Chen <yc...@apache.org>
Authored: Mon Dec 5 17:45:41 2016 -0500
Committer: Yongzhi Chen <yc...@apache.org>
Committed: Thu Dec 8 09:50:45 2016 -0500
----------------------------------------------------------------------
.../hadoop/hive/ql/io/CombineHiveRecordReader.java | 12 +++++++++++-
.../hadoop/hive/ql/io/HiveContextAwareRecordReader.java | 10 ++++++++++
.../org/apache/hadoop/hive/shims/HadoopShimsSecure.java | 6 ++++--
3 files changed, 25 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/b4980afd/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
index aa607cc..ba25573 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.lib.CombineFileSplit;
/**
@@ -43,7 +44,7 @@ public class CombineHiveRecordReader<K extends WritableComparable, V extends Wri
extends HiveContextAwareRecordReader<K, V> {
public CombineHiveRecordReader(InputSplit split, Configuration conf,
- Reporter reporter, Integer partition) throws IOException {
+ Reporter reporter, Integer partition, RecordReader preReader) throws IOException {
super((JobConf)conf);
CombineHiveInputSplit hsplit = split instanceof CombineHiveInputSplit ?
(CombineHiveInputSplit) split :
@@ -67,6 +68,15 @@ public class CombineHiveRecordReader<K extends WritableComparable, V extends Wri
this.setRecordReader(inputFormat.getRecordReader(fsplit, jobConf, reporter));
this.initIOContext(fsplit, jobConf, inputFormatClass, this.recordReader);
+
+ // If the current split comes from the same file as the preceding split and the preceding split has a footer buffer,
+ // the current split must reuse the preceding split's footer buffer so that the footer is skipped correctly.
+ if (preReader != null && preReader instanceof CombineHiveRecordReader
+ && ((CombineHiveRecordReader)preReader).getFooterBuffer() != null) {
+ if (partition != 0 && hsplit.getPaths()[partition -1].equals(hsplit.getPaths()[partition]))
+ this.setFooterBuffer(((CombineHiveRecordReader)preReader).getFooterBuffer());
+ }
+
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/b4980afd/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
index d602c76..46f9970 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
@@ -244,6 +244,15 @@ public abstract class HiveContextAwareRecordReader<K, V> implements RecordReader
private int headerCount = 0;
private int footerCount = 0;
+ protected FooterBuffer getFooterBuffer() {
+ return footerBuffer;
+ }
+
+ protected void setFooterBuffer( FooterBuffer buf) {
+ footerBuffer = buf;
+ }
+
+
public boolean doNext(K key, V value) throws IOException {
if (this.isSorted) {
if (this.getIOContext().shouldEndBinarySearch() ||
@@ -308,6 +317,7 @@ public abstract class HiveContextAwareRecordReader<K, V> implements RecordReader
if (this.ioCxtRef.getCurrentBlockStart() == 0) {
// Check if the table file has header to skip.
+ footerBuffer = null;
Path filePath = this.ioCxtRef.getInputPath();
PartitionDesc part = null;
try {
http://git-wip-us.apache.org/repos/asf/hive/blob/b4980afd/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
----------------------------------------------------------------------
diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
index 224ce3b..018cb9f 100644
--- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
+++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
@@ -118,7 +118,8 @@ public abstract class HadoopShimsSecure implements HadoopShims {
InputSplit.class,
Configuration.class,
Reporter.class,
- Integer.class
+ Integer.class,
+ RecordReader.class
};
protected CombineFileSplit split;
@@ -237,6 +238,7 @@ public abstract class HadoopShimsSecure implements HadoopShims {
*/
protected boolean initNextRecordReader(K key) throws IOException {
+ RecordReader preReader = curReader; // It is OK that curReader is closed below: we only need the footer buffer info from preReader.
if (curReader != null) {
curReader.close();
curReader = null;
@@ -253,7 +255,7 @@ public abstract class HadoopShimsSecure implements HadoopShims {
// get a record reader for the idx-th chunk
try {
curReader = rrConstructor.newInstance(new Object[]
- {split, jc, reporter, Integer.valueOf(idx)});
+ {split, jc, reporter, Integer.valueOf(idx), preReader});
// change the key if need be
if (key != null) {