You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2020/05/27 00:30:19 UTC
[hive] branch master updated: HIVE-23214 Get rid of skipCorrupt as part of ORC read pipeline (Panos Garefalakis via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository.
hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new f712def HIVE-23214 Get rid of skipCorrupt as part of ORC read pipeline (Panos Garefalakis via Ashutosh Chauhan)
f712def is described below
commit f712def65b716ba6646828ed8f8be4464abbedc8
Author: Panos Garefalakis <pg...@cloudera.com>
AuthorDate: Mon May 18 16:51:15 2020 +0100
HIVE-23214 Get rid of skipCorrupt as part of ORC read pipeline (Panos Garefalakis via Ashutosh Chauhan)
Change-Id: Ic1efd6dcffc71adfa1ac3059ceacbd3f30e6ef7e
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
---
.../hive/llap/io/decode/GenericColumnVectorProducer.java | 3 +--
.../hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java | 5 +----
.../hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java | 10 +++-------
3 files changed, 5 insertions(+), 13 deletions(-)
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java
index 1617692..1c7e537 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java
@@ -85,8 +85,7 @@ public class GenericColumnVectorProducer implements ColumnVectorProducer {
SchemaEvolutionFactory sef, InputFormat<?, ?> sourceInputFormat, Deserializer sourceSerDe,
Reporter reporter, JobConf job, Map<Path, PartitionDesc> parts) throws IOException {
cacheMetrics.incrCacheReadRequests();
- OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(
- consumer, includes, false, counters, ioMetrics);
+ OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(consumer, includes, counters, ioMetrics);
SerDeFileMetadata fm;
try {
fm = new SerDeFileMetadata(sourceSerDe);
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java
index 17c4821..50abdfd 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java
@@ -56,7 +56,6 @@ public class OrcColumnVectorProducer implements ColumnVectorProducer {
private final LowLevelCache lowLevelCache;
private final BufferUsageManager bufferManager;
private final Configuration conf;
- private boolean _skipCorrupt; // TODO: get rid of this
private LlapDaemonCacheMetrics cacheMetrics;
private LlapDaemonIOMetrics ioMetrics;
// TODO: if using in multiple places, e.g. SerDe cache, pass this in.
@@ -73,7 +72,6 @@ public class OrcColumnVectorProducer implements ColumnVectorProducer {
this.lowLevelCache = lowLevelCache;
this.bufferManager = bufferManager;
this.conf = conf;
- this._skipCorrupt = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
this.cacheMetrics = cacheMetrics;
this.ioMetrics = ioMetrics;
this.tracePool = tracePool;
@@ -90,8 +88,7 @@ public class OrcColumnVectorProducer implements ColumnVectorProducer {
InputFormat<?, ?> unused0, Deserializer unused1, Reporter reporter, JobConf job,
Map<Path, PartitionDesc> parts) throws IOException {
cacheMetrics.incrCacheReadRequests();
- OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(
- consumer, includes, _skipCorrupt, counters, ioMetrics);
+ OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(consumer, includes, counters, ioMetrics);
OrcEncodedDataReader reader = new OrcEncodedDataReader(lowLevelCache, bufferManager,
metadataCache, conf, job, split, includes, sarg, edc, counters, sef, tracePool, parts);
edc.init(reader, reader, reader.getTrace());
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
index b697a0d..79dba42 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
@@ -70,20 +70,16 @@ public class OrcEncodedDataConsumer
private ConsumerFileMetadata fileMetadata; // We assume one request is only for one file.
private CompressionCodec codec;
private List<ConsumerStripeMetadata> stripes;
- private final boolean skipCorrupt; // TODO: get rid of this
private SchemaEvolution evolution;
private IoTrace trace;
private final Includes includes;
private TypeDescription[] batchSchemas;
private boolean useDecimal64ColumnVectors;
- public OrcEncodedDataConsumer(
- Consumer<ColumnVectorBatch> consumer, Includes includes, boolean skipCorrupt,
- QueryFragmentCounters counters, LlapDaemonIOMetrics ioMetrics) {
+ public OrcEncodedDataConsumer(Consumer<ColumnVectorBatch> consumer, Includes includes,
+ QueryFragmentCounters counters, LlapDaemonIOMetrics ioMetrics) {
super(consumer, includes.getPhysicalColumnIds().size(), ioMetrics, counters);
this.includes = includes;
- // TODO: get rid of this
- this.skipCorrupt = skipCorrupt;
if (includes.isProbeDecodeEnabled()) {
LlapIoImpl.LOG.info("OrcEncodedDataConsumer probeDecode is enabled with cacheKey {} colIndex {} and colName {}",
this.includes.getProbeCacheKey(), this.includes.getProbeColIdx(), this.includes.getProbeColName());
@@ -225,7 +221,7 @@ public class OrcEncodedDataConsumer
private void createColumnReaders(OrcEncodedColumnBatch batch,
ConsumerStripeMetadata stripeMetadata, TypeDescription fileSchema) throws IOException {
TreeReaderFactory.Context context = new TreeReaderFactory.ReaderContext()
- .setSchemaEvolution(evolution).skipCorrupt(skipCorrupt)
+ .setSchemaEvolution(evolution)
.writerTimeZone(stripeMetadata.getWriterTimezone())
.fileFormat(fileMetadata == null ? null : fileMetadata.getFileVersion())
.useUTCTimestamp(true)