Posted to commits@hive.apache.org by ha...@apache.org on 2020/05/27 00:30:19 UTC

[hive] branch master updated: HIVE-23214 Get rid of skipCorrupt as part of ORC read pipeline (Panos Garefalakis via Ashutosh Chauhan)

This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new f712def  HIVE-23214 Get rid of skipCorrupt as part of ORC read pipeline (Panos Garefalakis via Ashutosh Chauhan)
f712def is described below

commit f712def65b716ba6646828ed8f8be4464abbedc8
Author: Panos Garefalakis <pg...@cloudera.com>
AuthorDate: Mon May 18 16:51:15 2020 +0100

    HIVE-23214 Get rid of skipCorrupt as part of ORC read pipeline (Panos Garefalakis via Ashutosh Chauhan)
    
    Change-Id: Ic1efd6dcffc71adfa1ac3059ceacbd3f30e6ef7e
    Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
---
 .../hive/llap/io/decode/GenericColumnVectorProducer.java       |  3 +--
 .../hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java    |  5 +----
 .../hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java     | 10 +++-------
 3 files changed, 5 insertions(+), 13 deletions(-)
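
In short, the patch stops threading the skip-corrupt flag through the LLAP ORC decode path: both ColumnVectorProducer implementations now construct OrcEncodedDataConsumer without the boolean, and the consumer no longer forwards it to ORC's TreeReaderFactory context. A minimal before/after sketch of the constructor change (signatures as they appear in the diff below; bodies elided):

    // Before: every caller had to resolve and pass the flag explicitly.
    public OrcEncodedDataConsumer(Consumer<ColumnVectorBatch> consumer, Includes includes,
        boolean skipCorrupt, QueryFragmentCounters counters, LlapDaemonIOMetrics ioMetrics) { ... }

    // After: the flag is dropped from the signature entirely.
    public OrcEncodedDataConsumer(Consumer<ColumnVectorBatch> consumer, Includes includes,
        QueryFragmentCounters counters, LlapDaemonIOMetrics ioMetrics) { ... }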

diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java
index 1617692..1c7e537 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java
@@ -85,8 +85,7 @@ public class GenericColumnVectorProducer implements ColumnVectorProducer {
       SchemaEvolutionFactory sef, InputFormat<?, ?> sourceInputFormat, Deserializer sourceSerDe,
       Reporter reporter, JobConf job, Map<Path, PartitionDesc> parts) throws IOException {
     cacheMetrics.incrCacheReadRequests();
-    OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(
-        consumer, includes, false, counters, ioMetrics);
+    OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(consumer, includes, counters, ioMetrics);
     SerDeFileMetadata fm;
     try {
       fm = new SerDeFileMetadata(sourceSerDe);
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java
index 17c4821..50abdfd 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java
@@ -56,7 +56,6 @@ public class OrcColumnVectorProducer implements ColumnVectorProducer {
   private final LowLevelCache lowLevelCache;
   private final BufferUsageManager bufferManager;
   private final Configuration conf;
-  private boolean _skipCorrupt; // TODO: get rid of this
   private LlapDaemonCacheMetrics cacheMetrics;
   private LlapDaemonIOMetrics ioMetrics;
   // TODO: if using in multiple places, e.g. SerDe cache, pass this in.
@@ -73,7 +72,6 @@ public class OrcColumnVectorProducer implements ColumnVectorProducer {
     this.lowLevelCache = lowLevelCache;
     this.bufferManager = bufferManager;
     this.conf = conf;
-    this._skipCorrupt = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
     this.cacheMetrics = cacheMetrics;
     this.ioMetrics = ioMetrics;
     this.tracePool = tracePool;
@@ -90,8 +88,7 @@ public class OrcColumnVectorProducer implements ColumnVectorProducer {
       InputFormat<?, ?> unused0, Deserializer unused1, Reporter reporter, JobConf job,
       Map<Path, PartitionDesc> parts) throws IOException {
     cacheMetrics.incrCacheReadRequests();
-    OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(
-        consumer, includes, _skipCorrupt, counters, ioMetrics);
+    OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(consumer, includes, counters, ioMetrics);
     OrcEncodedDataReader reader = new OrcEncodedDataReader(lowLevelCache, bufferManager,
         metadataCache, conf, job, split, includes, sarg, edc, counters, sef, tracePool, parts);
     edc.init(reader, reader, reader.getTrace());
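
Note that OrcColumnVectorProducer previously cached the setting in a _skipCorrupt field at construction time only to forward it here; with this patch the field, the OrcConf lookup, and the extra constructor argument all disappear. The setting itself remains resolvable from the job configuration on demand, e.g. (a hedged sketch; the lookup is the same call the removed line used):

    // OrcConf.SKIP_CORRUPT_DATA still carries the property name and default;
    // any component that needs the flag can read it from the Configuration.
    boolean skipCorrupt = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
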
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
index b697a0d..79dba42 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
@@ -70,20 +70,16 @@ public class OrcEncodedDataConsumer
   private ConsumerFileMetadata fileMetadata; // We assume one request is only for one file.
   private CompressionCodec codec;
   private List<ConsumerStripeMetadata> stripes;
-  private final boolean skipCorrupt; // TODO: get rid of this
   private SchemaEvolution evolution;
   private IoTrace trace;
   private final Includes includes;
   private TypeDescription[] batchSchemas;
   private boolean useDecimal64ColumnVectors;
 
-  public OrcEncodedDataConsumer(
-    Consumer<ColumnVectorBatch> consumer, Includes includes, boolean skipCorrupt,
-    QueryFragmentCounters counters, LlapDaemonIOMetrics ioMetrics) {
+  public OrcEncodedDataConsumer(Consumer<ColumnVectorBatch> consumer, Includes includes,
+                                QueryFragmentCounters counters, LlapDaemonIOMetrics ioMetrics) {
     super(consumer, includes.getPhysicalColumnIds().size(), ioMetrics, counters);
     this.includes = includes;
-    // TODO: get rid of this
-    this.skipCorrupt = skipCorrupt;
     if (includes.isProbeDecodeEnabled()) {
       LlapIoImpl.LOG.info("OrcEncodedDataConsumer probeDecode is enabled with cacheKey {} colIndex {} and colName {}",
               this.includes.getProbeCacheKey(), this.includes.getProbeColIdx(), this.includes.getProbeColName());
@@ -225,7 +221,7 @@ public class OrcEncodedDataConsumer
   private void createColumnReaders(OrcEncodedColumnBatch batch,
       ConsumerStripeMetadata stripeMetadata, TypeDescription fileSchema) throws IOException {
     TreeReaderFactory.Context context = new TreeReaderFactory.ReaderContext()
-            .setSchemaEvolution(evolution).skipCorrupt(skipCorrupt)
+            .setSchemaEvolution(evolution)
             .writerTimeZone(stripeMetadata.getWriterTimezone())
             .fileFormat(fileMetadata == null ? null : fileMetadata.getFileVersion())
             .useUTCTimestamp(true)
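
With the .skipCorrupt(skipCorrupt) call gone from the builder chain, the consumer no longer pins the flag at context-creation time; presumably ORC resolves it from its own configuration instead (the commit message does not spell this out, so treat that as an assumption). For users, the knob itself is unchanged and can still be set on the job configuration before reading, e.g. (a sketch assuming OrcConf.getAttribute(), which returns the property name for the enum constant):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.orc.OrcConf;

    // Hypothetical usage: enable skipping of corrupt data for a read job.
    Configuration conf = new Configuration();
    conf.setBoolean(OrcConf.SKIP_CORRUPT_DATA.getAttribute(), true);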