You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2020/05/09 17:18:22 UTC

[hive] branch master updated: HIVE-23393 : LlapInputFormat reader policy for Random IO formats (Panos G via Ashutosh Chauhan)

This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 8c88676  HIVE-23393 : LlapInputFormat reader policy for Random IO formats (Panos G via Ashutosh Chauhan)
8c88676 is described below

commit 8c88676a91115be56eee1a2fca5c9c2c3ee0402d
Author: Panagiotis Garefalakis <pa...@gmail.com>
AuthorDate: Sat May 9 10:17:42 2020 -0700

    HIVE-23393 : LlapInputFormat reader policy for Random IO formats (Panos G via Ashutosh Chauhan)
    
    Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
---
 .../apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java  | 11 +++++++++++
 ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java |  4 ++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
index ac1aca8..e184655 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
@@ -19,6 +19,9 @@
 
 package org.apache.hadoop.hive.llap.io.api.impl;
 
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.s3a.S3AInputPolicy;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport;
 import org.apache.hadoop.hive.ql.io.BatchToRowInputFormat;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -62,6 +65,9 @@ import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hive.common.util.HiveStringUtils;
 
+import static org.apache.hadoop.hive.common.FileUtils.isS3a;
+import static org.apache.hadoop.hive.ql.io.HiveInputFormat.isRandomAccessInputFormat;
+
 public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowBatch>,
     VectorizedInputFormatInterface, SelfDescribingInputFormatInterface,
     AvoidSplitCombination {
@@ -100,6 +106,11 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB
 
     FileSplit fileSplit = (FileSplit) split;
     reporter.setStatus(fileSplit.toString());
+    FileSystem splitFileSystem = fileSplit.getPath().getFileSystem(job);
+    if (isS3a(splitFileSystem) && isRandomAccessInputFormat(sourceInputFormat)) {
+      LlapIoImpl.LOG.debug("Changing S3A input policy to RANDOM");
+      ((S3AFileSystem) splitFileSystem).setInputPolicy(S3AInputPolicy.Random);
+    }
     try {
       // At this entry point, we are going to assume that these are logical table columns.
       // Perhaps we should go thru the code and clean this up to be more explicit; for now, we
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index 218d665..62ef0c6 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -386,7 +386,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
    * @param inputFormat
    * @return
    */
-  private static boolean isRandomAccessInputFormat(InputFormat inputFormat) {
+  public static boolean isRandomAccessInputFormat(InputFormat inputFormat) {
     if (inputFormat instanceof OrcInputFormat ||
         inputFormat instanceof VectorizedParquetInputFormat) {
       return true;
@@ -449,7 +449,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
 
     FileSystem splitFileSystem = splitPath.getFileSystem(job);
     if (isS3a(splitFileSystem) && isRandomAccessInputFormat(inputFormat)) {
-      LOG.debug("Changing S3A input policy to RANDOM for split {}", splitPath);
+      LOG.debug("Changing S3A input policy to RANDOM");
       ((S3AFileSystem) splitFileSystem).setInputPolicy(S3AInputPolicy.Random);
     }