You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2020/07/22 04:56:48 UTC
[hive] branch master updated: HIVE-23733 : genIncludedColNames
functionality for ORCInputFormat (Panos G via Ashutosh Chauhan) Adding
getOriginalColumnNames as part of LLAP Includes interface
This is an automated email from the ASF dual-hosted git repository.
hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new d7ee83d HIVE-23733 : genIncludedColNames functionality for ORCInputFormat (Panos G via Ashutosh Chauhan) Adding getOriginalColumnNames as part of LLAP Includes interface
d7ee83d is described below
commit d7ee83d0e23d74f0258ab3678bced016d4043db3
Author: Panos Garefalakis <pg...@cloudera.com>
AuthorDate: Sat Jun 20 21:59:16 2020 +0100
HIVE-23733 : genIncludedColNames functionality for ORCInputFormat (Panos G via Ashutosh Chauhan)
Adding getOriginalColumnNames as part of LLAP Includes interface
Signed-off-by: Ashutosh Chauhan <as...@cloudera.com>
---
.../hive/llap/io/api/impl/LlapRecordReader.java | 6 ++++++
.../hive/llap/io/decode/ColumnVectorProducer.java | 1 +
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 25 ++++++++++++++++++++++
3 files changed, 32 insertions(+)
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
index a257a06..55a142e 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
@@ -742,6 +742,12 @@ class LlapRecordReader implements RecordReader<NullWritable, VectorizedRowBatch>
}
@Override
+ public String[] getOriginalColumnNames(TypeDescription fileSchema) {
+ return OrcInputFormat.genIncludedColNames(
+ fileSchema, filePhysicalColumnIds, acidStructColumnId);
+ }
+
+ @Override
public String getQueryId() {
return HiveConf.getVar(jobConf, HiveConf.ConfVars.HIVEQUERYID);
}
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java
index e37379b..2a3d7fd 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/ColumnVectorProducer.java
@@ -49,6 +49,7 @@ public interface ColumnVectorProducer {
List<Integer> getPhysicalColumnIds();
List<Integer> getReaderLogicalColumnIds();
TypeDescription[] getBatchReaderTypes(TypeDescription fileSchema);
+ String[] getOriginalColumnNames(TypeDescription fileSchema);
String getQueryId();
boolean isProbeDecodeEnabled();
byte getProbeMjSmallTablePos();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index de962cd..1380185 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -421,6 +421,31 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
return result;
}
+ // Mostly dup of genIncludedColumns
+ public static String[] genIncludedColNames(TypeDescription fileSchema,
+ List<Integer> included, Integer recursiveStruct) {
+ String[] originalColNames = new String[included.size()];
+ List<TypeDescription> children = fileSchema.getChildren();
+ for (int columnNumber = 0; columnNumber < children.size(); ++columnNumber) {
+ int indexInBatchCols = included.indexOf(columnNumber);
+ if (indexInBatchCols >= 0) {
+ // child index and field index should be the same
+ originalColNames[indexInBatchCols] = fileSchema.getFieldNames().get(columnNumber);
+ } else if (recursiveStruct != null && recursiveStruct == columnNumber) {
+ // This assumes all struct cols immediately follow struct
+ List<TypeDescription> nestedChildren = children.get(columnNumber).getChildren();
+ for (int columnNumberDelta = 0; columnNumberDelta < nestedChildren.size(); ++columnNumberDelta) {
+ int columnNumberNested = columnNumber + 1 + columnNumberDelta;
+ int nestedIxInBatchCols = included.indexOf(columnNumberNested);
+ if (nestedIxInBatchCols >= 0) {
+ originalColNames[nestedIxInBatchCols] = children.get(columnNumber).getFieldNames().get(columnNumberDelta);
+ }
+ }
+ }
+ }
+ return originalColNames;
+ }
+
private static void addColumnToIncludes(TypeDescription child, boolean[] result) {
for(int col = child.getId(); col <= child.getMaximumId(); ++col) {