You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kr...@apache.org on 2022/08/08 06:30:55 UTC
[hive] branch master updated: HIVE-26452: NPE when converting join to mapjoin and join column referenced more than once (Krisztian Kasa, reviewed by Ayush Saxena, Aman Sinha)
This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new cf347f173a5 HIVE-26452: NPE when converting join to mapjoin and join column referenced more than once (Krisztian Kasa, reviewed by Ayush Saxena, Aman Sinha)
cf347f173a5 is described below
commit cf347f173a5c660d109d59fc37eafb64e04a54a3
Author: Krisztian Kasa <kk...@cloudera.com>
AuthorDate: Mon Aug 8 08:30:47 2022 +0200
HIVE-26452: NPE when converting join to mapjoin and join column referenced more than once (Krisztian Kasa, reviewed by Ayush Saxena, Aman Sinha)
---
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 53 ++++++++++++++--------
.../hadoop/hive/ql/plan/ExprNodeDescUtils.java | 10 ++--
2 files changed, 42 insertions(+), 21 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 36a87783903..4dbdc4c72d8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -9442,25 +9442,29 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
// backtrack can be null when input is script operator
ExprNodeDesc exprBack = ExprNodeDescUtils.backtrack(expr, dummy, parent);
- int kindex;
- if (exprBack == null) {
- kindex = -1;
- } else if (ExprNodeDescUtils.isConstant(exprBack)) {
- kindex = reduceKeysBack.indexOf(exprBack);
- } else {
- kindex = ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack);
- }
- if (kindex >= 0) {
- ColumnInfo newColInfo = new ColumnInfo(colInfo);
- String internalColName = Utilities.ReduceField.KEY + ".reducesinkkey" + kindex;
- newColInfo.setInternalName(internalColName);
- newColInfo.setTabAlias(nm[0]);
- outputRR.put(nm[0], nm[1], newColInfo);
- if (nm2 != null) {
- outputRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
+ if (exprBack != null) {
+ if (ExprNodeDescUtils.isConstant(exprBack)) {
+ int kindex = reduceKeysBack.indexOf(exprBack);
+ if (kindex >= 0) {
+ addJoinKeyToRowSchema(outputRR, index, i, colInfo, nm, nm2, kindex);
+ continue;
+ }
+ } else {
+ int startIdx = 0;
+ int kindex;
+ // joinKey may be present multiple times; add the duplicates to the schema with different internal names.
+ // example: KEY.reducesinkkey0, KEY.reducesinkkey1
+ // join LU_CUSTOMER a16
+ // on (a15.CUSTOMER_ID = a16.CUSTOMER_ID and pa11.CUSTOMER_ID = a16.CUSTOMER_ID)
+ while ((kindex = ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack, startIdx)) >= 0) {
+ addJoinKeyToRowSchema(outputRR, index, i, colInfo, nm, nm2, kindex);
+ startIdx = kindex + 1;
+ }
+ if (startIdx > 0) {
+ // at least one instance found
+ continue;
+ }
}
- index[i] = kindex;
- continue;
}
index[i] = -reduceValues.size() - 1;
String outputColName = getColumnInternalName(reduceValues.size());
@@ -9534,6 +9538,19 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
return rsOp;
}
+ private void addJoinKeyToRowSchema(
+ RowResolver outputRR, int[] index, int i, ColumnInfo colInfo, String[] nm, String[] nm2, int kindex) {
+ ColumnInfo newColInfo = new ColumnInfo(colInfo);
+ String internalColName = ReduceField.KEY + ".reducesinkkey" + kindex;
+ newColInfo.setInternalName(internalColName);
+ newColInfo.setTabAlias(nm[0]);
+ outputRR.put(nm[0], nm[1], newColInfo);
+ if (nm2 != null) {
+ outputRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
+ }
+ index[i] = kindex;
+ }
+
private Operator genJoinOperator(QB qb, QBJoinTree joinTree,
Map<String, Operator> map,
Operator joiningOp) throws SemanticException {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
index 9ecccdaad8b..541ce20f518 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
@@ -71,7 +71,11 @@ public class ExprNodeDescUtils {
protected static final Logger LOG = LoggerFactory.getLogger(ExprNodeDescUtils.class);
public static int indexOf(ExprNodeDesc origin, List<ExprNodeDesc> sources) {
- for (int i = 0; i < sources.size(); i++) {
+ return indexOf(origin, sources, 0);
+ }
+
+ public static int indexOf(ExprNodeDesc origin, List<ExprNodeDesc> sources, int startIndex) {
+ for (int i = startIndex; i < sources.size(); i++) {
if (origin.isSame(sources.get(i))) {
return i;
}
@@ -533,9 +537,9 @@ public class ExprNodeDescUtils {
if(columnInternalName.startsWith(Utilities.ReduceField.VALUE.toString())) {
continue;
}
- if (source instanceof ExprNodeColumnDesc) {
+ ColumnInfo columnInfo = reduceSinkOp.getSchema().getColumnInfo(columnInternalName);
+ if (source instanceof ExprNodeColumnDesc && columnInfo != null) {
// The join key is a table column. Create the ExprNodeDesc based on this column.
- ColumnInfo columnInfo = reduceSinkOp.getSchema().getColumnInfo(columnInternalName);
return new ExprNodeColumnDesc(columnInfo);
} else {
// Join key expression is likely some expression involving functions/operators, so there