You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kr...@apache.org on 2022/08/08 06:30:55 UTC

[hive] branch master updated: HIVE-26452: NPE when converting join to mapjoin and join column referenced more than once (Krisztian Kasa, reviewed by Ayush Saxena, Aman Sinha)

This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new cf347f173a5 HIVE-26452: NPE when converting join to mapjoin and join column referenced more than once (Krisztian Kasa, reviewed by Ayush Saxena, Aman Sinha)
cf347f173a5 is described below

commit cf347f173a5c660d109d59fc37eafb64e04a54a3
Author: Krisztian Kasa <kk...@cloudera.com>
AuthorDate: Mon Aug 8 08:30:47 2022 +0200

    HIVE-26452: NPE when converting join to mapjoin and join column referenced more than once (Krisztian Kasa, reviewed by Ayush Saxena, Aman Sinha)
---
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     | 53 ++++++++++++++--------
 .../hadoop/hive/ql/plan/ExprNodeDescUtils.java     | 10 ++--
 2 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 36a87783903..4dbdc4c72d8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -9442,25 +9442,29 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
       // backtrack can be null when input is script operator
       ExprNodeDesc exprBack = ExprNodeDescUtils.backtrack(expr, dummy, parent);
-      int kindex;
-      if (exprBack == null) {
-        kindex = -1;
-      } else if (ExprNodeDescUtils.isConstant(exprBack)) {
-        kindex = reduceKeysBack.indexOf(exprBack);
-      } else {
-        kindex = ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack);
-      }
-      if (kindex >= 0) {
-        ColumnInfo newColInfo = new ColumnInfo(colInfo);
-        String internalColName = Utilities.ReduceField.KEY + ".reducesinkkey" + kindex;
-        newColInfo.setInternalName(internalColName);
-        newColInfo.setTabAlias(nm[0]);
-        outputRR.put(nm[0], nm[1], newColInfo);
-        if (nm2 != null) {
-          outputRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
+      if (exprBack != null) {
+        if (ExprNodeDescUtils.isConstant(exprBack)) {
+          int kindex = reduceKeysBack.indexOf(exprBack);
+          if (kindex >= 0) {
+            addJoinKeyToRowSchema(outputRR, index, i, colInfo, nm, nm2, kindex);
+            continue;
+          }
+        } else {
+          int startIdx = 0;
+          int kindex;
+          // joinKey may present multiple times, add the duplicates to the schema with different internal name.
+          // example: KEY.reducesinkkey0, KEY.reducesinkkey1
+          //      join        LU_CUSTOMER        a16
+          //      on         (a15.CUSTOMER_ID = a16.CUSTOMER_ID and pa11.CUSTOMER_ID = a16.CUSTOMER_ID)
+          while ((kindex = ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack, startIdx)) >= 0) {
+            addJoinKeyToRowSchema(outputRR, index, i, colInfo, nm, nm2, kindex);
+            startIdx = kindex + 1;
+          }
+          if (startIdx > 0) {
+            // at least one instance found
+            continue;
+          }
         }
-        index[i] = kindex;
-        continue;
       }
       index[i] = -reduceValues.size() - 1;
       String outputColName = getColumnInternalName(reduceValues.size());
@@ -9534,6 +9538,19 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     return rsOp;
   }
 
+  private void addJoinKeyToRowSchema(
+      RowResolver outputRR, int[] index, int i, ColumnInfo colInfo, String[] nm, String[] nm2, int kindex) {
+    ColumnInfo newColInfo = new ColumnInfo(colInfo);
+    String internalColName = ReduceField.KEY + ".reducesinkkey" + kindex;
+    newColInfo.setInternalName(internalColName);
+    newColInfo.setTabAlias(nm[0]);
+    outputRR.put(nm[0], nm[1], newColInfo);
+    if (nm2 != null) {
+      outputRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
+    }
+    index[i] = kindex;
+  }
+
   private Operator genJoinOperator(QB qb, QBJoinTree joinTree,
                                    Map<String, Operator> map,
                                    Operator joiningOp) throws SemanticException {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
index 9ecccdaad8b..541ce20f518 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
@@ -71,7 +71,11 @@ public class ExprNodeDescUtils {
   protected static final Logger LOG = LoggerFactory.getLogger(ExprNodeDescUtils.class);
 
   public static int indexOf(ExprNodeDesc origin, List<ExprNodeDesc> sources) {
-    for (int i = 0; i < sources.size(); i++) {
+    return indexOf(origin, sources, 0);
+  }
+
+  public static int indexOf(ExprNodeDesc origin, List<ExprNodeDesc> sources, int startIndex) {
+    for (int i = startIndex; i < sources.size(); i++) {
       if (origin.isSame(sources.get(i))) {
         return i;
       }
@@ -533,9 +537,9 @@ public class ExprNodeDescUtils {
         if(columnInternalName.startsWith(Utilities.ReduceField.VALUE.toString())) {
           continue;
         }
-        if (source instanceof ExprNodeColumnDesc) {
+        ColumnInfo columnInfo = reduceSinkOp.getSchema().getColumnInfo(columnInternalName);
+        if (source instanceof ExprNodeColumnDesc && columnInfo != null) {
           // The join key is a table column. Create the ExprNodeDesc based on this column.
-          ColumnInfo columnInfo = reduceSinkOp.getSchema().getColumnInfo(columnInternalName);
           return new ExprNodeColumnDesc(columnInfo);
         } else {
           // Join key expression is likely some expression involving functions/operators, so there