Posted to commits@hive.apache.org by na...@apache.org on 2010/10/29 23:35:20 UTC

svn commit: r1028912 - in /hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/test/results/clientpositive/

Author: namit
Date: Fri Oct 29 21:35:20 2010
New Revision: 1028912

URL: http://svn.apache.org/viewvc?rev=1028912&view=rev
Log:
HIVE-1657 Join results are displayed wrongly for some complex joins
using select * (Amareshwari Sriramadasu via namit)


Modified:
    hive/trunk/CHANGES.txt
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/trunk/ql/src/test/results/clientpositive/join_filters.q.out
    hive/trunk/ql/src/test/results/clientpositive/join_reorder2.q.out
    hive/trunk/ql/src/test/results/clientpositive/join_reorder3.q.out

Modified: hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hive/trunk/CHANGES.txt?rev=1028912&r1=1028911&r2=1028912&view=diff
==============================================================================
--- hive/trunk/CHANGES.txt (original)
+++ hive/trunk/CHANGES.txt Fri Oct 29 21:35:20 2010
@@ -428,6 +428,9 @@ Trunk -  Unreleased
     HIVE-1756 Negative Test cleanup for 1641
     (Liyin Tang via namit)
 
+    HIVE-1657 Join results are displayed wrongly for some complex joins
+    using select * (Amareshwari Sriramadasu via namit)
+
   TESTS
 
     HIVE-1464. improve  test query performance

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java?rev=1028912&r1=1028911&r2=1028912&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java Fri Oct 29 21:35:20 2010
@@ -18,7 +18,9 @@
 
 package org.apache.hadoop.hive.ql.parse;
 
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Set;
 
 import org.apache.commons.logging.Log;
@@ -40,6 +42,7 @@ public class QB {
   private int numSelDi = 0;
   private HashMap<String, String> aliasToTabs;
   private HashMap<String, QBExpr> aliasToSubq;
+  private List<String> aliases;
   private QBParseInfo qbp;
   private QBMetaData qbm;
   private QBJoinTree qbjoin;
@@ -65,6 +68,7 @@ public class QB {
   public QB(String outer_id, String alias, boolean isSubQ) {
     aliasToTabs = new HashMap<String, String>();
     aliasToSubq = new HashMap<String, QBExpr>();
+    aliases = new ArrayList<String>();
     if (alias != null) {
       alias = alias.toLowerCase();
     }
@@ -110,6 +114,12 @@ public class QB {
     aliasToSubq.put(alias.toLowerCase(), qbexpr);
   }
 
+  public void addAlias(String alias) {
+    if (!aliases.contains(alias.toLowerCase())) {
+      aliases.add(alias.toLowerCase());
+    }
+  }
+
   public String getId() {
     return id;
   }
@@ -138,6 +148,10 @@ public class QB {
     return aliasToTabs.keySet();
   }
 
+  public List<String> getAliases() {
+    return aliases;
+  }
+
   public QBExpr getSubqForAlias(String alias) {
     return aliasToSubq.get(alias.toLowerCase());
   }

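The QB change above is pure bookkeeping: an ordered, de-duplicated list of aliases kept alongside the existing HashMap-backed alias maps. As a hedged illustration of why that matters for this bug (the class and method names below are made up for the sketch and are not Hive code), HashMap key iteration order is unspecified, so driving select * expansion off a key set can emit each table's column block in an arbitrary order, whereas an ArrayList keeps the aliases in the order they appear in the FROM clause:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

// Illustrative stand-in for the alias bookkeeping added to QB in this patch;
// it mirrors addAlias(): lower-cased, de-duplicated, insertion-ordered.
public class AliasOrderSketch {
  private final HashMap<String, String> aliasToTabs = new HashMap<String, String>();
  private final List<String> aliases = new ArrayList<String>();

  public void addTable(String alias, String table) {
    aliasToTabs.put(alias.toLowerCase(), table);
    if (!aliases.contains(alias.toLowerCase())) {
      aliases.add(alias.toLowerCase());
    }
  }

  public static void main(String[] args) {
    AliasOrderSketch qb = new AliasOrderSketch();
    qb.addTable("a", "myinput1");
    qb.addTable("b", "myinput1");
    qb.addTable("c", "myinput1");
    // Key-set order is an implementation detail of HashMap and may differ
    // from the order the aliases were written in the query.
    System.out.println("keySet order : " + qb.aliasToTabs.keySet());
    // The new list preserves query order: [a, b, c].
    System.out.println("aliases order: " + qb.aliases);
  }
}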
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1028912&r1=1028911&r2=1028912&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Fri Oct 29 21:35:20 2010
@@ -422,6 +422,7 @@ public class SemanticAnalyzer extends Ba
     // Insert this map into the stats
     String table_name = unescapeIdentifier(tabref.getChild(0).getText());
     qb.setTabAlias(alias, table_name);
+    qb.addAlias(alias);
 
     qb.getParseInfo().setSrcForAlias(alias, tableTree);
 
@@ -455,6 +456,7 @@ public class SemanticAnalyzer extends Ba
     }
     // Insert this map into the stats
     qb.setSubqAlias(alias, qbexpr);
+    qb.addAlias(alias);
 
     unparseTranslator.addIdentifierTranslation((ASTNode) subq.getChild(1));
 
@@ -544,6 +546,7 @@ public class SemanticAnalyzer extends Ba
           .getMsg(lateralView));
     }
     qb.getParseInfo().addLateralViewForAlias(alias, lateralView);
+    qb.addAlias(alias);
     return alias;
   }
 
@@ -694,6 +697,7 @@ public class SemanticAnalyzer extends Ba
         // Case of analyze command
         String table_name = unescapeIdentifier(ast.getChild(0).getChild(0).getText());
         qb.setTabAlias(table_name, table_name);
+        qb.addAlias(table_name);
         qb.getParseInfo().setIsAnalyzeCommand(true);
         // Allow analyze the whole table and dynamic partitions
         HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
@@ -1304,8 +1308,8 @@ public class SemanticAnalyzer extends Ba
 
   @SuppressWarnings("nls")
   private Integer genColListRegex(String colRegex, String tabAlias,
-      String alias, ASTNode sel, ArrayList<ExprNodeDesc> col_list,
-      RowResolver input, Integer pos, RowResolver output)
+      ASTNode sel, ArrayList<ExprNodeDesc> col_list,
+      RowResolver input, Integer pos, RowResolver output, List<String> aliases)
       throws SemanticException {
 
     // The table alias should exist
@@ -1324,43 +1328,57 @@ public class SemanticAnalyzer extends Ba
 
     StringBuilder replacementText = new StringBuilder();
     int matched = 0;
-    // This is the tab.* case
-    // In this case add all the columns to the fieldList
-    // from the input schema
-    for (ColumnInfo colInfo : input.getColumnInfos()) {
-      String name = colInfo.getInternalName();
-      String[] tmp = input.reverseLookup(name);
-
-      // Skip the colinfos which are not for this particular alias
-      if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) {
+    // add empty string to the list of aliases. Some operators (ex. GroupBy) add
+    // ColumnInfos for table alias "".
+    if (!aliases.contains("")) {
+      aliases.add("");
+    }
+    // For expr "*", aliases should be iterated in the order they are specified
+    // in the query.
+    for (String alias : aliases) {
+      HashMap<String, ColumnInfo> fMap = input.getFieldMap(alias);
+      if (fMap == null) {
         continue;
       }
+      // For the tab.* case, add all the columns to the fieldList
+      // from the input schema
+      for (Map.Entry<String, ColumnInfo> entry : fMap.entrySet()) {
+        ColumnInfo colInfo = entry.getValue();
+        String name = colInfo.getInternalName();
+        String[] tmp = input.reverseLookup(name);
 
-      if(colInfo.getIsVirtualCol() && colInfo.isHiddenVirtualCol()) {
-        continue;
-      }
+        // Skip the colinfos which are not for this particular alias
+        if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) {
+          continue;
+        }
 
-      // Not matching the regex?
-      if (!regex.matcher(tmp[1]).matches()) {
-        continue;
-      }
+        if (colInfo.getIsVirtualCol() && colInfo.isHiddenVirtualCol()) {
+          continue;
+        }
 
-      ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(), name,
-          colInfo.getTabAlias(), colInfo.getIsVirtualCol());
-      col_list.add(expr);
-      output.put(tmp[0], tmp[1],
-          new ColumnInfo(getColumnInternalName(pos), colInfo.getType(), colInfo
-          .getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()));
-      pos = Integer.valueOf(pos.intValue() + 1);
-      matched++;
+        // Not matching the regex?
+        if (!regex.matcher(tmp[1]).matches()) {
+          continue;
+        }
 
-      if (unparseTranslator.isEnabled()) {
-        if (replacementText.length() > 0) {
-          replacementText.append(", ");
-        }
-        replacementText.append(HiveUtils.unparseIdentifier(tmp[0]));
-        replacementText.append(".");
-        replacementText.append(HiveUtils.unparseIdentifier(tmp[1]));
+        ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(),
+            name, colInfo.getTabAlias(), colInfo.getIsVirtualCol());
+        col_list.add(expr);
+        output.put(tmp[0], tmp[1],
+            new ColumnInfo(getColumnInternalName(pos), colInfo.getType(),
+            colInfo.getTabAlias(), colInfo.getIsVirtualCol(),
+            colInfo.isHiddenVirtualCol()));
+        pos = Integer.valueOf(pos.intValue() + 1);
+        matched++;
+
+        if (unparseTranslator.isEnabled()) {
+          if (replacementText.length() > 0) {
+            replacementText.append(", ");
+          }
+          replacementText.append(HiveUtils.unparseIdentifier(tmp[0]));
+          replacementText.append(".");
+          replacementText.append(HiveUtils.unparseIdentifier(tmp[1]));
+        }
       }
     }
     if (matched == 0) {
@@ -1881,6 +1899,7 @@ public class SemanticAnalyzer extends Ba
           assert (selExprChild.getChildCount() == 1);
           udtfTableAlias = unescapeIdentifier(selExprChild.getChild(0)
               .getText());
+          qb.addAlias(udtfTableAlias);
           unparseTranslator.addIdentifierTranslation((ASTNode) selExprChild
               .getChild(0));
           break;
@@ -1952,7 +1971,7 @@ public class SemanticAnalyzer extends Ba
       if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
         pos = genColListRegex(".*", expr.getChildCount() == 0 ? null
             : unescapeIdentifier(expr.getChild(0).getText().toLowerCase()),
-            alias, expr, col_list, inputRR, pos, out_rwsch);
+            expr, col_list, inputRR, pos, out_rwsch, qb.getAliases());
         selectStar = true;
       } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause
           && !inputRR.getIsExprResolver()
@@ -1961,7 +1980,7 @@ public class SemanticAnalyzer extends Ba
         // This can only happen without AS clause
         // We don't allow this for ExprResolver - the Group By case
         pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()),
-            null, alias, expr, col_list, inputRR, pos, out_rwsch);
+            null, expr, col_list, inputRR, pos, out_rwsch, qb.getAliases());
       } else if (expr.getType() == HiveParser.DOT
           && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
           && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0)
@@ -1973,7 +1992,8 @@ public class SemanticAnalyzer extends Ba
         // We don't allow this for ExprResolver - the Group By case
         pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()),
             unescapeIdentifier(expr.getChild(0).getChild(0).getText()
-            .toLowerCase()), alias, expr, col_list, inputRR, pos, out_rwsch);
+            .toLowerCase()), expr, col_list, inputRR, pos, out_rwsch,
+            qb.getAliases());
       } else {
         // Case when this is an expression
         ExprNodeDesc exp = genExprNodeDesc(expr, inputRR);
@@ -5971,6 +5991,10 @@ public class SemanticAnalyzer extends Ba
           QB blankQb = new QB(null, null, false);
           Operator udtfPath = genSelectPlan((ASTNode) lateralViewTree
               .getChild(0), blankQb, lvForward);
+          // add udtf aliases to QB
+          for (String udtfAlias : blankQb.getAliases()) {
+            qb.addAlias(udtfAlias);
+          }
           RowResolver udtfPathRR = opParseCtx.get(udtfPath).getRR();
 
           // Merge the two into the lateral view join

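The core of the fix is the rewritten loop in genColListRegex: instead of walking input.getColumnInfos() once, it walks qb.getAliases() in query order and expands each alias's field map in turn (the empty alias "" is included because operators such as GroupBy register columns under it). Below is a minimal stand-alone sketch of that ordering idea only, assuming simplified stand-in types rather than the real RowResolver/ColumnInfo classes; it omits the regex matching, virtual-column filtering and unparse translation the real method performs:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Simplified stand-in for the alias-ordered select * expansion; not Hive code.
public class SelectStarOrderSketch {

  // alias -> (column name -> internal name), a stand-in for RowResolver.getFieldMap().
  static List<String> expandStar(Map<String, Map<String, String>> fieldMaps,
                                 List<String> aliases) {
    List<String> colList = new ArrayList<String>();
    // Some operators register columns under the empty alias "", so include it.
    if (!aliases.contains("")) {
      aliases.add("");
    }
    // Iterate aliases in the order they appear in the query, not key-set order.
    for (String alias : aliases) {
      Map<String, String> fMap = fieldMaps.get(alias);
      if (fMap == null) {
        continue;
      }
      for (Map.Entry<String, String> entry : fMap.entrySet()) {
        colList.add(alias + "." + entry.getKey());
      }
    }
    return colList;
  }

  public static void main(String[] args) {
    Map<String, Map<String, String>> fieldMaps =
        new LinkedHashMap<String, Map<String, String>>();
    Map<String, String> cols = new LinkedHashMap<String, String>();
    cols.put("key", "_col0");
    cols.put("value", "_col1");
    fieldMaps.put("b", cols);   // the resolver may hold aliases in any order
    fieldMaps.put("a", cols);
    fieldMaps.put("c", cols);
    // Query order a, b, c wins: [a.key, a.value, b.key, b.value, c.key, c.value]
    System.out.println(expandStar(fieldMaps,
        new ArrayList<String>(Arrays.asList("a", "b", "c"))));
  }
}

Even with the resolver holding its aliases in b, a, c order, the sketch prints a's columns first, which is the same reordering visible in the expr lists and result rows of the test-output diffs below.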
Modified: hive/trunk/ql/src/test/results/clientpositive/join_filters.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/join_filters.q.out?rev=1028912&r1=1028911&r2=1028912&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/join_filters.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/join_filters.q.out Fri Oct 29 21:35:20 2010
@@ -324,10 +324,10 @@ PREHOOK: Output: file:/tmp/amarsri/hive_
 POSTHOOK: query: SELECT * from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.key=c.key AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@myinput1
-POSTHOOK: Output: file:/tmp/amarsri/hive_2010-09-21_00-47-42_197_5979333762407595753/-mr-10000
-NULL	40	NULL	NULL	NULL	NULL
-12	35	NULL	NULL	NULL	NULL
-48	NULL	NULL	NULL	NULL	NULL
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-10-27_03-21-12_815_3004255014715798791/-mr-10000
+NULL	NULL	NULL	40	NULL	NULL
+NULL	NULL	12	35	NULL	NULL
+NULL	NULL	48	NULL	NULL	NULL
 100	100	100	100	100	100
 PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b RIGHT OUTER JOIN myinput1 c ON a.value = b.value and b.key = c.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value AND c.key > 40 AND c.value > 50 AND c.key = c.value
 PREHOOK: type: QUERY

Modified: hive/trunk/ql/src/test/results/clientpositive/join_reorder2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/join_reorder2.q.out?rev=1028912&r1=1028911&r2=1028912&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/join_reorder2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/join_reorder2.q.out Fri Oct 29 21:35:20 2010
@@ -378,6 +378,10 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13
           Select Operator
             expressions:
+                  expr: _col8
+                  type: string
+                  expr: _col9
+                  type: string
                   expr: _col0
                   type: string
                   expr: _col1
@@ -386,10 +390,6 @@ STAGE PLANS:
                   type: string
                   expr: _col5
                   type: string
-                  expr: _col8
-                  type: string
-                  expr: _col9
-                  type: string
                   expr: _col12
                   type: string
                   expr: _col13
@@ -426,4 +426,4 @@ POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
 POSTHOOK: Input: default@t4
 POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-06-33_450_777508846599090366/-mr-10000
-2	22	2	12	2	12	2	12
+2	12	2	22	2	12	2	12

Modified: hive/trunk/ql/src/test/results/clientpositive/join_reorder3.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/join_reorder3.q.out?rev=1028912&r1=1028911&r2=1028912&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/join_reorder3.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/join_reorder3.q.out Fri Oct 29 21:35:20 2010
@@ -378,6 +378,10 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col12, _col13
           Select Operator
             expressions:
+                  expr: _col8
+                  type: string
+                  expr: _col9
+                  type: string
                   expr: _col0
                   type: string
                   expr: _col1
@@ -386,10 +390,6 @@ STAGE PLANS:
                   type: string
                   expr: _col5
                   type: string
-                  expr: _col8
-                  type: string
-                  expr: _col9
-                  type: string
                   expr: _col12
                   type: string
                   expr: _col13
@@ -426,4 +426,4 @@ POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
 POSTHOOK: Input: default@t4
 POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-06-51_531_8070318925118385343/-mr-10000
-2	22	2	12	2	12	2	12
+2	12	2	22	2	12	2	12