Posted to commits@hive.apache.org by na...@apache.org on 2013/03/04 07:01:23 UTC

svn commit: r1452189 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/parse/ java/org/apache/hadoop/hive/ql/plan/ test/queries/clientpositive/ test/results/clientpositive/

Author: namit
Date: Mon Mar  4 06:01:23 2013
New Revision: 1452189

URL: http://svn.apache.org/r1452189
Log:
HIVE-3490 Implement * or a.* for arguments to UDFs
(Navis via namit)
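In short, HIVE-3490 lets the star forms stand in for an explicit column list in
UDF arguments. A usage sketch against the standard src test table (the JDBC
endpoint, URL, and driver class below are assumptions about a typical
HiveServer1-era deployment, not part of this commit):

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class AllColRefInUdf {
      public static void main(String[] args) throws Exception {
        // HiveServer1-era JDBC driver; adjust for your deployment.
        Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
        Connection conn =
            DriverManager.getConnection("jdbc:hive://localhost:10000/default", "", "");
        Statement stmt = conn.createStatement();
        // With this patch, '*' and 'a.*' are legal UDF arguments:
        // concat(*) expands to concat(key, value) on src.
        ResultSet rs = stmt.executeQuery(
            "select concat(*), array(*) from src where key < 100 limit 10");
        while (rs.next()) {
          System.out.println(rs.getString(1) + "\t" + rs.getString(2));
        }
        rs.close();
        stmt.close();
        conn.close();
      }
    }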


Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java
    hive/trunk/ql/src/test/queries/clientpositive/allcolref_in_udf.q
    hive/trunk/ql/src/test/results/clientpositive/allcolref_in_udf.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g?rev=1452189&r1=1452188&r2=1452189&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g Mon Mar  4 06:01:23 2013
@@ -160,11 +160,11 @@ function
     LPAREN
       (
         (star=STAR)
-        | (dist=KW_DISTINCT)? (expression (COMMA expression)*)?
+        | (dist=KW_DISTINCT)? (selectExpression (COMMA selectExpression)*)?
       )
     RPAREN -> {$star != null}? ^(TOK_FUNCTIONSTAR functionName)
-           -> {$dist == null}? ^(TOK_FUNCTION functionName (expression+)?)
-                            -> ^(TOK_FUNCTIONDI functionName (expression+)?)
+           -> {$dist == null}? ^(TOK_FUNCTION functionName (selectExpression+)?)
+                            -> ^(TOK_FUNCTIONDI functionName (selectExpression+)?)
     ;
 
 functionName
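The effect of swapping expression for selectExpression is that allColRef ('*'
and 'tab.*') now parses inside a function's argument list, while a bare '*'
argument keeps its dedicated TOK_FUNCTIONSTAR token. A quick way to inspect the
resulting ASTs, as a sketch using Hive's own parser entry point (the output
shown in comments is abbreviated):

    import org.apache.hadoop.hive.ql.parse.ASTNode;
    import org.apache.hadoop.hive.ql.parse.ParseDriver;

    public class DumpFunctionAst {
      public static void main(String[] args) throws Exception {
        ParseDriver pd = new ParseDriver();
        // Bare star: ... (TOK_FUNCTIONSTAR concat)
        ASTNode star = pd.parse("select concat(*) from src");
        // Aliased star, accepted only after this change:
        // ... (TOK_FUNCTION concat (TOK_ALLCOLREF (TOK_TABNAME a)))
        ASTNode aliased = pd.parse("select concat(a.*) from src a");
        System.out.println(star.dump());
        System.out.println(aliased.dump());
      }
    }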

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1452189&r1=1452188&r2=1452189&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Mon Mar  4 06:01:23 2013
@@ -125,6 +125,7 @@ import org.apache.hadoop.hive.ql.plan.Cr
 import org.apache.hadoop.hive.ql.plan.DDLWork;
 import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
@@ -8727,6 +8728,9 @@ public class SemanticAnalyzer extends Ba
       }
       throw new SemanticException(errMsg);
     }
+    if (desc instanceof ExprNodeColumnListDesc) {
+      throw new SemanticException("TOK_ALLCOLREF is not supported in current context");
+    }
 
     if (!unparseTranslator.isEnabled()) {
       // Not creating a view, so no need to track view expansions.

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java?rev=1452189&r1=1452188&r2=1452189&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java Mon Mar  4 06:01:23 2013
@@ -48,6 +48,7 @@ import org.apache.hadoop.hive.ql.lib.Nod
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
@@ -518,14 +519,11 @@ public final class TypeCheckProcFactory 
           serdeConstants.DECIMAL_TYPE_NAME);
     }
 
-    public static boolean isRedundantConversionFunction(ASTNode expr,
+    private static boolean isRedundantConversionFunction(ASTNode expr,
         boolean isFunction, ArrayList<ExprNodeDesc> children) {
       if (!isFunction) {
         return false;
       }
-      // children is always one less than the expr.getChildCount(), since the
-      // latter contains function name.
-      assert (children.size() == expr.getChildCount() - 1);
       // conversion functions take a single parameter
       if (children.size() != 1) {
         return false;
@@ -862,6 +860,43 @@ public final class TypeCheckProcFactory 
 
       ASTNode expr = (ASTNode) nd;
 
+      if (expr.getType() == HiveParser.TOK_TABNAME) {
+        return null;
+      }
+
+      if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
+        RowResolver input = ctx.getInputRR();
+        ExprNodeColumnListDesc columnList = new ExprNodeColumnListDesc();
+        assert expr.getChildCount() <= 1;
+        if (expr.getChildCount() == 1) {
+          // table aliased (select a.*, for example)
+          ASTNode child = (ASTNode) expr.getChild(0);
+          assert child.getType() == HiveParser.TOK_TABNAME;
+          assert child.getChildCount() == 1;
+          String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(child.getChild(0).getText());
+          HashMap<String, ColumnInfo> columns = input.getFieldMap(tableAlias);
+          if (columns == null) {
+            throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(child));
+          }
+          for (Map.Entry<String, ColumnInfo> colMap : columns.entrySet()) {
+            ColumnInfo colInfo = colMap.getValue();
+            if (!colInfo.getIsVirtualCol()) {
+              columnList.addColumn(new ExprNodeColumnDesc(colInfo.getType(),
+                  colInfo.getInternalName(), colInfo.getTabAlias(), false));
+            }
+          }
+        } else {
+          // all columns (select *, for example)
+          for (ColumnInfo colInfo : input.getColumnInfos()) {
+            if (!colInfo.getIsVirtualCol()) {
+              columnList.addColumn(new ExprNodeColumnDesc(colInfo.getType(),
+                  colInfo.getInternalName(), colInfo.getTabAlias(), false));
+            }
+          }
+        }
+        return columnList;
+      }
+
       // If the first child is a TOK_TABLE_OR_COL, and nodeOutput[0] is NULL,
       // and the operator is a DOT, then it's a table column reference.
       if (expr.getType() == HiveParser.DOT
@@ -893,7 +928,9 @@ public final class TypeCheckProcFactory 
         return null;
       }
 
-      boolean isFunction = (expr.getType() == HiveParser.TOK_FUNCTION);
+      boolean isFunction = (expr.getType() == HiveParser.TOK_FUNCTION ||
+          expr.getType() == HiveParser.TOK_FUNCTIONSTAR ||
+          expr.getType() == HiveParser.TOK_FUNCTIONDI);
 
       // Create all children
       int childrenBegin = (isFunction ? 1 : 0);
@@ -901,7 +938,21 @@ public final class TypeCheckProcFactory 
           .getChildCount()
           - childrenBegin);
       for (int ci = childrenBegin; ci < expr.getChildCount(); ci++) {
-        children.add((ExprNodeDesc) nodeOutputs[ci]);
+        if (nodeOutputs[ci] instanceof ExprNodeColumnListDesc) {
+          children.addAll(((ExprNodeColumnListDesc)nodeOutputs[ci]).getChildren());
+        } else {
+          children.add((ExprNodeDesc) nodeOutputs[ci]);
+        }
+      }
+
+      if (expr.getType() == HiveParser.TOK_FUNCTIONSTAR) {
+        RowResolver input = ctx.getInputRR();
+        for (ColumnInfo colInfo : input.getColumnInfos()) {
+          if (!colInfo.getIsVirtualCol()) {
+            children.add(new ExprNodeColumnDesc(colInfo.getType(),
+                colInfo.getInternalName(), colInfo.getTabAlias(), false));
+          }
+        }
       }
 
       // If any of the children contains null, then return a null
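To make the splice above concrete: for concat(a.*, b.key), the TOK_ALLCOLREF
child evaluates to an ExprNodeColumnListDesc, and the loop inlines its children
so the UDF sees a flat argument list. A standalone sketch (the column list is
built by hand with assumed column names; in the real path it comes out of
nodeOutputs):

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class FlattenSketch {
      public static void main(String[] args) {
        // Stand-ins for nodeOutputs of concat(a.*, b.key):
        ExprNodeColumnListDesc aStar = new ExprNodeColumnListDesc();
        aStar.addColumn(new ExprNodeColumnDesc(
            TypeInfoFactory.stringTypeInfo, "key", "a", false));
        aStar.addColumn(new ExprNodeColumnDesc(
            TypeInfoFactory.stringTypeInfo, "value", "a", false));
        ExprNodeDesc bKey = new ExprNodeColumnDesc(
            TypeInfoFactory.stringTypeInfo, "key", "b", false);
        Object[] nodeOutputs = { aStar, bKey };

        // Same splice as the patched loop in TypeCheckProcFactory.
        List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
        for (Object out : nodeOutputs) {
          if (out instanceof ExprNodeColumnListDesc) {
            children.addAll(((ExprNodeColumnListDesc) out).getChildren());
          } else {
            children.add((ExprNodeDesc) out);
          }
        }
        System.out.println(children.size()); // 3: concat(a.key, a.value, b.key)
      }
    }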

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java?rev=1452189&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java Mon Mar  4 06:01:23 2013
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * Dummy desc used only to carry the column expansion of TOK_ALLCOLREF; it
+ * should not be used outside of TypeCheckProcFactory.
+ */
+public class ExprNodeColumnListDesc extends ExprNodeDesc {
+
+  List<ExprNodeColumnDesc> columns = new ArrayList<ExprNodeColumnDesc>();
+
+  public void addColumn(ExprNodeColumnDesc column) {
+    columns.add(column);
+  }
+
+  @Override
+  public ExprNodeDesc clone() {
+    ExprNodeColumnListDesc clone = new ExprNodeColumnListDesc();
+    clone.columns = new ArrayList<ExprNodeColumnDesc>(columns);
+    return clone;
+  }
+
+  @Override
+  public boolean isSame(Object o) {
+    if (o instanceof ExprNodeColumnListDesc) {
+      return columns.equals(((ExprNodeColumnListDesc)o).columns);
+    }
+    return false;
+  }
+
+  @Override
+  public TypeInfo getTypeInfo() {
+    throw new IllegalStateException();
+  }
+
+  @Override
+  public void setTypeInfo(TypeInfo typeInfo) {
+    throw new IllegalStateException();
+  }
+
+  @Override
+  public ObjectInspector getWritableObjectInspector() {
+    throw new IllegalStateException();
+  }
+
+  @Override
+  public String getTypeString() {
+    throw new IllegalStateException();
+  }
+
+  @Override
+  public List<String> getCols() {
+    List<String> cols = new ArrayList<String>();
+    for (ExprNodeColumnDesc column : columns) {
+      cols.add(column.getColumn());
+    }
+    return cols;
+  }
+
+  @Override
+  public List<ExprNodeDesc> getChildren() {
+    return new ArrayList<ExprNodeDesc>(columns);
+  }
+}
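A small sketch of the class's contract, assuming only what is defined above:
the object is a transparent container for the expanded columns, and any attempt
to treat it as a real expression fails fast, which is what the SemanticAnalyzer
guard relies on.

    import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class ColumnListContract {
      public static void main(String[] args) {
        ExprNodeColumnListDesc list = new ExprNodeColumnListDesc();
        list.addColumn(new ExprNodeColumnDesc(
            TypeInfoFactory.stringTypeInfo, "key", "a", false));
        list.addColumn(new ExprNodeColumnDesc(
            TypeInfoFactory.stringTypeInfo, "value", "a", false));

        System.out.println(list.getCols());            // [key, value]
        System.out.println(list.getChildren().size()); // 2

        try {
          list.getTypeInfo(); // placeholder has no type of its own
        } catch (IllegalStateException expected) {
          System.out.println("column list must be consumed by a UDF call");
        }
      }
    }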

Added: hive/trunk/ql/src/test/queries/clientpositive/allcolref_in_udf.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/allcolref_in_udf.q?rev=1452189&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/allcolref_in_udf.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/allcolref_in_udf.q Mon Mar  4 06:01:23 2013
@@ -0,0 +1,16 @@
+explain
+select concat(*),array(*) from src where key < 100 limit 10;
+
+select concat(*),array(*) from src where key < 100 limit 10;
+
+-- The order of columns is decided by the row schema of the previous operator.
+-- For joins with two or more aliases, columns run from the leftmost alias to the rightmost.
+
+explain
+select stack(2, *) as (e1,e2,e3) from (
+  select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*)
+  from src a join src b on a.key+1=b.key where a.key < 100) x limit 10;
+
+select stack(2, *) as (e1,e2,e3) from (
+  select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*)
+  from src a join src b on a.key+1=b.key where a.key < 100) x limit 10;

Added: hive/trunk/ql/src/test/results/clientpositive/allcolref_in_udf.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/allcolref_in_udf.q.out?rev=1452189&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/allcolref_in_udf.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/allcolref_in_udf.q.out Mon Mar  4 06:01:23 2013
@@ -0,0 +1,188 @@
+PREHOOK: query: explain
+select concat(*),array(*) from src where key < 100 limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select concat(*),array(*) from src where key < 100 limit 10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR concat)) (TOK_SELEXPR (TOK_FUNCTIONSTAR array))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 100)) (TOK_LIMIT 10)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: (key < 100.0)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: concat(key, value)
+                      type: string
+                      expr: array(key,value)
+                      type: array<string>
+                outputColumnNames: _col0, _col1
+                Limit
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+
+
+PREHOOK: query: select concat(*),array(*) from src where key < 100 limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select concat(*),array(*) from src where key < 100 limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86val_86	["86","val_86"]
+27val_27	["27","val_27"]
+98val_98	["98","val_98"]
+66val_66	["66","val_66"]
+37val_37	["37","val_37"]
+15val_15	["15","val_15"]
+82val_82	["82","val_82"]
+17val_17	["17","val_17"]
+0val_0	["0","val_0"]
+57val_57	["57","val_57"]
+PREHOOK: query: -- The order of columns is decided by the row schema of the previous operator.
+-- For joins with two or more aliases, columns run from the leftmost alias to the rightmost.
+
+explain
+select stack(2, *) as (e1,e2,e3) from (
+  select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*)
+  from src a join src b on a.key+1=b.key where a.key < 100) x limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The order of columns is decided by the row schema of the previous operator.
+-- For joins with two or more aliases, columns run from the leftmost alias to the rightmost.
+
+explain
+select stack(2, *) as (e1,e2,e3) from (
+  select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*)
+  from src a join src b on a.key+1=b.key where a.key < 100) x limit 10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (+ (. (TOK_TABLE_OR_COL a) key) 1) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR concat)) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_ALLCOLREF (TOK_TABNAME a)))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_ALLCOLREF (TOK_TABNAME b)))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_ALLCOLREF (TOK_TABNAME a)) (. (TOK_TABLE_OR_COL b) key))) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) key) (TOK_ALLCOLREF (TOK_TABNAME b))))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL a) key) 100)))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION stack 2 TOK_ALLCOLREF) e1 e2 e3)) (TOK_LIMIT 10)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        x:a 
+          TableScan
+            alias: a
+            Filter Operator
+              predicate:
+                  expr: (key < 100.0)
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: (key + 1)
+                      type: double
+                sort order: +
+                Map-reduce partition columns:
+                      expr: (key + 1)
+                      type: double
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+        x:b 
+          TableScan
+            alias: b
+            Reduce Output Operator
+              key expressions:
+                    expr: UDFToDouble(key)
+                    type: double
+              sort order: +
+              Map-reduce partition columns:
+                    expr: UDFToDouble(key)
+                    type: double
+              tag: 1
+              value expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0} {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4, _col5
+          Select Operator
+            expressions:
+                  expr: 2
+                  type: int
+                  expr: concat(_col0, _col1, _col4, _col5)
+                  type: string
+                  expr: concat(_col0, _col1)
+                  type: string
+                  expr: concat(_col4, _col5)
+                  type: string
+                  expr: concat(_col0, _col1, _col4)
+                  type: string
+                  expr: concat(_col0, _col4, _col5)
+                  type: string
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+            UDTF Operator
+              function name: stack
+              Limit
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+
+
+PREHOOK: query: select stack(2, *) as (e1,e2,e3) from (
+  select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*)
+  from src a join src b on a.key+1=b.key where a.key < 100) x limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select stack(2, *) as (e1,e2,e3) from (
+  select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*)
+  from src a join src b on a.key+1=b.key where a.key < 100) x limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4val_45val_5	4val_4	5val_5
+4val_45	NULL	5val_5
+4val_45val_5	4val_4	5val_5
+4val_45	NULL	5val_5
+4val_45val_5	4val_4	5val_5
+4val_45	NULL	5val_5
+8val_89val_9	8val_8	9val_9
+8val_89	NULL	9val_9
+9val_910val_10	9val_9	10val_10
+9val_910	NULL	10val_10