You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2013/03/04 07:01:23 UTC
svn commit: r1452189 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/parse/ java/org/apache/hadoop/hive/ql/plan/
test/queries/clientpositive/ test/results/clientpositive/
Author: namit
Date: Mon Mar 4 06:01:23 2013
New Revision: 1452189
URL: http://svn.apache.org/r1452189
Log:
HIVE-3490 Implement * or a.* for arguments to UDFs
(Navis via namit)
Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java
hive/trunk/ql/src/test/queries/clientpositive/allcolref_in_udf.q
hive/trunk/ql/src/test/results/clientpositive/allcolref_in_udf.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g?rev=1452189&r1=1452188&r2=1452189&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g Mon Mar 4 06:01:23 2013
@@ -160,11 +160,11 @@ function
LPAREN
(
(star=STAR)
- | (dist=KW_DISTINCT)? (expression (COMMA expression)*)?
+ | (dist=KW_DISTINCT)? (selectExpression (COMMA selectExpression)*)?
)
RPAREN -> {$star != null}? ^(TOK_FUNCTIONSTAR functionName)
- -> {$dist == null}? ^(TOK_FUNCTION functionName (expression+)?)
- -> ^(TOK_FUNCTIONDI functionName (expression+)?)
+ -> {$dist == null}? ^(TOK_FUNCTION functionName (selectExpression+)?)
+ -> ^(TOK_FUNCTIONDI functionName (selectExpression+)?)
;
functionName
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1452189&r1=1452188&r2=1452189&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Mon Mar 4 06:01:23 2013
@@ -125,6 +125,7 @@ import org.apache.hadoop.hive.ql.plan.Cr
import org.apache.hadoop.hive.ql.plan.DDLWork;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
@@ -8727,6 +8728,9 @@ public class SemanticAnalyzer extends Ba
}
throw new SemanticException(errMsg);
}
+ if (desc instanceof ExprNodeColumnListDesc) {
+ throw new SemanticException("TOK_ALLCOLREF is not supported in current context");
+ }
if (!unparseTranslator.isEnabled()) {
// Not creating a view, so no need to track view expansions.
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java?rev=1452189&r1=1452188&r2=1452189&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java Mon Mar 4 06:01:23 2013
@@ -48,6 +48,7 @@ import org.apache.hadoop.hive.ql.lib.Nod
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
@@ -518,14 +519,11 @@ public final class TypeCheckProcFactory
serdeConstants.DECIMAL_TYPE_NAME);
}
- public static boolean isRedundantConversionFunction(ASTNode expr,
+ private static boolean isRedundantConversionFunction(ASTNode expr,
boolean isFunction, ArrayList<ExprNodeDesc> children) {
if (!isFunction) {
return false;
}
- // children is always one less than the expr.getChildCount(), since the
- // latter contains function name.
- assert (children.size() == expr.getChildCount() - 1);
// conversion functions take a single parameter
if (children.size() != 1) {
return false;
@@ -862,6 +860,43 @@ public final class TypeCheckProcFactory
ASTNode expr = (ASTNode) nd;
+ if (expr.getType() == HiveParser.TOK_TABNAME) {
+ return null;
+ }
+
+ if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
+ RowResolver input = ctx.getInputRR();
+ ExprNodeColumnListDesc columnList = new ExprNodeColumnListDesc();
+ assert expr.getChildCount() <= 1;
+ if (expr.getChildCount() == 1) {
+ // table aliased (select a.*, for example)
+ ASTNode child = (ASTNode) expr.getChild(0);
+ assert child.getType() == HiveParser.TOK_TABNAME;
+ assert child.getChildCount() == 1;
+ String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(child.getChild(0).getText());
+ HashMap<String, ColumnInfo> columns = input.getFieldMap(tableAlias);
+ if (columns == null) {
+ throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(child));
+ }
+ for (Map.Entry<String, ColumnInfo> colMap : columns.entrySet()) {
+ ColumnInfo colInfo = colMap.getValue();
+ if (!colInfo.getIsVirtualCol()) {
+ columnList.addColumn(new ExprNodeColumnDesc(colInfo.getType(),
+ colInfo.getInternalName(), colInfo.getTabAlias(), false));
+ }
+ }
+ } else {
+ // all columns (select *, for example)
+ for (ColumnInfo colInfo : input.getColumnInfos()) {
+ if (!colInfo.getIsVirtualCol()) {
+ columnList.addColumn(new ExprNodeColumnDesc(colInfo.getType(),
+ colInfo.getInternalName(), colInfo.getTabAlias(), false));
+ }
+ }
+ }
+ return columnList;
+ }
+
// If the first child is a TOK_TABLE_OR_COL, and nodeOutput[0] is NULL,
// and the operator is a DOT, then it's a table column reference.
if (expr.getType() == HiveParser.DOT
@@ -893,7 +928,9 @@ public final class TypeCheckProcFactory
return null;
}
- boolean isFunction = (expr.getType() == HiveParser.TOK_FUNCTION);
+ boolean isFunction = (expr.getType() == HiveParser.TOK_FUNCTION ||
+ expr.getType() == HiveParser.TOK_FUNCTIONSTAR ||
+ expr.getType() == HiveParser.TOK_FUNCTIONDI);
// Create all children
int childrenBegin = (isFunction ? 1 : 0);
@@ -901,7 +938,21 @@ public final class TypeCheckProcFactory
.getChildCount()
- childrenBegin);
for (int ci = childrenBegin; ci < expr.getChildCount(); ci++) {
- children.add((ExprNodeDesc) nodeOutputs[ci]);
+ if (nodeOutputs[ci] instanceof ExprNodeColumnListDesc) {
+ children.addAll(((ExprNodeColumnListDesc)nodeOutputs[ci]).getChildren());
+ } else {
+ children.add((ExprNodeDesc) nodeOutputs[ci]);
+ }
+ }
+
+ if (expr.getType() == HiveParser.TOK_FUNCTIONSTAR) {
+ RowResolver input = ctx.getInputRR();
+ for (ColumnInfo colInfo : input.getColumnInfos()) {
+ if (!colInfo.getIsVirtualCol()) {
+ children.add(new ExprNodeColumnDesc(colInfo.getType(),
+ colInfo.getInternalName(), colInfo.getTabAlias(), false));
+ }
+ }
}
// If any of the children contains null, then return a null
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java?rev=1452189&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeColumnListDesc.java Mon Mar 4 06:01:23 2013
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * Dummy desc only for populating TOK_ALLCOLREF and should not be used
+ * outside of TypeCheckProcFactory
+ */
+public class ExprNodeColumnListDesc extends ExprNodeDesc {
+
+ List<ExprNodeColumnDesc> columns = new ArrayList<ExprNodeColumnDesc>();
+
+ public void addColumn(ExprNodeColumnDesc column) {
+ columns.add(column);
+ }
+
+ @Override
+ public ExprNodeDesc clone() {
+ ExprNodeColumnListDesc clone = new ExprNodeColumnListDesc();
+ clone.columns = new ArrayList<ExprNodeColumnDesc>(columns);
+ return clone;
+ }
+
+ @Override
+ public boolean isSame(Object o) {
+ if (o instanceof ExprNodeColumnListDesc) {
+ return columns.equals(((ExprNodeColumnListDesc)o).columns);
+ }
+ return false;
+ }
+
+ @Override
+ public TypeInfo getTypeInfo() {
+ throw new IllegalStateException();
+ }
+
+ @Override
+ public void setTypeInfo(TypeInfo typeInfo) {
+ throw new IllegalStateException();
+ }
+
+ @Override
+ public ObjectInspector getWritableObjectInspector() {
+ throw new IllegalStateException();
+ }
+
+ @Override
+ public String getTypeString() {
+ throw new IllegalStateException();
+ }
+
+ @Override
+ public List<String> getCols() {
+ List<String> cols = new ArrayList<String>();
+ for (ExprNodeColumnDesc column : columns) {
+ cols.add(column.getColumn());
+ }
+ return cols;
+ }
+
+ @Override
+ public List<ExprNodeDesc> getChildren() {
+ return new ArrayList<ExprNodeDesc>(columns);
+ }
+}
Added: hive/trunk/ql/src/test/queries/clientpositive/allcolref_in_udf.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/allcolref_in_udf.q?rev=1452189&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/allcolref_in_udf.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/allcolref_in_udf.q Mon Mar 4 06:01:23 2013
@@ -0,0 +1,16 @@
+explain
+select concat(*),array(*) from src where key < 100 limit 10;
+
+select concat(*),array(*) from src where key < 100 limit 10;
+
+-- The order of columns is decided by row schema of prev operator
+-- Like join which has two or more aliases, it's from the leftmost alias to the right aliases.
+
+explain
+select stack(2, *) as (e1,e2,e3) from (
+ select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*)
+ from src a join src b on a.key+1=b.key where a.key < 100) x limit 10;
+
+select stack(2, *) as (e1,e2,e3) from (
+ select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*)
+ from src a join src b on a.key+1=b.key where a.key < 100) x limit 10;
Added: hive/trunk/ql/src/test/results/clientpositive/allcolref_in_udf.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/allcolref_in_udf.q.out?rev=1452189&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/allcolref_in_udf.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/allcolref_in_udf.q.out Mon Mar 4 06:01:23 2013
@@ -0,0 +1,188 @@
+PREHOOK: query: explain
+select concat(*),array(*) from src where key < 100 limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select concat(*),array(*) from src where key < 100 limit 10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR concat)) (TOK_SELEXPR (TOK_FUNCTIONSTAR array))) (TOK_WHERE (< (TOK_TABLE_OR_COL key) 100)) (TOK_LIMIT 10)))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Filter Operator
+ predicate:
+ expr: (key < 100.0)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: concat(key, value)
+ type: string
+ expr: array(key,value)
+ type: array<string>
+ outputColumnNames: _col0, _col1
+ Limit
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+
+
+PREHOOK: query: select concat(*),array(*) from src where key < 100 limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select concat(*),array(*) from src where key < 100 limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86val_86 ["86","val_86"]
+27val_27 ["27","val_27"]
+98val_98 ["98","val_98"]
+66val_66 ["66","val_66"]
+37val_37 ["37","val_37"]
+15val_15 ["15","val_15"]
+82val_82 ["82","val_82"]
+17val_17 ["17","val_17"]
+0val_0 ["0","val_0"]
+57val_57 ["57","val_57"]
+PREHOOK: query: -- The order of columns is decided by row schema of prev operator
+-- Like join which has two or more aliases, it's from the leftmost alias to the right aliases.
+
+explain
+select stack(2, *) as (e1,e2,e3) from (
+ select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*)
+ from src a join src b on a.key+1=b.key where a.key < 100) x limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The order of columns is decided by row schema of prev operator
+-- Like join which has two or more aliases, it's from the leftmost alias to the right aliases.
+
+explain
+select stack(2, *) as (e1,e2,e3) from (
+ select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*)
+ from src a join src b on a.key+1=b.key where a.key < 100) x limit 10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (+ (. (TOK_TABLE_OR_COL a) key) 1) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR concat)) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_ALLCOLREF (TOK_TABNAME a)))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_ALLCOLREF (TOK_TABNAME b)))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_ALLCOLREF (TOK_TABNAME a)) (. (TOK_TABLE_OR_COL b) key))) (TOK_SELEXPR (TOK_FUNCTION concat (. (TOK_TABLE_OR_COL a) key) (TOK_ALLCOLREF (TOK_TABNAME b))))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL a) key) 100)))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION stack 2 TOK_ALLCOLREF) e1 e2 e3)) (TOK_LIMIT 10)))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ x:a
+ TableScan
+ alias: a
+ Filter Operator
+ predicate:
+ expr: (key < 100.0)
+ type: boolean
+ Reduce Output Operator
+ key expressions:
+ expr: (key + 1)
+ type: double
+ sort order: +
+ Map-reduce partition columns:
+ expr: (key + 1)
+ type: double
+ tag: 0
+ value expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ x:b
+ TableScan
+ alias: b
+ Reduce Output Operator
+ key expressions:
+ expr: UDFToDouble(key)
+ type: double
+ sort order: +
+ Map-reduce partition columns:
+ expr: UDFToDouble(key)
+ type: double
+ tag: 1
+ value expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1}
+ 1 {VALUE._col0} {VALUE._col1}
+ handleSkewJoin: false
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Select Operator
+ expressions:
+ expr: 2
+ type: int
+ expr: concat(_col0, _col1, _col4, _col5)
+ type: string
+ expr: concat(_col0, _col1)
+ type: string
+ expr: concat(_col4, _col5)
+ type: string
+ expr: concat(_col0, _col1, _col4)
+ type: string
+ expr: concat(_col0, _col4, _col5)
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ UDTF Operator
+ function name: stack
+ Limit
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+
+
+PREHOOK: query: select stack(2, *) as (e1,e2,e3) from (
+ select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*)
+ from src a join src b on a.key+1=b.key where a.key < 100) x limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select stack(2, *) as (e1,e2,e3) from (
+ select concat(*), concat(a.*), concat(b.*), concat(a.*, b.key), concat(a.key, b.*)
+ from src a join src b on a.key+1=b.key where a.key < 100) x limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4val_45val_5 4val_4 5val_5
+4val_45 NULL 5val_5
+4val_45val_5 4val_4 5val_5
+4val_45 NULL 5val_5
+4val_45val_5 4val_4 5val_5
+4val_45 NULL 5val_5
+8val_89val_9 8val_8 9val_9
+8val_89 NULL 9val_9
+9val_910val_10 9val_9 10val_10
+9val_910 NULL 10val_10