You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/06/12 02:13:33 UTC
svn commit: r783961 - in /hadoop/hive/trunk: ./
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/
ql/src/test/queries/clientnegative/ ql/src/test/queries/clientpositive/
ql/src/test/results/clientnegative/ q...
Author: namit
Date: Fri Jun 12 00:13:33 2009
New Revision: 783961
URL: http://svn.apache.org/viewvc?rev=783961&view=rev
Log:
HIVE-470. Add COALESCE. (Zheng Shao via namit)
Added:
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java
hadoop/hive/trunk/ql/src/test/queries/clientnegative/udf_coalesce.q
hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_coalesce.q
hadoop/hive/trunk/ql/src/test/results/clientnegative/udf_coalesce.q.out
hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_coalesce.q.out
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=783961&r1=783960&r2=783961&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Fri Jun 12 00:13:33 2009
@@ -41,6 +41,8 @@
HIVE-195. Map-side join. (Namit Jain via zshao)
+ HIVE-470. Add COALESCE. (Zheng Shao via namit)
+
IMPROVEMENTS
HIVE-389. Option to build without ivy (jssarma)
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=783961&r1=783960&r2=783961&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Fri Jun 12 00:13:33 2009
@@ -162,6 +162,7 @@
registerGenericUDF("case", GenericUDFCase.class);
registerGenericUDF("when", GenericUDFWhen.class);
registerGenericUDF("hash", GenericUDFHash.class);
+ registerGenericUDF("coalesce", GenericUDFCoalesce.class);
}
public static FunctionInfo getInfo(Class<?> fClass) {
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java?rev=783961&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java Fri Jun 12 00:13:33 2009
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+/**
+ * GenericUDF Class for SQL construct "COALESCE(a, b, c)".
+ *
+ * NOTES:
+ * 1. a, b and c should have the same TypeInfo, or an exception will be thrown.
+ */
+public class GenericUDFCoalesce extends GenericUDF {
+
+ private static Log LOG = LogFactory.getLog(GenericUDFCoalesce.class.getName());
+
+ ObjectInspector[] argumentOIs;
+ GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
+
+ @Override
+ public ObjectInspector initialize(ObjectInspector[] arguments)
+ throws UDFArgumentTypeException {
+
+ this.argumentOIs = arguments;
+
+ returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver();
+ for (int i=0; i<arguments.length; i++) {
+ if (!returnOIResolver.update(arguments[i])) {
+ throw new UDFArgumentTypeException(i,
+ "The expressions after COALESCE should all have the same type: \""
+ + returnOIResolver.get().getTypeName() + "\" is expected but \""
+ + arguments[i].getTypeName() + "\" is found");
+ }
+ }
+ return returnOIResolver.get();
+ }
+
+ @Override
+ public Object evaluate(DeferredObject[] arguments) throws HiveException {
+ for (int i=0; i<arguments.length; i++) {
+ Object ai = arguments[i].get();
+ if (ai == null) {
+ continue;
+ }
+ return returnOIResolver.convertIfNecessary(ai, argumentOIs[i]);
+ }
+ return null;
+ }
+
+ @Override
+ public String getDisplayString(String[] children) {
+ StringBuilder sb = new StringBuilder();
+ sb.append("COALESCE(");
+ if (children.length > 0) {
+ sb.append(children[0]);
+ for(int i=1; i<children.length; i++) {
+ sb.append(",");
+ sb.append(children[i]);
+ }
+ }
+ sb.append(")");
+ return sb.toString();
+ }
+
+}
Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/udf_coalesce.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/udf_coalesce.q?rev=783961&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/udf_coalesce.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/udf_coalesce.q Fri Jun 12 00:13:33 2009
@@ -0,0 +1 @@
+SELECT COALESCE(1, 2.0) FROM src LIMIT 1;
Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_coalesce.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_coalesce.q?rev=783961&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_coalesce.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_coalesce.q Fri Jun 12 00:13:33 2009
@@ -0,0 +1,51 @@
+EXPLAIN
+SELECT COALESCE(1),
+ COALESCE(1, 2),
+ COALESCE(NULL, 2),
+ COALESCE(1, NULL),
+ COALESCE(NULL, NULL, 3),
+ COALESCE(4, NULL, NULL, NULL),
+ COALESCE('1'),
+ COALESCE('1', '2'),
+ COALESCE(NULL, '2'),
+ COALESCE('1', NULL),
+ COALESCE(NULL, NULL, '3'),
+ COALESCE('4', NULL, NULL, NULL),
+ COALESCE(1.0),
+ COALESCE(1.0, 2.0),
+ COALESCE(NULL, 2.0),
+ COALESCE(NULL, 2.0, 3.0),
+ COALESCE(2.0, NULL, 3.0),
+ COALESCE(IF(TRUE, NULL, 0), NULL)
+FROM src LIMIT 1;
+
+SELECT COALESCE(1),
+ COALESCE(1, 2),
+ COALESCE(NULL, 2),
+ COALESCE(1, NULL),
+ COALESCE(NULL, NULL, 3),
+ COALESCE(4, NULL, NULL, NULL),
+ COALESCE('1'),
+ COALESCE('1', '2'),
+ COALESCE(NULL, '2'),
+ COALESCE('1', NULL),
+ COALESCE(NULL, NULL, '3'),
+ COALESCE('4', NULL, NULL, NULL),
+ COALESCE(1.0),
+ COALESCE(1.0, 2.0),
+ COALESCE(NULL, 2.0),
+ COALESCE(NULL, 2.0, 3.0),
+ COALESCE(2.0, NULL, 3.0),
+ COALESCE(IF(TRUE, NULL, 0), NULL)
+FROM src LIMIT 1;
+
+EXPLAIN
+SELECT COALESCE(src_thrift.lint[1], 999),
+ COALESCE(src_thrift.lintstring[0].mystring, '999'),
+ COALESCE(src_thrift.mstringstring['key_2'], '999')
+FROM src_thrift;
+
+SELECT COALESCE(src_thrift.lint[1], 999),
+ COALESCE(src_thrift.lintstring[0].mystring, '999'),
+ COALESCE(src_thrift.mstringstring['key_2'], '999')
+FROM src_thrift;
Added: hadoop/hive/trunk/ql/src/test/results/clientnegative/udf_coalesce.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientnegative/udf_coalesce.q.out?rev=783961&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientnegative/udf_coalesce.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientnegative/udf_coalesce.q.out Fri Jun 12 00:13:33 2009
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: line 1:19 Argument Type Mismatch 2.0: The expressions after COALESCE should all have the same type: "int" is expected but "double" is found
Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_coalesce.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_coalesce.q.out?rev=783961&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_coalesce.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_coalesce.q.out Fri Jun 12 00:13:33 2009
@@ -0,0 +1,168 @@
+query: EXPLAIN
+SELECT COALESCE(1),
+ COALESCE(1, 2),
+ COALESCE(NULL, 2),
+ COALESCE(1, NULL),
+ COALESCE(NULL, NULL, 3),
+ COALESCE(4, NULL, NULL, NULL),
+ COALESCE('1'),
+ COALESCE('1', '2'),
+ COALESCE(NULL, '2'),
+ COALESCE('1', NULL),
+ COALESCE(NULL, NULL, '3'),
+ COALESCE('4', NULL, NULL, NULL),
+ COALESCE(1.0),
+ COALESCE(1.0, 2.0),
+ COALESCE(NULL, 2.0),
+ COALESCE(NULL, 2.0, 3.0),
+ COALESCE(2.0, NULL, 3.0),
+ COALESCE(IF(TRUE, NULL, 0), NULL)
+FROM src LIMIT 1
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COALESCE 1)) (TOK_SELEXPR (TOK_FUNCTION COALESCE 1 2)) (TOK_SELEXPR (TOK_FUNCTION COALESCE TOK_NULL 2)) (TOK_SELEXPR (TOK_FUNCTION COALESCE 1 TOK_NULL)) (TOK_SELEXPR (TOK_FUNCTION COALESCE TOK_NULL TOK_NULL 3)) (TOK_SELEXPR (TOK_FUNCTION COALESCE 4 TOK_NULL TOK_NULL TOK_NULL)) (TOK_SELEXPR (TOK_FUNCTION COALESCE '1')) (TOK_SELEXPR (TOK_FUNCTION COALESCE '1' '2')) (TOK_SELEXPR (TOK_FUNCTION COALESCE TOK_NULL '2')) (TOK_SELEXPR (TOK_FUNCTION COALESCE '1' TOK_NULL)) (TOK_SELEXPR (TOK_FUNCTION COALESCE TOK_NULL TOK_NULL '3')) (TOK_SELEXPR (TOK_FUNCTION COALESCE '4' TOK_NULL TOK_NULL TOK_NULL)) (TOK_SELEXPR (TOK_FUNCTION COALESCE 1.0)) (TOK_SELEXPR (TOK_FUNCTION COALESCE 1.0 2.0)) (TOK_SELEXPR (TOK_FUNCTION COALESCE TOK_NULL 2.0)) (TOK_SELEXPR (TOK_FUNCTION COALESCE TOK_NULL 2.0 3.0)) (TOK_SELEXPR (TOK_FUNCTION COALESCE 2.0 TOK_NULL 3.0)) (TOK_SELEX
PR (TOK_FUNCTION COALESCE (TOK_FUNCTION IF TRUE TOK_NULL 0) TOK_NULL))) (TOK_LIMIT 1)))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ Select Operator
+ Select Operator
+ expressions:
+ expr: COALESCE(1)
+ type: int
+ expr: COALESCE(1,2)
+ type: int
+ expr: COALESCE(null,2)
+ type: int
+ expr: COALESCE(1,null)
+ type: int
+ expr: COALESCE(null,null,3)
+ type: int
+ expr: COALESCE(4,null,null,null)
+ type: int
+ expr: COALESCE('1')
+ type: string
+ expr: COALESCE('1','2')
+ type: string
+ expr: COALESCE(null,'2')
+ type: string
+ expr: COALESCE('1',null)
+ type: string
+ expr: COALESCE(null,null,'3')
+ type: string
+ expr: COALESCE('4',null,null,null)
+ type: string
+ expr: COALESCE(1.0)
+ type: double
+ expr: COALESCE(1.0,2.0)
+ type: double
+ expr: COALESCE(null,2.0)
+ type: double
+ expr: COALESCE(null,2.0,3.0)
+ type: double
+ expr: COALESCE(2.0,null,3.0)
+ type: double
+ expr: COALESCE(if true, null, 0,null)
+ type: int
+ Limit
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+
+
+query: SELECT COALESCE(1),
+ COALESCE(1, 2),
+ COALESCE(NULL, 2),
+ COALESCE(1, NULL),
+ COALESCE(NULL, NULL, 3),
+ COALESCE(4, NULL, NULL, NULL),
+ COALESCE('1'),
+ COALESCE('1', '2'),
+ COALESCE(NULL, '2'),
+ COALESCE('1', NULL),
+ COALESCE(NULL, NULL, '3'),
+ COALESCE('4', NULL, NULL, NULL),
+ COALESCE(1.0),
+ COALESCE(1.0, 2.0),
+ COALESCE(NULL, 2.0),
+ COALESCE(NULL, 2.0, 3.0),
+ COALESCE(2.0, NULL, 3.0),
+ COALESCE(IF(TRUE, NULL, 0), NULL)
+FROM src LIMIT 1
+Input: default/src
+Output: file:/data/users/zshao/tools/495-trunk-apache-hive/build/ql/tmp/64965527/10000
+1 1 2 1 3 4 1 1 2 1 3 4 1.0 1.0 2.0 2.0 2.0 NULL
+query: EXPLAIN
+SELECT COALESCE(src_thrift.lint[1], 999),
+ COALESCE(src_thrift.lintstring[0].mystring, '999'),
+ COALESCE(src_thrift.mstringstring['key_2'], '999')
+FROM src_thrift
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF src_thrift)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COALESCE ([ (. (TOK_TABLE_OR_COL src_thrift) lint) 1) 999)) (TOK_SELEXPR (TOK_FUNCTION COALESCE (. ([ (. (TOK_TABLE_OR_COL src_thrift) lintstring) 0) mystring) '999')) (TOK_SELEXPR (TOK_FUNCTION COALESCE ([ (. (TOK_TABLE_OR_COL src_thrift) mstringstring) 'key_2') '999')))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src_thrift
+ Select Operator
+ expressions:
+ expr: lint
+ type: array<int>
+ expr: lintstring
+ type: array<struct<myint:int,mystring:string,underscore_int:int>>
+ expr: mstringstring
+ type: map<string,string>
+ Select Operator
+ expressions:
+ expr: COALESCE(0[1],999)
+ type: int
+ expr: COALESCE(1[0].mystring,'999')
+ type: string
+ expr: COALESCE(2['key_2'],'999')
+ type: string
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+query: SELECT COALESCE(src_thrift.lint[1], 999),
+ COALESCE(src_thrift.lintstring[0].mystring, '999'),
+ COALESCE(src_thrift.mstringstring['key_2'], '999')
+FROM src_thrift
+Input: default/src_thrift
+Output: file:/data/users/zshao/tools/495-trunk-apache-hive/build/ql/tmp/2056249139/10000
+0 0 999
+2 1 999
+4 8 value_2
+6 27 999
+8 64 999
+10 125 999
+12 216 999
+14 343 999
+16 512 999
+18 729 999
+999 999 999