You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/06/12 02:13:33 UTC

svn commit: r783961 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/ ql/src/test/queries/clientnegative/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientnegative/ q...

Author: namit
Date: Fri Jun 12 00:13:33 2009
New Revision: 783961

URL: http://svn.apache.org/viewvc?rev=783961&view=rev
Log:
HIVE-470. Add COALESCE. (Zheng Shao via namit)


Added:
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java
    hadoop/hive/trunk/ql/src/test/queries/clientnegative/udf_coalesce.q
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_coalesce.q
    hadoop/hive/trunk/ql/src/test/results/clientnegative/udf_coalesce.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_coalesce.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=783961&r1=783960&r2=783961&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Fri Jun 12 00:13:33 2009
@@ -41,6 +41,8 @@
 
     HIVE-195. Map-side join. (Namit Jain via zshao)
 
+    HIVE-470. Add COALESCE. (Zheng Shao via namit)
+
   IMPROVEMENTS
     HIVE-389. Option to build without ivy (jssarma)
 

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=783961&r1=783960&r2=783961&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Fri Jun 12 00:13:33 2009
@@ -162,6 +162,7 @@
     registerGenericUDF("case", GenericUDFCase.class);
     registerGenericUDF("when", GenericUDFWhen.class);
     registerGenericUDF("hash", GenericUDFHash.class);
+    registerGenericUDF("coalesce", GenericUDFCoalesce.class);
   }
 
   public static FunctionInfo getInfo(Class<?> fClass) {

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java?rev=783961&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java Fri Jun 12 00:13:33 2009
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+/**
+ * GenericUDF Class for SQL construct "COALESCE(a, b, c)".
+ * 
+ * NOTES:
+ * 1. a, b and c should have the same TypeInfo, or an exception will be thrown.
+ */
+public class GenericUDFCoalesce extends GenericUDF {
+
+  private static Log LOG = LogFactory.getLog(GenericUDFCoalesce.class.getName());
+
+  ObjectInspector[] argumentOIs;
+  GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
+  
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments)
+      throws UDFArgumentTypeException {
+    
+    this.argumentOIs = arguments;
+    
+    returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver();
+    for (int i=0; i<arguments.length; i++) {
+      if (!returnOIResolver.update(arguments[i])) {
+        throw new UDFArgumentTypeException(i,
+            "The expressions after COALESCE should all have the same type: \""
+            + returnOIResolver.get().getTypeName() + "\" is expected but \"" 
+            + arguments[i].getTypeName() + "\" is found");
+      }
+    }
+    return returnOIResolver.get();
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    for (int i=0; i<arguments.length; i++) {
+      Object ai = arguments[i].get();
+      if (ai == null) {
+        continue;
+      }
+      return returnOIResolver.convertIfNecessary(ai, argumentOIs[i]);
+    }
+    return null;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    StringBuilder sb = new StringBuilder();
+    sb.append("COALESCE(");
+    if (children.length > 0) {
+      sb.append(children[0]);
+      for(int i=1; i<children.length; i++) {
+        sb.append(",");
+        sb.append(children[i]);
+      }
+    }
+    sb.append(")");
+    return sb.toString();
+  }
+
+}

Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/udf_coalesce.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/udf_coalesce.q?rev=783961&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/udf_coalesce.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/udf_coalesce.q Fri Jun 12 00:13:33 2009
@@ -0,0 +1 @@
+SELECT COALESCE(1, 2.0) FROM src LIMIT 1;

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_coalesce.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_coalesce.q?rev=783961&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_coalesce.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_coalesce.q Fri Jun 12 00:13:33 2009
@@ -0,0 +1,51 @@
+EXPLAIN
+SELECT COALESCE(1),
+       COALESCE(1, 2),
+       COALESCE(NULL, 2),
+       COALESCE(1, NULL),
+       COALESCE(NULL, NULL, 3),
+       COALESCE(4, NULL, NULL, NULL),
+       COALESCE('1'),
+       COALESCE('1', '2'),
+       COALESCE(NULL, '2'),
+       COALESCE('1', NULL),
+       COALESCE(NULL, NULL, '3'),
+       COALESCE('4', NULL, NULL, NULL),
+       COALESCE(1.0),
+       COALESCE(1.0, 2.0),
+       COALESCE(NULL, 2.0),
+       COALESCE(NULL, 2.0, 3.0),
+       COALESCE(2.0, NULL, 3.0),
+       COALESCE(IF(TRUE, NULL, 0), NULL)
+FROM src LIMIT 1;
+
+SELECT COALESCE(1),
+       COALESCE(1, 2),
+       COALESCE(NULL, 2),
+       COALESCE(1, NULL),
+       COALESCE(NULL, NULL, 3),
+       COALESCE(4, NULL, NULL, NULL),
+       COALESCE('1'),
+       COALESCE('1', '2'),
+       COALESCE(NULL, '2'),
+       COALESCE('1', NULL),
+       COALESCE(NULL, NULL, '3'),
+       COALESCE('4', NULL, NULL, NULL),
+       COALESCE(1.0),
+       COALESCE(1.0, 2.0),
+       COALESCE(NULL, 2.0),
+       COALESCE(NULL, 2.0, 3.0),
+       COALESCE(2.0, NULL, 3.0),
+       COALESCE(IF(TRUE, NULL, 0), NULL)
+FROM src LIMIT 1;
+
+EXPLAIN
+SELECT COALESCE(src_thrift.lint[1], 999),
+       COALESCE(src_thrift.lintstring[0].mystring, '999'),
+       COALESCE(src_thrift.mstringstring['key_2'], '999')
+FROM src_thrift;
+
+SELECT COALESCE(src_thrift.lint[1], 999),
+       COALESCE(src_thrift.lintstring[0].mystring, '999'),
+       COALESCE(src_thrift.mstringstring['key_2'], '999')
+FROM src_thrift;

Added: hadoop/hive/trunk/ql/src/test/results/clientnegative/udf_coalesce.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientnegative/udf_coalesce.q.out?rev=783961&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientnegative/udf_coalesce.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientnegative/udf_coalesce.q.out Fri Jun 12 00:13:33 2009
@@ -0,0 +1 @@
+FAILED: Error in semantic analysis: line 1:19 Argument Type Mismatch 2.0: The expressions after COALESCE should all have the same type: "int" is expected but "double" is found

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_coalesce.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_coalesce.q.out?rev=783961&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_coalesce.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_coalesce.q.out Fri Jun 12 00:13:33 2009
@@ -0,0 +1,168 @@
+query: EXPLAIN
+SELECT COALESCE(1),
+       COALESCE(1, 2),
+       COALESCE(NULL, 2),
+       COALESCE(1, NULL),
+       COALESCE(NULL, NULL, 3),
+       COALESCE(4, NULL, NULL, NULL),
+       COALESCE('1'),
+       COALESCE('1', '2'),
+       COALESCE(NULL, '2'),
+       COALESCE('1', NULL),
+       COALESCE(NULL, NULL, '3'),
+       COALESCE('4', NULL, NULL, NULL),
+       COALESCE(1.0),
+       COALESCE(1.0, 2.0),
+       COALESCE(NULL, 2.0),
+       COALESCE(NULL, 2.0, 3.0),
+       COALESCE(2.0, NULL, 3.0),
+       COALESCE(IF(TRUE, NULL, 0), NULL)
+FROM src LIMIT 1
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COALESCE 1)) (TOK_SELEXPR (TOK_FUNCTION COALESCE 1 2)) (TOK_SELEXPR (TOK_FUNCTION COALESCE TOK_NULL 2)) (TOK_SELEXPR (TOK_FUNCTION COALESCE 1 TOK_NULL)) (TOK_SELEXPR (TOK_FUNCTION COALESCE TOK_NULL TOK_NULL 3)) (TOK_SELEXPR (TOK_FUNCTION COALESCE 4 TOK_NULL TOK_NULL TOK_NULL)) (TOK_SELEXPR (TOK_FUNCTION COALESCE '1')) (TOK_SELEXPR (TOK_FUNCTION COALESCE '1' '2')) (TOK_SELEXPR (TOK_FUNCTION COALESCE TOK_NULL '2')) (TOK_SELEXPR (TOK_FUNCTION COALESCE '1' TOK_NULL)) (TOK_SELEXPR (TOK_FUNCTION COALESCE TOK_NULL TOK_NULL '3')) (TOK_SELEXPR (TOK_FUNCTION COALESCE '4' TOK_NULL TOK_NULL TOK_NULL)) (TOK_SELEXPR (TOK_FUNCTION COALESCE 1.0)) (TOK_SELEXPR (TOK_FUNCTION COALESCE 1.0 2.0)) (TOK_SELEXPR (TOK_FUNCTION COALESCE TOK_NULL 2.0)) (TOK_SELEXPR (TOK_FUNCTION COALESCE TOK_NULL 2.0 3.0)) (TOK_SELEXPR (TOK_FUNCTION COALESCE 2.0 TOK_NULL 3.0)) (TOK_SELEX
 PR (TOK_FUNCTION COALESCE (TOK_FUNCTION IF TRUE TOK_NULL 0) TOK_NULL))) (TOK_LIMIT 1)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+            Select Operator
+              Select Operator
+                expressions:
+                      expr: COALESCE(1)
+                      type: int
+                      expr: COALESCE(1,2)
+                      type: int
+                      expr: COALESCE(null,2)
+                      type: int
+                      expr: COALESCE(1,null)
+                      type: int
+                      expr: COALESCE(null,null,3)
+                      type: int
+                      expr: COALESCE(4,null,null,null)
+                      type: int
+                      expr: COALESCE('1')
+                      type: string
+                      expr: COALESCE('1','2')
+                      type: string
+                      expr: COALESCE(null,'2')
+                      type: string
+                      expr: COALESCE('1',null)
+                      type: string
+                      expr: COALESCE(null,null,'3')
+                      type: string
+                      expr: COALESCE('4',null,null,null)
+                      type: string
+                      expr: COALESCE(1.0)
+                      type: double
+                      expr: COALESCE(1.0,2.0)
+                      type: double
+                      expr: COALESCE(null,2.0)
+                      type: double
+                      expr: COALESCE(null,2.0,3.0)
+                      type: double
+                      expr: COALESCE(2.0,null,3.0)
+                      type: double
+                      expr: COALESCE(if true, null, 0,null)
+                      type: int
+                Limit
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+
+
+query: SELECT COALESCE(1),
+       COALESCE(1, 2),
+       COALESCE(NULL, 2),
+       COALESCE(1, NULL),
+       COALESCE(NULL, NULL, 3),
+       COALESCE(4, NULL, NULL, NULL),
+       COALESCE('1'),
+       COALESCE('1', '2'),
+       COALESCE(NULL, '2'),
+       COALESCE('1', NULL),
+       COALESCE(NULL, NULL, '3'),
+       COALESCE('4', NULL, NULL, NULL),
+       COALESCE(1.0),
+       COALESCE(1.0, 2.0),
+       COALESCE(NULL, 2.0),
+       COALESCE(NULL, 2.0, 3.0),
+       COALESCE(2.0, NULL, 3.0),
+       COALESCE(IF(TRUE, NULL, 0), NULL)
+FROM src LIMIT 1
+Input: default/src
+Output: file:/data/users/zshao/tools/495-trunk-apache-hive/build/ql/tmp/64965527/10000
+1	1	2	1	3	4	1	1	2	1	3	4	1.0	1.0	2.0	2.0	2.0	NULL
+query: EXPLAIN
+SELECT COALESCE(src_thrift.lint[1], 999),
+       COALESCE(src_thrift.lintstring[0].mystring, '999'),
+       COALESCE(src_thrift.mstringstring['key_2'], '999')
+FROM src_thrift
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF src_thrift)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COALESCE ([ (. (TOK_TABLE_OR_COL src_thrift) lint) 1) 999)) (TOK_SELEXPR (TOK_FUNCTION COALESCE (. ([ (. (TOK_TABLE_OR_COL src_thrift) lintstring) 0) mystring) '999')) (TOK_SELEXPR (TOK_FUNCTION COALESCE ([ (. (TOK_TABLE_OR_COL src_thrift) mstringstring) 'key_2') '999')))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src_thrift 
+            Select Operator
+              expressions:
+                    expr: lint
+                    type: array<int>
+                    expr: lintstring
+                    type: array<struct<myint:int,mystring:string,underscore_int:int>>
+                    expr: mstringstring
+                    type: map<string,string>
+              Select Operator
+                expressions:
+                      expr: COALESCE(0[1],999)
+                      type: int
+                      expr: COALESCE(1[0].mystring,'999')
+                      type: string
+                      expr: COALESCE(2['key_2'],'999')
+                      type: string
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+query: SELECT COALESCE(src_thrift.lint[1], 999),
+       COALESCE(src_thrift.lintstring[0].mystring, '999'),
+       COALESCE(src_thrift.mstringstring['key_2'], '999')
+FROM src_thrift
+Input: default/src_thrift
+Output: file:/data/users/zshao/tools/495-trunk-apache-hive/build/ql/tmp/2056249139/10000
+0	0	999
+2	1	999
+4	8	value_2
+6	27	999
+8	64	999
+10	125	999
+12	216	999
+14	343	999
+16	512	999
+18	729	999
+999	999	999