You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/10/07 07:35:21 UTC

svn commit: r822592 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/

Author: namit
Date: Wed Oct  7 05:35:20 2009
New Revision: 822592

URL: http://svn.apache.org/viewvc?rev=822592&view=rev
Log:
HIVE-682. Add concat_ws (Jonathan Chang via namit)


Added:
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_concat_ws.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_concat_ws.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
    hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=822592&r1=822591&r2=822592&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Wed Oct  7 05:35:20 2009
@@ -57,6 +57,8 @@
     HIVE-31. Support Create Table As Select
     (Ning Zhang via namit)
 
+    HIVE-682. Add concat_ws (Jonathan Chang via namit)
+
   IMPROVEMENTS
 
     HIVE-760. Add version info to META-INF/MANIFEST.MF.

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=822592&r1=822591&r2=822592&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Wed Oct  7 05:35:20 2009
@@ -210,6 +210,7 @@
     registerGenericUDF("instr", GenericUDFInstr.class);
     registerGenericUDF("locate", GenericUDFLocate.class);
     registerGenericUDF("elt", GenericUDFElt.class);
+    registerGenericUDF("concat_ws", GenericUDFConcatWS.class);
   }
 
   public static void registerTemporaryUDF(String functionName, Class<? extends UDF> UDFClass,

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java?rev=822592&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java Wed Oct  7 05:35:20 2009
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.exec.description;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Generic UDF for string function <code>CONCAT_WS(sep,str1,str2,str3,...)</code>.
+ * This mimics the function from MySQL
+ * http://dev.mysql.com/doc/refman/5.0/en/string-functions.html#function_concat-ws
+ *
+ * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDF
+ */
+@description(
+    name = "concat_ws",
+    value = "_FUNC_(separator, str1, str2, ...) - " +
+            "returns the concatenation of the strings separated by the separator.",
+    extended = "Example:\n" +
+      "  > SELECT _FUNC_('ce', 'fa', 'book') FROM src LIMIT 1;\n" +
+      "  'facebook'")
+
+public class GenericUDFConcatWS extends GenericUDF {
+
+  /**
+   * Object inspectors of the call's arguments, captured by
+   * {@link #initialize(ObjectInspector[])} and used by
+   * {@link #evaluate(DeferredObject[])} to read each argument as a string.
+   */
+  ObjectInspector[] argumentOIs;
+
+  /**
+   * Output holder that is overwritten and returned by every call to
+   * {@link #evaluate(DeferredObject[])}.
+   */
+  private final Text resultText = new Text();
+
+  /**
+   * Checks the argument list and remembers the argument inspectors.
+   *
+   * @param arguments inspectors for the separator followed by the strings to join
+   * @return the writable string object inspector describing the result
+   * @throws UDFArgumentLengthException if fewer than two arguments are given
+   * @throws UDFArgumentTypeException if any argument's type name is neither
+   *         {@code Constants.STRING_TYPE_NAME} nor {@code Constants.VOID_TYPE_NAME}
+   */
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments)
+    throws UDFArgumentException {
+    if (arguments.length < 2) {
+      throw new UDFArgumentLengthException(
+        "The function CONCAT_WS(separator,str1,str2,str3,...) needs at least two arguments.");
+    }
+
+    for (int i = 0; i < arguments.length; i++) {
+      String typeName = arguments[i].getTypeName();
+      // Compare by value: reference comparison (== / !=) on strings only works
+      // when both sides happen to be the very same String instance.
+      if (!Constants.STRING_TYPE_NAME.equals(typeName)
+          && !Constants.VOID_TYPE_NAME.equals(typeName)) {
+        throw new UDFArgumentTypeException(i,
+                                           "Argument " + (i +1 )  + " of function CONCAT_WS must be \"" + Constants.STRING_TYPE_NAME
+                                           + "\", but \"" + typeName + "\" was found.");
+      }
+    }
+
+    this.argumentOIs = arguments;
+    return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+  }
+
+  /**
+   * Joins the non-null arguments after the first one, placing the first
+   * argument (the separator) between consecutive joined values.
+   *
+   * @param arguments the separator followed by the strings to join
+   * @return {@code null} if the separator is null; otherwise the shared
+   *         {@link Text} instance holding the joined string (empty when every
+   *         string argument is null)
+   * @throws HiveException declared by the overridden method; not thrown
+   *         directly by this implementation
+   */
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    // Fetch each deferred value exactly once instead of calling get() twice.
+    Object separatorObject = arguments[0].get();
+    if (separatorObject == null) {
+      return null;
+    }
+    String separator =
+      ((StringObjectInspector)argumentOIs[0]).getPrimitiveJavaObject(separatorObject);
+
+    StringBuilder sb = new StringBuilder();
+    boolean first = true;
+    for (int i = 1; i < arguments.length; i++) {
+      Object value = arguments[i].get();
+      if (value == null) {
+        // Null arguments are skipped and do not contribute a separator.
+        continue;
+      }
+      if (first) {
+        first = false;
+      } else {
+        sb.append(separator);
+      }
+      sb.append(((StringObjectInspector)argumentOIs[i]).getPrimitiveJavaObject(value));
+    }
+
+    resultText.set(sb.toString());
+    return resultText;
+  }
+
+  /**
+   * Renders the call as <code>concat_ws(child1, child2, ...)</code>.
+   *
+   * @param children display strings of the argument expressions; expected to
+   *        contain at least two entries (checked by an assertion only)
+   * @return the display string for this function call
+   */
+  @Override
+  public String getDisplayString(String[] children) {
+    assert (children.length >= 2);
+    StringBuilder sb = new StringBuilder();
+    sb.append("concat_ws(");
+    for (int i = 0; i < children.length - 1; i++) {
+      sb.append(children[i]).append(", ");
+    }
+    sb.append(children[children.length - 1]).append(")");
+    return sb.toString();
+  }
+}

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_concat_ws.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_concat_ws.q?rev=822592&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_concat_ws.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_concat_ws.q Wed Oct  7 05:35:20 2009
@@ -0,0 +1,18 @@
+DESCRIBE FUNCTION concat_ws;
+DESCRIBE FUNCTION EXTENDED concat_ws;
+
+CREATE TABLE dest1(c1 STRING, c2 STRING, c3 STRING);
+
+FROM src INSERT OVERWRITE TABLE dest1 SELECT 'abc', 'xyz', '8675309'  WHERE src.key = 86;
+
+EXPLAIN
+SELECT concat_ws(dest1.c1, dest1.c2, dest1.c3),
+       concat_ws(',', dest1.c1, dest1.c2, dest1.c3),
+       concat_ws(NULL, dest1.c1, dest1.c2, dest1.c3),
+       concat_ws('**', dest1.c1, NULL, dest1.c3) FROM dest1;
+
+SELECT concat_ws(dest1.c1, dest1.c2, dest1.c3),
+       concat_ws(',', dest1.c1, dest1.c2, dest1.c3),
+       concat_ws(NULL, dest1.c1, dest1.c2, dest1.c3),
+       concat_ws('**', dest1.c1, NULL, dest1.c3) FROM dest1;
+

Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out?rev=822592&r1=822591&r2=822592&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out Wed Oct  7 05:35:20 2009
@@ -32,6 +32,7 @@
 ceiling
 coalesce
 concat
+concat_ws
 conv
 cos
 count
@@ -129,6 +130,7 @@
 ceiling
 coalesce
 concat
+concat_ws
 conv
 cos
 count

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_concat_ws.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_concat_ws.q.out?rev=822592&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_concat_ws.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_concat_ws.q.out Wed Oct  7 05:35:20 2009
@@ -0,0 +1,90 @@
+PREHOOK: query: DESCRIBE FUNCTION concat_ws
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION concat_ws
+POSTHOOK: type: DESCFUNCTION
+concat_ws(separator, str1, str2, ...) - returns the concatenation of the strings separated by the separator.
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED concat_ws
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED concat_ws
+POSTHOOK: type: DESCFUNCTION
+concat_ws(separator, str1, str2, ...) - returns the concatenation of the strings separated by the separator.
+Example:
+  > SELECT concat_ws('ce', 'fa', 'book') FROM src LIMIT 1;
+  'facebook'
+PREHOOK: query: CREATE TABLE dest1(c1 STRING, c2 STRING, c3 STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest1(c1 STRING, c2 STRING, c3 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest1
+PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT 'abc', 'xyz', '8675309'  WHERE src.key = 86
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT 'abc', 'xyz', '8675309'  WHERE src.key = 86
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+SELECT concat_ws(dest1.c1, dest1.c2, dest1.c3),
+       concat_ws(',', dest1.c1, dest1.c2, dest1.c3),
+       concat_ws(NULL, dest1.c1, dest1.c2, dest1.c3),
+       concat_ws('**', dest1.c1, NULL, dest1.c3) FROM dest1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT concat_ws(dest1.c1, dest1.c2, dest1.c3),
+       concat_ws(',', dest1.c1, dest1.c2, dest1.c3),
+       concat_ws(NULL, dest1.c1, dest1.c2, dest1.c3),
+       concat_ws('**', dest1.c1, NULL, dest1.c3) FROM dest1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF dest1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION concat_ws (. (TOK_TABLE_OR_COL dest1) c1) (. (TOK_TABLE_OR_COL dest1) c2) (. (TOK_TABLE_OR_COL dest1) c3))) (TOK_SELEXPR (TOK_FUNCTION concat_ws ',' (. (TOK_TABLE_OR_COL dest1) c1) (. (TOK_TABLE_OR_COL dest1) c2) (. (TOK_TABLE_OR_COL dest1) c3))) (TOK_SELEXPR (TOK_FUNCTION concat_ws TOK_NULL (. (TOK_TABLE_OR_COL dest1) c1) (. (TOK_TABLE_OR_COL dest1) c2) (. (TOK_TABLE_OR_COL dest1) c3))) (TOK_SELEXPR (TOK_FUNCTION concat_ws '**' (. (TOK_TABLE_OR_COL dest1) c1) TOK_NULL (. (TOK_TABLE_OR_COL dest1) c3))))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        dest1 
+          TableScan
+            alias: dest1
+            Select Operator
+              expressions:
+                    expr: concat_ws(c1, c2, c3)
+                    type: string
+                    expr: concat_ws(',', c1, c2, c3)
+                    type: string
+                    expr: concat_ws(null, c1, c2, c3)
+                    type: string
+                    expr: concat_ws('**', c1, null, c3)
+                    type: string
+              outputColumnNames: _col0, _col1, _col2, _col3
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT concat_ws(dest1.c1, dest1.c2, dest1.c3),
+       concat_ws(',', dest1.c1, dest1.c2, dest1.c3),
+       concat_ws(NULL, dest1.c1, dest1.c2, dest1.c3),
+       concat_ws('**', dest1.c1, NULL, dest1.c3) FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+PREHOOK: Output: file:/mnt/vol/devrs003.snc1/jonchang/trunk/build/ql/tmp/2092269896/10000
+POSTHOOK: query: SELECT concat_ws(dest1.c1, dest1.c2, dest1.c3),
+       concat_ws(',', dest1.c1, dest1.c2, dest1.c3),
+       concat_ws(NULL, dest1.c1, dest1.c2, dest1.c3),
+       concat_ws('**', dest1.c1, NULL, dest1.c3) FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+POSTHOOK: Output: file:/mnt/vol/devrs003.snc1/jonchang/trunk/build/ql/tmp/2092269896/10000
+xyzabc8675309	abc,xyz,8675309	NULL	abc**8675309