Posted to commits@hive.apache.org by px...@apache.org on 2016/10/04 05:08:37 UTC

hive git commit: HIVE-14768: Add a new UDTF Replicate_Rows (Pengcheng Xiong, reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 0a4b3d8ff -> e19f0e35e


HIVE-14768: Add a new UDTF Replicate_Rows (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e19f0e35
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e19f0e35
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e19f0e35

Branch: refs/heads/master
Commit: e19f0e35e09ca283e5de46ae7e2db1e11396335e
Parents: 0a4b3d8
Author: Pengcheng Xiong <px...@apache.org>
Authored: Mon Oct 3 22:07:24 2016 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Mon Oct 3 22:07:24 2016 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/FunctionRegistry.java   |   1 +
 .../udf/generic/GenericUDTFReplicateRows.java   |  88 +++++++++++++++
 .../clientpositive/udtf_replicate_rows.q        |  23 ++++
 .../results/clientpositive/show_functions.q.out |   1 +
 .../clientpositive/udtf_replicate_rows.q.out    | 107 +++++++++++++++++++
 5 files changed, 220 insertions(+)
----------------------------------------------------------------------
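
For readers skimming the patch: replicate_rows(n, cols...) emits each input row n times, where n is the bigint first argument; rows whose n is zero or negative produce no output (the loop in process() simply never forwards). A minimal sketch of the semantics, mirroring the queries and expected output in udtf_replicate_rows.q / udtf_replicate_rows.q.out further down:

  -- table t contains the row (x=3, y='2'), among others
  SELECT replicate_rows(x, y) FROM t;
  -- the pair (3, '2') is emitted three times:
  -- 3    2
  -- 3    2
  -- 3    2

Note that the output includes the leading count column itself: the UDTF forwards all of its arguments, so the number of output columns equals the number of arguments passed in.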


http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 6870dfa..6b29be1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -468,6 +468,7 @@ public final class FunctionRegistry {
 
     // Generic UDTF's
     system.registerGenericUDTF("explode", GenericUDTFExplode.class);
+    system.registerGenericUDTF("replicate_rows", GenericUDTFReplicateRows.class);
     system.registerGenericUDTF("inline", GenericUDTFInline.class);
     system.registerGenericUDTF("json_tuple", GenericUDTFJSONTuple.class);
     system.registerGenericUDTF("parse_url_tuple", GenericUDTFParseUrlTuple.class);

http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFReplicateRows.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFReplicateRows.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFReplicateRows.java
new file mode 100644
index 0000000..164445d
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFReplicateRows.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ReturnObjectInspectorResolver;
+import org.apache.hadoop.hive.serde2.lazy.LazyLong;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.LongWritable;
+
+/**
+ * Takes a row of data and repeats n times.
+ */
+@Description(name = "replicate_rows", value = "_FUNC_(n, cols...) - turns 1 row into n rows")
+public class GenericUDTFReplicateRows extends GenericUDTF {
+  @Override
+  public void close() throws HiveException {
+  }
+
+  private transient List<ObjectInspector> argOIs = new ArrayList<ObjectInspector>();
+
+  @Override
+  public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
+    if (args.length < 2) {
+      throw new UDFArgumentException("UDTFReplicateRows() expects at least two arguments.");
+    }
+    if (!(args[0] instanceof LongObjectInspector)) {
+      throw new UDFArgumentException(
+          "The first argument to UDTFReplicateRows() must be a long (got "
+              + args[0].getTypeName() + " instead).");
+    }
+
+    ArrayList<String> fieldNames = new ArrayList<String>();
+    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
+    for (int index = 0; index < args.length; ++index) {
+      fieldNames.add("col" + index);
+      fieldOIs.add(args[index]);
+    }
+    argOIs = fieldOIs;
+    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
+  }
+
+  @Override
+  public void process(Object[] args) throws HiveException, UDFArgumentException {
+
+    long numRows = ((LongObjectInspector) argOIs.get(0)).get(args[0]);
+
+    for (long n = 0; n < numRows; n++) {
+      forward(args);
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "UDTFReplicateRows";
+  }
+
+}
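
As with other UDTFs, the tests below call replicate_rows directly in the SELECT clause. Presumably it can also be combined with LATERAL VIEW to keep other base-table columns alongside the replicated ones; a hedged sketch of that usage, not exercised by this patch's tests (the aliases lv, a, b are illustrative only, and this assumes the standard Hive LATERAL VIEW syntax for UDTFs):

  SELECT t.z, lv.a, lv.b
  FROM t
  LATERAL VIEW replicate_rows(t.x, t.y) lv AS a, b;

Here a and b would correspond to the two output columns (the count and the replicated y value), repeated x times per input row.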

http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/test/queries/clientpositive/udtf_replicate_rows.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udtf_replicate_rows.q b/ql/src/test/queries/clientpositive/udtf_replicate_rows.q
new file mode 100644
index 0000000..a074a78
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udtf_replicate_rows.q
@@ -0,0 +1,23 @@
+set hive.mapred.mode=nonstrict;
+set hive.cbo.enable=false;
+
+DESCRIBE FUNCTION replicate_rows;
+DESCRIBE FUNCTION EXTENDED replicate_rows;
+
+create table t (x bigint, y string, z int);
+
+insert into table t values (3,'2',0),(2,'3',1),(0,'2',2),(-1,'k',3);
+
+SELECT replicate_rows(x,y) FROM t;
+
+SELECT replicate_rows(x,y,y) FROM t;
+
+SELECT replicate_rows(x,y,y,y,z) FROM t;
+
+select y,x from (SELECT replicate_rows(x,y) as (x,y) FROM t)subq;
+
+select z,y,x from(SELECT replicate_rows(x,y,y) as (z,y,x) FROM t)subq;
+
+SELECT replicate_rows(x,concat(y,'...'),y) FROM t;
+
+

http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/test/results/clientpositive/show_functions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index 4a40094..7b746ff 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -186,6 +186,7 @@ regexp_extract
 regexp_replace
 repeat
 replace
+replicate_rows
 reverse
 rlike
 round

http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/test/results/clientpositive/udtf_replicate_rows.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udtf_replicate_rows.q.out b/ql/src/test/results/clientpositive/udtf_replicate_rows.q.out
new file mode 100644
index 0000000..f76a584
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udtf_replicate_rows.q.out
@@ -0,0 +1,107 @@
+PREHOOK: query: DESCRIBE FUNCTION replicate_rows
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION replicate_rows
+POSTHOOK: type: DESCFUNCTION
+replicate_rows(n, cols...) - turns 1 row into n rows
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED replicate_rows
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED replicate_rows
+POSTHOOK: type: DESCFUNCTION
+replicate_rows(n, cols...) - turns 1 row into n rows
+PREHOOK: query: create table t (x bigint, y string, z int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t (x bigint, y string, z int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: insert into table t values (3,'2',0),(2,'3',1),(0,'2',2),(-1,'k',3)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into table t values (3,'2',0),(2,'3',1),(0,'2',2),(-1,'k',3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: t.y SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: t.z EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: SELECT replicate_rows(x,y) FROM t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT replicate_rows(x,y) FROM t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+3	2
+3	2
+3	2
+2	3
+2	3
+PREHOOK: query: SELECT replicate_rows(x,y,y) FROM t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT replicate_rows(x,y,y) FROM t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+3	2	2
+3	2	2
+3	2	2
+2	3	3
+2	3	3
+PREHOOK: query: SELECT replicate_rows(x,y,y,y,z) FROM t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT replicate_rows(x,y,y,y,z) FROM t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+3	2	2	2	0
+3	2	2	2	0
+3	2	2	2	0
+2	3	3	3	1
+2	3	3	3	1
+PREHOOK: query: select y,x from (SELECT replicate_rows(x,y) as (x,y) FROM t)subq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select y,x from (SELECT replicate_rows(x,y) as (x,y) FROM t)subq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+2	3
+2	3
+2	3
+3	2
+3	2
+PREHOOK: query: select z,y,x from(SELECT replicate_rows(x,y,y) as (z,y,x) FROM t)subq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select z,y,x from(SELECT replicate_rows(x,y,y) as (z,y,x) FROM t)subq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+3	2	2
+3	2	2
+3	2	2
+2	3	3
+2	3	3
+PREHOOK: query: SELECT replicate_rows(x,concat(y,'...'),y) FROM t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT replicate_rows(x,concat(y,'...'),y) FROM t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+3	2...	2
+3	2...	2
+3	2...	2
+2	3...	3
+2	3...	3