You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2016/10/04 05:08:37 UTC
hive git commit: HIVE-14768: Add a new UDTF Replicate_Rows (Pengcheng
Xiong, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 0a4b3d8ff -> e19f0e35e
HIVE-14768: Add a new UDTF Replicate_Rows (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e19f0e35
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e19f0e35
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e19f0e35
Branch: refs/heads/master
Commit: e19f0e35e09ca283e5de46ae7e2db1e11396335e
Parents: 0a4b3d8
Author: Pengcheng Xiong <px...@apache.org>
Authored: Mon Oct 3 22:07:24 2016 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Mon Oct 3 22:07:24 2016 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/exec/FunctionRegistry.java | 1 +
.../udf/generic/GenericUDTFReplicateRows.java | 88 +++++++++++++++
.../clientpositive/udtf_replicate_rows.q | 23 ++++
.../results/clientpositive/show_functions.q.out | 1 +
.../clientpositive/udtf_replicate_rows.q.out | 107 +++++++++++++++++++
5 files changed, 220 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 6870dfa..6b29be1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -468,6 +468,7 @@ public final class FunctionRegistry {
// Generic UDTF's
system.registerGenericUDTF("explode", GenericUDTFExplode.class);
+ system.registerGenericUDTF("replicate_rows", GenericUDTFReplicateRows.class);
system.registerGenericUDTF("inline", GenericUDTFInline.class);
system.registerGenericUDTF("json_tuple", GenericUDTFJSONTuple.class);
system.registerGenericUDTF("parse_url_tuple", GenericUDTFParseUrlTuple.class);
http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFReplicateRows.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFReplicateRows.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFReplicateRows.java
new file mode 100644
index 0000000..164445d
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFReplicateRows.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ReturnObjectInspectorResolver;
+import org.apache.hadoop.hive.serde2.lazy.LazyLong;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.LongWritable;
+
+/**
+ * Repeats the input row n times (n = first, bigint argument); NULL or n <= 0 yields no rows.
+ */
+@Description(name = "replicate_rows", value = "_FUNC_(n, cols...) - turns 1 row into n rows")
+public class GenericUDTFReplicateRows extends GenericUDTF {
+  /** Inspectors of the call arguments, recorded by initialize(); entry 0 decodes the count. */
+  private transient List<ObjectInspector> argOIs = new ArrayList<ObjectInspector>();
+
+  @Override
+  public void close() throws HiveException {
+    // rows are forwarded as they arrive, so nothing is buffered and nothing needs flushing
+  }
+
+  /** Validates the arguments; declares one output column ("col0", "col1", ...) per argument. */
+  @Override
+  public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
+    if (args.length < 2) {
+      throw new UDFArgumentException("UDTFReplicateRows() expects at least two arguments.");
+    }
+    if (!(args[0] instanceof LongObjectInspector)) {
+      throw new UDFArgumentException(
+          "The first argument to UDTFReplicateRows() must be a long (got "
+              + args[0].getTypeName() + " instead).");
+    }
+    List<String> fieldNames = new ArrayList<String>(args.length);
+    List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(args.length);
+    for (int index = 0; index < args.length; ++index) {
+      fieldNames.add("col" + index);
+      fieldOIs.add(args[index]);
+    }
+    argOIs = fieldOIs;
+    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
+  }
+
+  /** Forwards the whole argument row n = args[0] times; a NULL n yields no rows. */
+  @Override
+  public void process(Object[] args) throws HiveException, UDFArgumentException {
+    long numRows = args[0] == null ? 0L : ((LongObjectInspector) argOIs.get(0)).get(args[0]);
+    for (long n = 0; n < numRows; n++) {
+      forward(args);
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "UDTFReplicateRows";
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/test/queries/clientpositive/udtf_replicate_rows.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udtf_replicate_rows.q b/ql/src/test/queries/clientpositive/udtf_replicate_rows.q
new file mode 100644
index 0000000..a074a78
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udtf_replicate_rows.q
@@ -0,0 +1,23 @@
+set hive.mapred.mode=nonstrict;
+set hive.cbo.enable=false;
+
+DESCRIBE FUNCTION replicate_rows;
+DESCRIBE FUNCTION EXTENDED replicate_rows;
+
+create table t (x bigint, y string, z int);
+
+insert into table t values (3,'2',0),(2,'3',1),(0,'2',2),(-1,'k',3);
+
+SELECT replicate_rows(x,y) FROM t;
+
+SELECT replicate_rows(x,y,y) FROM t;
+
+SELECT replicate_rows(x,y,y,y,z) FROM t;
+
+select y,x from (SELECT replicate_rows(x,y) as (x,y) FROM t)subq;
+
+select z,y,x from(SELECT replicate_rows(x,y,y) as (z,y,x) FROM t)subq;
+
+SELECT replicate_rows(x,concat(y,'...'),y) FROM t;
+
+
http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/test/results/clientpositive/show_functions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index 4a40094..7b746ff 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -186,6 +186,7 @@ regexp_extract
regexp_replace
repeat
replace
+replicate_rows
reverse
rlike
round
http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/test/results/clientpositive/udtf_replicate_rows.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udtf_replicate_rows.q.out b/ql/src/test/results/clientpositive/udtf_replicate_rows.q.out
new file mode 100644
index 0000000..f76a584
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udtf_replicate_rows.q.out
@@ -0,0 +1,107 @@
+PREHOOK: query: DESCRIBE FUNCTION replicate_rows
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION replicate_rows
+POSTHOOK: type: DESCFUNCTION
+replicate_rows(n, cols...) - turns 1 row into n rows
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED replicate_rows
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED replicate_rows
+POSTHOOK: type: DESCFUNCTION
+replicate_rows(n, cols...) - turns 1 row into n rows
+PREHOOK: query: create table t (x bigint, y string, z int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t (x bigint, y string, z int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: insert into table t values (3,'2',0),(2,'3',1),(0,'2',2),(-1,'k',3)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into table t values (3,'2',0),(2,'3',1),(0,'2',2),(-1,'k',3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: t.y SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: t.z EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: SELECT replicate_rows(x,y) FROM t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT replicate_rows(x,y) FROM t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+3 2
+3 2
+3 2
+2 3
+2 3
+PREHOOK: query: SELECT replicate_rows(x,y,y) FROM t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT replicate_rows(x,y,y) FROM t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+3 2 2
+3 2 2
+3 2 2
+2 3 3
+2 3 3
+PREHOOK: query: SELECT replicate_rows(x,y,y,y,z) FROM t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT replicate_rows(x,y,y,y,z) FROM t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+3 2 2 2 0
+3 2 2 2 0
+3 2 2 2 0
+2 3 3 3 1
+2 3 3 3 1
+PREHOOK: query: select y,x from (SELECT replicate_rows(x,y) as (x,y) FROM t)subq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select y,x from (SELECT replicate_rows(x,y) as (x,y) FROM t)subq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+2 3
+2 3
+2 3
+3 2
+3 2
+PREHOOK: query: select z,y,x from(SELECT replicate_rows(x,y,y) as (z,y,x) FROM t)subq
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select z,y,x from(SELECT replicate_rows(x,y,y) as (z,y,x) FROM t)subq
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+3 2 2
+3 2 2
+3 2 2
+2 3 3
+2 3 3
+PREHOOK: query: SELECT replicate_rows(x,concat(y,'...'),y) FROM t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT replicate_rows(x,concat(y,'...'),y) FROM t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+3 2... 2
+3 2... 2
+3 2... 2
+2 3... 3
+2 3... 3