You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sa...@apache.org on 2019/03/22 04:18:00 UTC
[hive] branch master updated: HIVE-21283: Create Synonym mid for
substr, position for locate (Mani M, reviewed by Sankar Hariappan)
This is an automated email from the ASF dual-hosted git repository.
sankarh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 2b181dc HIVE-21283: Create Synonym mid for substr, position for locate (Mani M, reviewed by Sankar Hariappan)
2b181dc is described below
commit 2b181dc7aeccf3d92968e68be0d3a9aabd0350e2
Author: Mani M <rm...@gmail.com>
AuthorDate: Fri Mar 22 09:46:43 2019 +0530
HIVE-21283: Create Synonym mid for substr, position for locate (Mani M, reviewed by Sankar Hariappan)
Signed-off-by: Sankar Hariappan <sa...@apache.org>
---
.../hadoop/hive/ql/exec/FunctionRegistry.java | 2 +
.../org/apache/hadoop/hive/ql/udf/UDFSubstr.java | 2 +-
.../hive/ql/udf/generic/GenericUDFLocate.java | 2 +-
ql/src/test/queries/clientpositive/udf_mid.q | 3 +
ql/src/test/queries/clientpositive/udf_position.q | 44 ++++++++
.../results/clientpositive/show_functions.q.out | 3 +
.../test/results/clientpositive/udf_locate.q.out | 1 +
ql/src/test/results/clientpositive/udf_mid.q.out | 21 ++++
.../test/results/clientpositive/udf_position.q.out | 121 +++++++++++++++++++++
.../test/results/clientpositive/udf_substr.q.out | 2 +-
.../results/clientpositive/udf_substring.q.out | 2 +-
11 files changed, 199 insertions(+), 4 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 3e4aa74..ed41bef 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -193,6 +193,7 @@ public final class FunctionRegistry {
static {
system.registerGenericUDF("concat", GenericUDFConcat.class);
+ system.registerUDF("mid", UDFSubstr.class, false);
system.registerUDF("substr", UDFSubstr.class, false);
system.registerUDF("substring", UDFSubstr.class, false);
system.registerGenericUDF("substring_index", GenericUDFSubstringIndex.class);
@@ -498,6 +499,7 @@ public final class FunctionRegistry {
system.registerGenericUDF("in_file", GenericUDFInFile.class);
system.registerGenericUDF("instr", GenericUDFInstr.class);
system.registerGenericUDF("locate", GenericUDFLocate.class);
+ system.registerGenericUDF("position", GenericUDFLocate.class);
system.registerGenericUDF("elt", GenericUDFElt.class);
system.registerGenericUDF("concat_ws", GenericUDFConcatWS.class);
system.registerGenericUDF("sort_array", GenericUDFSortArray.class);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java
index 4136cc7..5b1964c 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java
@@ -33,7 +33,7 @@ import org.apache.hadoop.io.Text;
* UDFSubstr.
*
*/
-@Description(name = "substr,substring",
+@Description(name = "substr,substring,mid",
value = "_FUNC_(str, pos[, len]) - returns the substring of str that"
+ " starts at pos and is of length len or" +
"_FUNC_(bin, pos[, len]) - returns the slice of byte array that"
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLocate.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLocate.java
index cebc752..a077222 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLocate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLocate.java
@@ -42,7 +42,7 @@ import org.apache.hadoop.io.Text;
* </pre>
* <p>
*/
-@Description(name = "locate",
+@Description(name = "locate,position",
value = "_FUNC_(substr, str[, pos]) - Returns the position of the first "
+ "occurance of substr in str after position pos",
extended = "Example:\n"
diff --git a/ql/src/test/queries/clientpositive/udf_mid.q b/ql/src/test/queries/clientpositive/udf_mid.q
new file mode 100644
index 0000000..a2a6526
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_mid.q
@@ -0,0 +1,3 @@
+-- Synonym. See udf_substr.q
+DESCRIBE FUNCTION mid;
+DESCRIBE FUNCTION EXTENDED mid;
diff --git a/ql/src/test/queries/clientpositive/udf_position.q b/ql/src/test/queries/clientpositive/udf_position.q
new file mode 100644
index 0000000..232ed77
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_position.q
@@ -0,0 +1,44 @@
+--! qt:dataset:src
+set hive.fetch.task.conversion=more;
+
+DESCRIBE FUNCTION position;
+DESCRIBE FUNCTION EXTENDED position;
+
+EXPLAIN
+SELECT position('abc', 'abcd'),
+ position('ccc', 'abcabc'),
+ position('23', 123),
+ position(23, 123),
+ position('abc', 'abcabc', 2),
+ position('abc', 'abcabc', '2'),
+ position(1, TRUE),
+ position(1, FALSE),
+ position(CAST('2' AS TINYINT), '12345'),
+ position('34', CAST('12345' AS SMALLINT)),
+ position('456', CAST('123456789012' AS BIGINT)),
+ position('.25', CAST(1.25 AS FLOAT)),
+ position('.0', CAST(16.0 AS DOUBLE)),
+ position(null, 'abc'),
+ position('abc', null),
+ position('abc', 'abcd', null),
+ position('abc', 'abcd', 'invalid number')
+FROM src tablesample (1 rows);
+
+SELECT position('abc', 'abcd'),
+ position('ccc', 'abcabc'),
+ position('23', 123),
+ position(23, 123),
+ position('abc', 'abcabc', 2),
+ position('abc', 'abcabc', '2'),
+ position(1, TRUE),
+ position(1, FALSE),
+ position(CAST('2' AS TINYINT), '12345'),
+ position('34', CAST('12345' AS SMALLINT)),
+ position('456', CAST('123456789012' AS BIGINT)),
+ position('.25', CAST(1.25 AS FLOAT)),
+ position('.0', CAST(16.0 AS DOUBLE)),
+ position(null, 'abc'),
+ position('abc', null),
+ position('abc', 'abcd', null),
+ position('abc', 'abcd', 'invalid number')
+FROM src tablesample (1 rows);
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index dc2b436..0b772f0 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -168,6 +168,7 @@ mask_show_last_n
matchpath
max
md5
+mid
min
minute
mod
@@ -196,6 +197,7 @@ percentile_approx
pi
pmod
posexplode
+position
positive
pow
power
@@ -437,6 +439,7 @@ mask_show_last_n
matchpath
max
md5
+mid
min
minute
mod
diff --git a/ql/src/test/results/clientpositive/udf_locate.q.out b/ql/src/test/results/clientpositive/udf_locate.q.out
index 63213b3..cb3f63b 100644
--- a/ql/src/test/results/clientpositive/udf_locate.q.out
+++ b/ql/src/test/results/clientpositive/udf_locate.q.out
@@ -8,6 +8,7 @@ PREHOOK: type: DESCFUNCTION
POSTHOOK: query: DESCRIBE FUNCTION EXTENDED locate
POSTHOOK: type: DESCFUNCTION
locate(substr, str[, pos]) - Returns the position of the first occurance of substr in str after position pos
+Synonyms: position
Example:
> SELECT locate('bar', 'foobarbar', 5) FROM src LIMIT 1;
7
diff --git a/ql/src/test/results/clientpositive/udf_mid.q.out b/ql/src/test/results/clientpositive/udf_mid.q.out
new file mode 100644
index 0000000..0e28452
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udf_mid.q.out
@@ -0,0 +1,21 @@
+PREHOOK: query: DESCRIBE FUNCTION mid
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION mid
+POSTHOOK: type: DESCFUNCTION
+mid(str, pos[, len]) - returns the substring of str that starts at pos and is of length len ormid(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED mid
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED mid
+POSTHOOK: type: DESCFUNCTION
+mid(str, pos[, len]) - returns the substring of str that starts at pos and is of length len ormid(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len
+Synonyms: substr, substring
+pos is a 1-based index. If pos<0 the starting position is determined by counting backwards from the end of str.
+Example:
+ > SELECT mid('Facebook', 5) FROM src LIMIT 1;
+ 'book'
+ > SELECT mid('Facebook', -5) FROM src LIMIT 1;
+ 'ebook'
+ > SELECT mid('Facebook', 5, 1) FROM src LIMIT 1;
+ 'b'
+Function class:org.apache.hadoop.hive.ql.udf.UDFSubstr
+Function type:BUILTIN
diff --git a/ql/src/test/results/clientpositive/udf_position.q.out b/ql/src/test/results/clientpositive/udf_position.q.out
new file mode 100644
index 0000000..ac5f6c0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udf_position.q.out
@@ -0,0 +1,121 @@
+PREHOOK: query: DESCRIBE FUNCTION position
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION position
+POSTHOOK: type: DESCFUNCTION
+position(substr, str[, pos]) - Returns the position of the first occurance of substr in str after position pos
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED position
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED position
+POSTHOOK: type: DESCFUNCTION
+position(substr, str[, pos]) - Returns the position of the first occurance of substr in str after position pos
+Synonyms: locate
+Example:
+ > SELECT position('bar', 'foobarbar', 5) FROM src LIMIT 1;
+ 7
+Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFLocate
+Function type:BUILTIN
+PREHOOK: query: EXPLAIN
+SELECT position('abc', 'abcd'),
+ position('ccc', 'abcabc'),
+ position('23', 123),
+ position(23, 123),
+ position('abc', 'abcabc', 2),
+ position('abc', 'abcabc', '2'),
+ position(1, TRUE),
+ position(1, FALSE),
+ position(CAST('2' AS TINYINT), '12345'),
+ position('34', CAST('12345' AS SMALLINT)),
+ position('456', CAST('123456789012' AS BIGINT)),
+ position('.25', CAST(1.25 AS FLOAT)),
+ position('.0', CAST(16.0 AS DOUBLE)),
+ position(null, 'abc'),
+ position('abc', null),
+ position('abc', 'abcd', null),
+ position('abc', 'abcd', 'invalid number')
+FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT position('abc', 'abcd'),
+ position('ccc', 'abcabc'),
+ position('23', 123),
+ position(23, 123),
+ position('abc', 'abcabc', 2),
+ position('abc', 'abcabc', '2'),
+ position(1, TRUE),
+ position(1, FALSE),
+ position(CAST('2' AS TINYINT), '12345'),
+ position('34', CAST('12345' AS SMALLINT)),
+ position('456', CAST('123456789012' AS BIGINT)),
+ position('.25', CAST(1.25 AS FLOAT)),
+ position('.0', CAST(16.0 AS DOUBLE)),
+ position(null, 'abc'),
+ position('abc', null),
+ position('abc', 'abcd', null),
+ position('abc', 'abcd', 'invalid number')
+FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: src
+ Row Limit Per Split: 1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 1 (type: int), 0 (type: int), 2 (type: int), 2 (type: int), 4 (type: int), 4 (type: int), 0 (type: int), 0 (type: int), 2 (type: int), 3 (type: int), 4 (type: int), 2 (type: int), 3 (type: int), null (type: int), null (type: int), 0 (type: int), 0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Statistics: Num rows: 500 Data size: 30008 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: SELECT position('abc', 'abcd'),
+ position('ccc', 'abcabc'),
+ position('23', 123),
+ position(23, 123),
+ position('abc', 'abcabc', 2),
+ position('abc', 'abcabc', '2'),
+ position(1, TRUE),
+ position(1, FALSE),
+ position(CAST('2' AS TINYINT), '12345'),
+ position('34', CAST('12345' AS SMALLINT)),
+ position('456', CAST('123456789012' AS BIGINT)),
+ position('.25', CAST(1.25 AS FLOAT)),
+ position('.0', CAST(16.0 AS DOUBLE)),
+ position(null, 'abc'),
+ position('abc', null),
+ position('abc', 'abcd', null),
+ position('abc', 'abcd', 'invalid number')
+FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT position('abc', 'abcd'),
+ position('ccc', 'abcabc'),
+ position('23', 123),
+ position(23, 123),
+ position('abc', 'abcabc', 2),
+ position('abc', 'abcabc', '2'),
+ position(1, TRUE),
+ position(1, FALSE),
+ position(CAST('2' AS TINYINT), '12345'),
+ position('34', CAST('12345' AS SMALLINT)),
+ position('456', CAST('123456789012' AS BIGINT)),
+ position('.25', CAST(1.25 AS FLOAT)),
+ position('.0', CAST(16.0 AS DOUBLE)),
+ position(null, 'abc'),
+ position('abc', null),
+ position('abc', 'abcd', null),
+ position('abc', 'abcd', 'invalid number')
+FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+1 0 2 2 4 4 0 0 2 3 4 2 3 NULL NULL 0 0
diff --git a/ql/src/test/results/clientpositive/udf_substr.q.out b/ql/src/test/results/clientpositive/udf_substr.q.out
index 4489006..00fa606 100644
--- a/ql/src/test/results/clientpositive/udf_substr.q.out
+++ b/ql/src/test/results/clientpositive/udf_substr.q.out
@@ -8,7 +8,7 @@ PREHOOK: type: DESCFUNCTION
POSTHOOK: query: DESCRIBE FUNCTION EXTENDED substr
POSTHOOK: type: DESCFUNCTION
substr(str, pos[, len]) - returns the substring of str that starts at pos and is of length len orsubstr(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len
-Synonyms: substring
+Synonyms: mid, substring
pos is a 1-based index. If pos<0 the starting position is determined by counting backwards from the end of str.
Example:
> SELECT substr('Facebook', 5) FROM src LIMIT 1;
diff --git a/ql/src/test/results/clientpositive/udf_substring.q.out b/ql/src/test/results/clientpositive/udf_substring.q.out
index 72898e6..d6b1c4a 100644
--- a/ql/src/test/results/clientpositive/udf_substring.q.out
+++ b/ql/src/test/results/clientpositive/udf_substring.q.out
@@ -8,7 +8,7 @@ PREHOOK: type: DESCFUNCTION
POSTHOOK: query: DESCRIBE FUNCTION EXTENDED substring
POSTHOOK: type: DESCFUNCTION
substring(str, pos[, len]) - returns the substring of str that starts at pos and is of length len orsubstring(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len
-Synonyms: substr
+Synonyms: mid, substr
pos is a 1-based index. If pos<0 the starting position is determined by counting backwards from the end of str.
Example:
> SELECT substring('Facebook', 5) FROM src LIMIT 1;