You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sa...@apache.org on 2019/03/22 04:18:00 UTC

[hive] branch master updated: HIVE-21283: Create Synonym mid for substr, position for locate (Mani M, reviewed by Sankar Hariappan)

This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 2b181dc  HIVE-21283: Create Synonym mid for substr, position for locate (Mani M, reviewed by Sankar Hariappan)
2b181dc is described below

commit 2b181dc7aeccf3d92968e68be0d3a9aabd0350e2
Author: Mani M <rm...@gmail.com>
AuthorDate: Fri Mar 22 09:46:43 2019 +0530

    HIVE-21283: Create Synonym mid for substr, position for locate (Mani M, reviewed by Sankar Hariappan)
    
    Signed-off-by: Sankar Hariappan <sa...@apache.org>
---
 .../hadoop/hive/ql/exec/FunctionRegistry.java      |   2 +
 .../org/apache/hadoop/hive/ql/udf/UDFSubstr.java   |   2 +-
 .../hive/ql/udf/generic/GenericUDFLocate.java      |   2 +-
 ql/src/test/queries/clientpositive/udf_mid.q       |   3 +
 ql/src/test/queries/clientpositive/udf_position.q  |  44 ++++++++
 .../results/clientpositive/show_functions.q.out    |   3 +
 .../test/results/clientpositive/udf_locate.q.out   |   1 +
 ql/src/test/results/clientpositive/udf_mid.q.out   |  21 ++++
 .../test/results/clientpositive/udf_position.q.out | 121 +++++++++++++++++++++
 .../test/results/clientpositive/udf_substr.q.out   |   2 +-
 .../results/clientpositive/udf_substring.q.out     |   2 +-
 11 files changed, 199 insertions(+), 4 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 3e4aa74..ed41bef 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -193,6 +193,7 @@ public final class FunctionRegistry {
 
   static {
     system.registerGenericUDF("concat", GenericUDFConcat.class);
+    system.registerUDF("mid", UDFSubstr.class, false);
     system.registerUDF("substr", UDFSubstr.class, false);
     system.registerUDF("substring", UDFSubstr.class, false);
     system.registerGenericUDF("substring_index", GenericUDFSubstringIndex.class);
@@ -498,6 +499,7 @@ public final class FunctionRegistry {
     system.registerGenericUDF("in_file", GenericUDFInFile.class);
     system.registerGenericUDF("instr", GenericUDFInstr.class);
     system.registerGenericUDF("locate", GenericUDFLocate.class);
+    system.registerGenericUDF("position", GenericUDFLocate.class);
     system.registerGenericUDF("elt", GenericUDFElt.class);
     system.registerGenericUDF("concat_ws", GenericUDFConcatWS.class);
     system.registerGenericUDF("sort_array", GenericUDFSortArray.class);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java
index 4136cc7..5b1964c 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java
@@ -33,7 +33,7 @@ import org.apache.hadoop.io.Text;
  * UDFSubstr.
  *
  */
-@Description(name = "substr,substring",
+@Description(name = "substr,substring,mid",
     value = "_FUNC_(str, pos[, len]) - returns the substring of str that"
     + " starts at pos and is of length len or" +
     "_FUNC_(bin, pos[, len]) - returns the slice of byte array that"
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLocate.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLocate.java
index cebc752..a077222 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLocate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLocate.java
@@ -42,7 +42,7 @@ import org.apache.hadoop.io.Text;
  * </pre>
  * <p>
  */
-@Description(name = "locate",
+@Description(name = "locate,position",
     value = "_FUNC_(substr, str[, pos]) - Returns the position of the first "
     + "occurance of substr in str after position pos",
     extended = "Example:\n"
diff --git a/ql/src/test/queries/clientpositive/udf_mid.q b/ql/src/test/queries/clientpositive/udf_mid.q
new file mode 100644
index 0000000..a2a6526
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_mid.q
@@ -0,0 +1,3 @@
+-- Synonym. See udf_substr.q
+DESCRIBE FUNCTION mid;
+DESCRIBE FUNCTION EXTENDED mid;
diff --git a/ql/src/test/queries/clientpositive/udf_position.q b/ql/src/test/queries/clientpositive/udf_position.q
new file mode 100644
index 0000000..232ed77
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_position.q
@@ -0,0 +1,44 @@
+--! qt:dataset:src
+set hive.fetch.task.conversion=more;
+
+DESCRIBE FUNCTION position;
+DESCRIBE FUNCTION EXTENDED position;
+
+EXPLAIN
+SELECT position('abc', 'abcd'),
+       position('ccc', 'abcabc'),
+       position('23', 123),
+       position(23, 123),
+       position('abc', 'abcabc', 2),
+       position('abc', 'abcabc', '2'),
+       position(1, TRUE),
+       position(1, FALSE),
+       position(CAST('2' AS TINYINT), '12345'),
+       position('34', CAST('12345' AS SMALLINT)),
+       position('456', CAST('123456789012' AS BIGINT)),
+       position('.25', CAST(1.25 AS FLOAT)),
+       position('.0', CAST(16.0 AS DOUBLE)),
+       position(null, 'abc'),
+       position('abc', null),
+       position('abc', 'abcd', null),
+       position('abc', 'abcd', 'invalid number')
+FROM src tablesample (1 rows);
+
+SELECT position('abc', 'abcd'),
+       position('ccc', 'abcabc'),
+       position('23', 123),
+       position(23, 123),
+       position('abc', 'abcabc', 2),
+       position('abc', 'abcabc', '2'),
+       position(1, TRUE),
+       position(1, FALSE),
+       position(CAST('2' AS TINYINT), '12345'),
+       position('34', CAST('12345' AS SMALLINT)),
+       position('456', CAST('123456789012' AS BIGINT)),
+       position('.25', CAST(1.25 AS FLOAT)),
+       position('.0', CAST(16.0 AS DOUBLE)),
+       position(null, 'abc'),
+       position('abc', null),
+       position('abc', 'abcd', null),
+       position('abc', 'abcd', 'invalid number')
+FROM src tablesample (1 rows);
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index dc2b436..0b772f0 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -168,6 +168,7 @@ mask_show_last_n
 matchpath
 max
 md5
+mid
 min
 minute
 mod
@@ -196,6 +197,7 @@ percentile_approx
 pi
 pmod
 posexplode
+position
 positive
 pow
 power
@@ -437,6 +439,7 @@ mask_show_last_n
 matchpath
 max
 md5
+mid
 min
 minute
 mod
diff --git a/ql/src/test/results/clientpositive/udf_locate.q.out b/ql/src/test/results/clientpositive/udf_locate.q.out
index 63213b3..cb3f63b 100644
--- a/ql/src/test/results/clientpositive/udf_locate.q.out
+++ b/ql/src/test/results/clientpositive/udf_locate.q.out
@@ -8,6 +8,7 @@ PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION EXTENDED locate
 POSTHOOK: type: DESCFUNCTION
 locate(substr, str[, pos]) - Returns the position of the first occurance of substr in str after position pos
+Synonyms: position
 Example:
   > SELECT locate('bar', 'foobarbar', 5) FROM src LIMIT 1;
   7
diff --git a/ql/src/test/results/clientpositive/udf_mid.q.out b/ql/src/test/results/clientpositive/udf_mid.q.out
new file mode 100644
index 0000000..0e28452
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udf_mid.q.out
@@ -0,0 +1,21 @@
+PREHOOK: query: DESCRIBE FUNCTION mid
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION mid
+POSTHOOK: type: DESCFUNCTION
+mid(str, pos[, len]) - returns the substring of str that starts at pos and is of length len ormid(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED mid
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED mid
+POSTHOOK: type: DESCFUNCTION
+mid(str, pos[, len]) - returns the substring of str that starts at pos and is of length len ormid(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len
+Synonyms: substr, substring
+pos is a 1-based index. If pos<0 the starting position is determined by counting backwards from the end of str.
+Example:
+   > SELECT mid('Facebook', 5) FROM src LIMIT 1;
+  'book'
+  > SELECT mid('Facebook', -5) FROM src LIMIT 1;
+  'ebook'
+  > SELECT mid('Facebook', 5, 1) FROM src LIMIT 1;
+  'b'
+Function class:org.apache.hadoop.hive.ql.udf.UDFSubstr
+Function type:BUILTIN
diff --git a/ql/src/test/results/clientpositive/udf_position.q.out b/ql/src/test/results/clientpositive/udf_position.q.out
new file mode 100644
index 0000000..ac5f6c0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udf_position.q.out
@@ -0,0 +1,121 @@
+PREHOOK: query: DESCRIBE FUNCTION position
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION position
+POSTHOOK: type: DESCFUNCTION
+position(substr, str[, pos]) - Returns the position of the first occurance of substr in str after position pos
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED position
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED position
+POSTHOOK: type: DESCFUNCTION
+position(substr, str[, pos]) - Returns the position of the first occurance of substr in str after position pos
+Synonyms: locate
+Example:
+  > SELECT position('bar', 'foobarbar', 5) FROM src LIMIT 1;
+  7
+Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFLocate
+Function type:BUILTIN
+PREHOOK: query: EXPLAIN
+SELECT position('abc', 'abcd'),
+       position('ccc', 'abcabc'),
+       position('23', 123),
+       position(23, 123),
+       position('abc', 'abcabc', 2),
+       position('abc', 'abcabc', '2'),
+       position(1, TRUE),
+       position(1, FALSE),
+       position(CAST('2' AS TINYINT), '12345'),
+       position('34', CAST('12345' AS SMALLINT)),
+       position('456', CAST('123456789012' AS BIGINT)),
+       position('.25', CAST(1.25 AS FLOAT)),
+       position('.0', CAST(16.0 AS DOUBLE)),
+       position(null, 'abc'),
+       position('abc', null),
+       position('abc', 'abcd', null),
+       position('abc', 'abcd', 'invalid number')
+FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT position('abc', 'abcd'),
+       position('ccc', 'abcabc'),
+       position('23', 123),
+       position(23, 123),
+       position('abc', 'abcabc', 2),
+       position('abc', 'abcabc', '2'),
+       position(1, TRUE),
+       position(1, FALSE),
+       position(CAST('2' AS TINYINT), '12345'),
+       position('34', CAST('12345' AS SMALLINT)),
+       position('456', CAST('123456789012' AS BIGINT)),
+       position('.25', CAST(1.25 AS FLOAT)),
+       position('.0', CAST(16.0 AS DOUBLE)),
+       position(null, 'abc'),
+       position('abc', null),
+       position('abc', 'abcd', null),
+       position('abc', 'abcd', 'invalid number')
+FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: src
+          Row Limit Per Split: 1
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: 1 (type: int), 0 (type: int), 2 (type: int), 2 (type: int), 4 (type: int), 4 (type: int), 0 (type: int), 0 (type: int), 2 (type: int), 3 (type: int), 4 (type: int), 2 (type: int), 3 (type: int), null (type: int), null (type: int), 0 (type: int), 0 (type: int)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+            Statistics: Num rows: 500 Data size: 30008 Basic stats: COMPLETE Column stats: COMPLETE
+            ListSink
+
+PREHOOK: query: SELECT position('abc', 'abcd'),
+       position('ccc', 'abcabc'),
+       position('23', 123),
+       position(23, 123),
+       position('abc', 'abcabc', 2),
+       position('abc', 'abcabc', '2'),
+       position(1, TRUE),
+       position(1, FALSE),
+       position(CAST('2' AS TINYINT), '12345'),
+       position('34', CAST('12345' AS SMALLINT)),
+       position('456', CAST('123456789012' AS BIGINT)),
+       position('.25', CAST(1.25 AS FLOAT)),
+       position('.0', CAST(16.0 AS DOUBLE)),
+       position(null, 'abc'),
+       position('abc', null),
+       position('abc', 'abcd', null),
+       position('abc', 'abcd', 'invalid number')
+FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT position('abc', 'abcd'),
+       position('ccc', 'abcabc'),
+       position('23', 123),
+       position(23, 123),
+       position('abc', 'abcabc', 2),
+       position('abc', 'abcabc', '2'),
+       position(1, TRUE),
+       position(1, FALSE),
+       position(CAST('2' AS TINYINT), '12345'),
+       position('34', CAST('12345' AS SMALLINT)),
+       position('456', CAST('123456789012' AS BIGINT)),
+       position('.25', CAST(1.25 AS FLOAT)),
+       position('.0', CAST(16.0 AS DOUBLE)),
+       position(null, 'abc'),
+       position('abc', null),
+       position('abc', 'abcd', null),
+       position('abc', 'abcd', 'invalid number')
+FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+1	0	2	2	4	4	0	0	2	3	4	2	3	NULL	NULL	0	0
diff --git a/ql/src/test/results/clientpositive/udf_substr.q.out b/ql/src/test/results/clientpositive/udf_substr.q.out
index 4489006..00fa606 100644
--- a/ql/src/test/results/clientpositive/udf_substr.q.out
+++ b/ql/src/test/results/clientpositive/udf_substr.q.out
@@ -8,7 +8,7 @@ PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION EXTENDED substr
 POSTHOOK: type: DESCFUNCTION
 substr(str, pos[, len]) - returns the substring of str that starts at pos and is of length len orsubstr(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len
-Synonyms: substring
+Synonyms: mid, substring
 pos is a 1-based index. If pos<0 the starting position is determined by counting backwards from the end of str.
 Example:
    > SELECT substr('Facebook', 5) FROM src LIMIT 1;
diff --git a/ql/src/test/results/clientpositive/udf_substring.q.out b/ql/src/test/results/clientpositive/udf_substring.q.out
index 72898e6..d6b1c4a 100644
--- a/ql/src/test/results/clientpositive/udf_substring.q.out
+++ b/ql/src/test/results/clientpositive/udf_substring.q.out
@@ -8,7 +8,7 @@ PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION EXTENDED substring
 POSTHOOK: type: DESCFUNCTION
 substring(str, pos[, len]) - returns the substring of str that starts at pos and is of length len orsubstring(bin, pos[, len]) - returns the slice of byte array that starts at pos and is of length len
-Synonyms: substr
+Synonyms: mid, substr
 pos is a 1-based index. If pos<0 the starting position is determined by counting backwards from the end of str.
 Example:
    > SELECT substring('Facebook', 5) FROM src LIMIT 1;