You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by de...@apache.org on 2022/05/13 01:15:05 UTC

[hive] branch master updated: HIVE-25230: Add position and occurrence to INSTR() (#2378) (Quanlong Huang, reviewed by Zhihua Deng)

This is an automated email from the ASF dual-hosted git repository.

dengzh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new edd36e5c81 HIVE-25230: Add position and occurrence to INSTR() (#2378) (Quanlong Huang, reviewed by Zhihua Deng)
edd36e5c81 is described below

commit edd36e5c810c920825551519100354fb6ed59afd
Author: Quanlong Huang <hu...@gmail.com>
AuthorDate: Fri May 13 09:14:58 2022 +0800

    HIVE-25230: Add position and occurrence to INSTR() (#2378) (Quanlong Huang, reviewed by Zhihua Deng)
---
 .../hive/ql/udf/generic/GenericUDFInstr.java       | 135 ++++++++++++++++-----
 .../clientnegative/udf_instr_wrong_args_len2.q     |   2 +
 .../clientnegative/udf_instr_wrong_occurrence.q    |   2 +
 ql/src/test/queries/clientpositive/udf_instr.q     |  44 ++++++-
 .../clientnegative/udf_instr_wrong_args_len.q.out  |   2 +-
 .../clientnegative/udf_instr_wrong_args_len2.q.out |   1 +
 .../udf_instr_wrong_occurrence.q.out               |   1 +
 .../clientnegative/udf_instr_wrong_type.q.out      |   2 +-
 .../results/clientpositive/llap/udf_instr.q.out    | 110 +++++++++++++++--
 9 files changed, 258 insertions(+), 41 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInstr.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInstr.java
index a5579b9a9c..f87254018a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInstr.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFInstr.java
@@ -20,59 +20,82 @@ package org.apache.hadoop.hive.ql.udf.generic;
 
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
 
 /**
- * Generic UDF for string function <code>INSTR(str,substr)</code>. This mimcs
- * the function from MySQL
+ * Generic UDF for string function <code>INSTR(str,substr[,pos[,occurrence]])</code>.
+ * This extends the function from MySQL
  * http://dev.mysql.com/doc/refman/5.1/en/string-functions.html#function_instr
+ * and mimics the function from Oracle
+ * https://docs.oracle.com/database/121/SQLRF/functions089.htm#SQLRF00651
  *
  * <pre>
  * usage:
- * INSTR(str, substr)
+ * INSTR(str, substr[, pos[, occurrence]])
  * </pre>
  * <p>
  */
 @Description(name = "instr",
-    value = "_FUNC_(str, substr) - Returns the index of the first occurance of substr in str",
-    extended = "Example:\n"
-    + "  > SELECT _FUNC_('Facebook', 'boo') FROM src LIMIT 1;\n" + "  5")
+    value = "_FUNC_(str, substr[, pos[, occurrence]]) " +
+        "- Returns the index of the given occurrence of substr in str after position pos",
+    extended = "pos is a 1-based index. If pos < 0, the starting position is\n" +
+        "determined by counting backwards from the end of str and then Hive\n" +
+        "searches backward from the resulting position.\n" +
+        "occurrence is also a 1-based index. The value must be positive.\n" +
+        "If occurrence is greater than the number of matching occurrences,\n" +
+        "the function returns 0.\n" +
+        "If either of the optional arguments, pos or occurrence, is NULL,\n" +
+        "the function also returns NULL.\n" +
+        "Example:\n" +
+        "  > SELECT _FUNC_('Facebook', 'boo') FROM src LIMIT 1;\n" +
+        "  5\n" +
+        "  > SELECT _FUNC_('CORPORATE FLOOR','OR', 3, 2) FROM src LIMIT 1;\n" +
+        "  14\n" +
+        "  > SELECT _FUNC_('CORPORATE FLOOR','OR', -3, 2) FROM src LIMIT 1;\n" +
+        "  2")
 public class GenericUDFInstr extends GenericUDF {
 
   private transient ObjectInspectorConverters.Converter[] converters;
+  private transient PrimitiveObjectInspector.PrimitiveCategory[] inputTypes;
+  private transient Integer posConst;
+  private transient boolean isPosConst = false;
+  private transient Integer occurrenceConst;
+  private transient boolean isOccurrenceConst = false;
 
   @Override
   public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
-    if (arguments.length != 2) {
-      throw new UDFArgumentLengthException(
-          "The function INSTR accepts exactly 2 arguments.");
-    }
+    checkArgsSize(arguments, 2, 4);
 
+    converters = new ObjectInspectorConverters.Converter[arguments.length];
+    inputTypes = new PrimitiveObjectInspector.PrimitiveCategory[arguments.length];
     for (int i = 0; i < arguments.length; i++) {
-      Category category = arguments[i].getCategory();
-      if (category != Category.PRIMITIVE) {
-        throw new UDFArgumentTypeException(i, "The "
-            + GenericUDFUtils.getOrdinal(i + 1)
-            + " argument of function INSTR is expected to a "
-            + Category.PRIMITIVE.toString().toLowerCase() + " type, but "
-            + category.toString().toLowerCase() + " is found");
+      checkArgPrimitive(arguments, i);
+      if (i < 2) {
+        obtainStringConverter(arguments, i, inputTypes, converters);
+      } else {
+        obtainIntConverter(arguments, i, inputTypes, converters);
       }
     }
 
-    converters = new ObjectInspectorConverters.Converter[arguments.length];
-    for (int i = 0; i < arguments.length; i++) {
-      converters[i] = ObjectInspectorConverters.getConverter(arguments[i],
-          PrimitiveObjectInspectorFactory.writableStringObjectInspector);
+    if (arguments.length > 2 && arguments[2] instanceof ConstantObjectInspector) {
+      posConst = getConstantIntValue(arguments, 2);
+      isPosConst = true;
+    }
+    if (arguments.length > 3 && arguments[3] instanceof ConstantObjectInspector) {
+      occurrenceConst = getConstantIntValue(arguments, 3);
+      isOccurrenceConst = true;
+      if (occurrenceConst != null && occurrenceConst <= 0) {
+        throw new UDFArgumentException("occurrence of function INSTR should be " +
+            "positive, got " + occurrenceConst);
+      }
     }
-
     return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
   }
 
@@ -80,19 +103,75 @@ public class GenericUDFInstr extends GenericUDF {
 
   @Override
   public Object evaluate(DeferredObject[] arguments) throws HiveException {
-    if (arguments[0].get() == null || arguments[1].get() == null) {
+    if (arguments[0].get() == null || arguments[1].get() == null
+        || (isPosConst && posConst == null)
+        || (isOccurrenceConst && occurrenceConst == null)) {
       return null;
     }
+    int pos = 1;
+    if (isPosConst) {
+      pos = posConst;
+    } else if (arguments.length > 2) {
+      IntWritable posWritable = (IntWritable) converters[2].convert(arguments[2].get());
+      if (posWritable == null) {
+        return null;
+      }
+      pos = posWritable.get();
+      if (pos == 0) {
+        intWritable.set(0);
+        return intWritable;
+      }
+    }
+    int occurrence = 1;
+    if (isOccurrenceConst) {
+      occurrence = occurrenceConst;
+    } else if (arguments.length > 3) {
+      IntWritable occurrenceWritable = (IntWritable) converters[3]
+          .convert(arguments[3].get());
+      if (occurrenceWritable == null) {
+        return null;
+      }
+      occurrence = occurrenceWritable.get();
+      if (occurrence <= 0) {
+        // The illegal occurrence is not a const value so we can't fail the query at
+        // semantic analysis. Just returns NULL for this row.
+        return null;
+      }
+    }
 
     Text text = (Text) converters[0].convert(arguments[0].get());
     Text subtext = (Text) converters[1].convert(arguments[1].get());
-    intWritable.set(GenericUDFUtils.findText(text, subtext, 0) + 1);
+    String textString = text.toString();
+    String subtextString = subtext.toString();
+
+    int matchPos = -1;
+    int matchNum;
+    if (pos > 0) {
+      int beg = pos - 1;
+      for (matchNum = 0; matchNum < occurrence && beg < textString.length(); matchNum++) {
+        matchPos = textString.indexOf(subtextString, beg);
+        if (matchPos == -1) break;
+        beg = matchPos + 1;
+      }
+    } else {
+      int beg = textString.length() + pos;
+      for (matchNum = 0; matchNum < occurrence && beg >= 0; matchNum++) {
+        matchPos = textString.lastIndexOf(subtextString, beg);
+        if (matchPos == -1) break;
+        beg = matchPos - 1;
+      }
+    }
+    if (matchNum < occurrence) {
+      matchPos = -1;
+    }
+
+    intWritable.set(matchPos + 1);
     return intWritable;
   }
 
   @Override
   public String getDisplayString(String[] children) {
-    assert (children.length == 2);
+    assert (children.length >= 2 && children.length <= 4);
     return getStandardDisplayString("instr", children);
   }
 }
diff --git a/ql/src/test/queries/clientnegative/udf_instr_wrong_args_len2.q b/ql/src/test/queries/clientnegative/udf_instr_wrong_args_len2.q
new file mode 100644
index 0000000000..fd02fc13cb
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/udf_instr_wrong_args_len2.q
@@ -0,0 +1,2 @@
+--! qt:dataset:src
+SELECT instr('abcd', 'bc', 1, 1, 1) FROM src;
diff --git a/ql/src/test/queries/clientnegative/udf_instr_wrong_occurrence.q b/ql/src/test/queries/clientnegative/udf_instr_wrong_occurrence.q
new file mode 100644
index 0000000000..46a911a679
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/udf_instr_wrong_occurrence.q
@@ -0,0 +1,2 @@
+--! qt:dataset:src
+SELECT instr('abcdabc', 'abc', 2, 0) FROM src;
diff --git a/ql/src/test/queries/clientpositive/udf_instr.q b/ql/src/test/queries/clientpositive/udf_instr.q
index a7e837e2a2..b7027b8184 100644
--- a/ql/src/test/queries/clientpositive/udf_instr.q
+++ b/ql/src/test/queries/clientpositive/udf_instr.q
@@ -17,7 +17,27 @@ SELECT instr('abcd', 'abc'),
        instr(CAST(1.25 AS FLOAT), '.25'),
        instr(CAST(16.0 AS DOUBLE), '.0'),
        instr(null, 'abc'),
-       instr('abcd', null)
+       instr('abcd', null),
+       instr('abcdabcd', 'abc', 1),
+       instr('abcdabcd', 'abc', 2),
+       instr('abcdabcd', 'abc', 5),
+       instr('abcdabcd', 'abc', 6),
+       instr('abcdabcd', 'abc', -1),
+       instr('abcdabcd', 'abc', -4),
+       instr('abcdabcd', 'abc', -5),
+       instr('abcdabcd', 'abc', 1, 1),
+       instr('abcdabcd', 'abc', 1, 2),
+       instr('abcdabcd', 'abc', 1, 3),
+       instr('abcdabcd', 'abc', 2, 1),
+       instr('abcdabcd', 'abc', 2, 2),
+       instr('abcdabcd', 'abc', 5, 2),
+       instr('abcdabcd', 'abc', -1, 2),
+       instr('abcdabcd', 'abc', -4, 2),
+       instr('abcdabcd', 'abc', -5, 2),
+       instr('abcdabcd', 'abc', 1, null),
+       instr('abcdabcd', 'abc', null, 1),
+       instr('aaa', 'a', 3, 1),
+       instr('aaa', 'a', 3, 2)
 FROM src tablesample (1 rows);
 
 SELECT instr('abcd', 'abc'),
@@ -32,5 +52,25 @@ SELECT instr('abcd', 'abc'),
        instr(CAST(1.25 AS FLOAT), '.25'),
        instr(CAST(16.0 AS DOUBLE), '.0'),
        instr(null, 'abc'),
-       instr('abcd', null)
+       instr('abcd', null),
+       instr('abcdabcd', 'abc', 1),
+       instr('abcdabcd', 'abc', 2),
+       instr('abcdabcd', 'abc', 5),
+       instr('abcdabcd', 'abc', 6),
+       instr('abcdabcd', 'abc', -1),
+       instr('abcdabcd', 'abc', -4),
+       instr('abcdabcd', 'abc', -5),
+       instr('abcdabcd', 'abc', 1, 1),
+       instr('abcdabcd', 'abc', 1, 2),
+       instr('abcdabcd', 'abc', 1, 3),
+       instr('abcdabcd', 'abc', 2, 1),
+       instr('abcdabcd', 'abc', 2, 2),
+       instr('abcdabcd', 'abc', 5, 2),
+       instr('abcdabcd', 'abc', -1, 2),
+       instr('abcdabcd', 'abc', -4, 2),
+       instr('abcdabcd', 'abc', -5, 2),
+       instr('abcdabcd', 'abc', 1, null),
+       instr('abcdabcd', 'abc', null, 1),
+       instr('aaa', 'a', 3, 1),
+       instr('aaa', 'a', 3, 2)
 FROM src tablesample (1 rows);
diff --git a/ql/src/test/results/clientnegative/udf_instr_wrong_args_len.q.out b/ql/src/test/results/clientnegative/udf_instr_wrong_args_len.q.out
index 25df5a5de3..fa7977b873 100644
--- a/ql/src/test/results/clientnegative/udf_instr_wrong_args_len.q.out
+++ b/ql/src/test/results/clientnegative/udf_instr_wrong_args_len.q.out
@@ -1 +1 @@
-FAILED: SemanticException [Error 10015]: Line 2:7 Arguments length mismatch ''abcd'': The function INSTR accepts exactly 2 arguments.
+FAILED: SemanticException [Error 10015]: Line 2:7 Arguments length mismatch ''abcd'': instr requires 2..4 argument(s), got 1
diff --git a/ql/src/test/results/clientnegative/udf_instr_wrong_args_len2.q.out b/ql/src/test/results/clientnegative/udf_instr_wrong_args_len2.q.out
new file mode 100644
index 0000000000..79e09fc5b0
--- /dev/null
+++ b/ql/src/test/results/clientnegative/udf_instr_wrong_args_len2.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10015]: Line 2:7 Arguments length mismatch '1': instr requires 2..4 argument(s), got 5
diff --git a/ql/src/test/results/clientnegative/udf_instr_wrong_occurrence.q.out b/ql/src/test/results/clientnegative/udf_instr_wrong_occurrence.q.out
new file mode 100644
index 0000000000..81f74b21b7
--- /dev/null
+++ b/ql/src/test/results/clientnegative/udf_instr_wrong_occurrence.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10014]: Line 2:7 Wrong arguments '0': occurrence of function INSTR should be positive, got 0
diff --git a/ql/src/test/results/clientnegative/udf_instr_wrong_type.q.out b/ql/src/test/results/clientnegative/udf_instr_wrong_type.q.out
index 9aa895b4eb..35e0d57823 100644
--- a/ql/src/test/results/clientnegative/udf_instr_wrong_type.q.out
+++ b/ql/src/test/results/clientnegative/udf_instr_wrong_type.q.out
@@ -1 +1 @@
-FAILED: SemanticException [Error 10016]: Line 3:21 Argument type mismatch 'lintstring': The 2nd argument of function INSTR is expected to a primitive type, but list is found
+FAILED: SemanticException [Error 10016]: Line 3:21 Argument type mismatch 'lintstring': instr only takes primitive types as 2nd argument, got LIST
diff --git a/ql/src/test/results/clientpositive/llap/udf_instr.q.out b/ql/src/test/results/clientpositive/llap/udf_instr.q.out
index 9c59287082..bf6d3ce84e 100644
--- a/ql/src/test/results/clientpositive/llap/udf_instr.q.out
+++ b/ql/src/test/results/clientpositive/llap/udf_instr.q.out
@@ -2,15 +2,27 @@ PREHOOK: query: DESCRIBE FUNCTION instr
 PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION instr
 POSTHOOK: type: DESCFUNCTION
-instr(str, substr) - Returns the index of the first occurance of substr in str
+instr(str, substr[, pos[, occurrence]]) - Returns the index of the given occurrence of substr in str after position pos
 PREHOOK: query: DESCRIBE FUNCTION EXTENDED instr
 PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: DESCRIBE FUNCTION EXTENDED instr
 POSTHOOK: type: DESCFUNCTION
-instr(str, substr) - Returns the index of the first occurance of substr in str
+instr(str, substr[, pos[, occurrence]]) - Returns the index of the given occurrence of substr in str after position pos
+pos is a 1-based index. If pos < 0, the starting position is
+determined by counting backwards from the end of str and then Hive
+searches backward from the resulting position.
+occurrence is also a 1-based index. The value must be positive.
+If occurrence is greater than the number of matching occurrences,
+the function returns 0.
+If either of the optional arguments, pos or occurrence, is NULL,
+the function also returns NULL.
 Example:
   > SELECT instr('Facebook', 'boo') FROM src LIMIT 1;
   5
+  > SELECT instr('CORPORATE FLOOR','OR', 3, 2) FROM src LIMIT 1;
+  14
+  > SELECT instr('CORPORATE FLOOR','OR', -3, 2) FROM src LIMIT 1;
+  2
 Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFInstr
 Function type:BUILTIN
 PREHOOK: query: EXPLAIN
@@ -26,7 +38,27 @@ SELECT instr('abcd', 'abc'),
        instr(CAST(1.25 AS FLOAT), '.25'),
        instr(CAST(16.0 AS DOUBLE), '.0'),
        instr(null, 'abc'),
-       instr('abcd', null)
+       instr('abcd', null),
+       instr('abcdabcd', 'abc', 1),
+       instr('abcdabcd', 'abc', 2),
+       instr('abcdabcd', 'abc', 5),
+       instr('abcdabcd', 'abc', 6),
+       instr('abcdabcd', 'abc', -1),
+       instr('abcdabcd', 'abc', -4),
+       instr('abcdabcd', 'abc', -5),
+       instr('abcdabcd', 'abc', 1, 1),
+       instr('abcdabcd', 'abc', 1, 2),
+       instr('abcdabcd', 'abc', 1, 3),
+       instr('abcdabcd', 'abc', 2, 1),
+       instr('abcdabcd', 'abc', 2, 2),
+       instr('abcdabcd', 'abc', 5, 2),
+       instr('abcdabcd', 'abc', -1, 2),
+       instr('abcdabcd', 'abc', -4, 2),
+       instr('abcdabcd', 'abc', -5, 2),
+       instr('abcdabcd', 'abc', 1, null),
+       instr('abcdabcd', 'abc', null, 1),
+       instr('aaa', 'a', 3, 1),
+       instr('aaa', 'a', 3, 2)
 FROM src tablesample (1 rows)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
@@ -44,7 +76,27 @@ SELECT instr('abcd', 'abc'),
        instr(CAST(1.25 AS FLOAT), '.25'),
        instr(CAST(16.0 AS DOUBLE), '.0'),
        instr(null, 'abc'),
-       instr('abcd', null)
+       instr('abcd', null),
+       instr('abcdabcd', 'abc', 1),
+       instr('abcdabcd', 'abc', 2),
+       instr('abcdabcd', 'abc', 5),
+       instr('abcdabcd', 'abc', 6),
+       instr('abcdabcd', 'abc', -1),
+       instr('abcdabcd', 'abc', -4),
+       instr('abcdabcd', 'abc', -5),
+       instr('abcdabcd', 'abc', 1, 1),
+       instr('abcdabcd', 'abc', 1, 2),
+       instr('abcdabcd', 'abc', 1, 3),
+       instr('abcdabcd', 'abc', 2, 1),
+       instr('abcdabcd', 'abc', 2, 2),
+       instr('abcdabcd', 'abc', 5, 2),
+       instr('abcdabcd', 'abc', -1, 2),
+       instr('abcdabcd', 'abc', -4, 2),
+       instr('abcdabcd', 'abc', -5, 2),
+       instr('abcdabcd', 'abc', 1, null),
+       instr('abcdabcd', 'abc', null, 1),
+       instr('aaa', 'a', 3, 1),
+       instr('aaa', 'a', 3, 2)
 FROM src tablesample (1 rows)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
@@ -61,8 +113,8 @@ STAGE PLANS:
           alias: src
           Row Limit Per Split: 1
           Select Operator
-            expressions: 1 (type: int), 0 (type: int), 2 (type: int), 2 (type: int), 0 (type: int), 0 (type: int), 2 (type: int), 3 (type: int), 4 (type: int), 2 (type: int), 3 (type: int), null (type: int), null (type: int)
-            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+            expressions: 1 (type: int), 0 (type: int), 2 (type: int), 2 (type: int), 0 (type: int), 0 (type: int), 2 (type: int), 3 (type: int), 4 (type: int), 2 (type: int), 3 (type: int), null (type: int), null (type: int), 1 (type: int), 5 (type: int), 5 (type: int), 0 (type: int), 5 (type: int), 5 (type: int), 1 (type: int), 1 (type: int), 5 (type: int), 0 (type: int), 5 (type: int), 0 (type: int), 0 (type: int), 1 (type: int), 1 (type: int), 0 (type: int), null (type: int), null (ty [...]
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32
             ListSink
 
 PREHOOK: query: SELECT instr('abcd', 'abc'),
@@ -77,7 +129,27 @@ PREHOOK: query: SELECT instr('abcd', 'abc'),
        instr(CAST(1.25 AS FLOAT), '.25'),
        instr(CAST(16.0 AS DOUBLE), '.0'),
        instr(null, 'abc'),
-       instr('abcd', null)
+       instr('abcd', null),
+       instr('abcdabcd', 'abc', 1),
+       instr('abcdabcd', 'abc', 2),
+       instr('abcdabcd', 'abc', 5),
+       instr('abcdabcd', 'abc', 6),
+       instr('abcdabcd', 'abc', -1),
+       instr('abcdabcd', 'abc', -4),
+       instr('abcdabcd', 'abc', -5),
+       instr('abcdabcd', 'abc', 1, 1),
+       instr('abcdabcd', 'abc', 1, 2),
+       instr('abcdabcd', 'abc', 1, 3),
+       instr('abcdabcd', 'abc', 2, 1),
+       instr('abcdabcd', 'abc', 2, 2),
+       instr('abcdabcd', 'abc', 5, 2),
+       instr('abcdabcd', 'abc', -1, 2),
+       instr('abcdabcd', 'abc', -4, 2),
+       instr('abcdabcd', 'abc', -5, 2),
+       instr('abcdabcd', 'abc', 1, null),
+       instr('abcdabcd', 'abc', null, 1),
+       instr('aaa', 'a', 3, 1),
+       instr('aaa', 'a', 3, 2)
 FROM src tablesample (1 rows)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
@@ -94,9 +166,29 @@ POSTHOOK: query: SELECT instr('abcd', 'abc'),
        instr(CAST(1.25 AS FLOAT), '.25'),
        instr(CAST(16.0 AS DOUBLE), '.0'),
        instr(null, 'abc'),
-       instr('abcd', null)
+       instr('abcd', null),
+       instr('abcdabcd', 'abc', 1),
+       instr('abcdabcd', 'abc', 2),
+       instr('abcdabcd', 'abc', 5),
+       instr('abcdabcd', 'abc', 6),
+       instr('abcdabcd', 'abc', -1),
+       instr('abcdabcd', 'abc', -4),
+       instr('abcdabcd', 'abc', -5),
+       instr('abcdabcd', 'abc', 1, 1),
+       instr('abcdabcd', 'abc', 1, 2),
+       instr('abcdabcd', 'abc', 1, 3),
+       instr('abcdabcd', 'abc', 2, 1),
+       instr('abcdabcd', 'abc', 2, 2),
+       instr('abcdabcd', 'abc', 5, 2),
+       instr('abcdabcd', 'abc', -1, 2),
+       instr('abcdabcd', 'abc', -4, 2),
+       instr('abcdabcd', 'abc', -5, 2),
+       instr('abcdabcd', 'abc', 1, null),
+       instr('abcdabcd', 'abc', null, 1),
+       instr('aaa', 'a', 3, 1),
+       instr('aaa', 'a', 3, 2)
 FROM src tablesample (1 rows)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
-1	0	2	2	0	0	2	3	4	2	3	NULL	NULL
+1	0	2	2	0	0	2	3	4	2	3	NULL	NULL	1	5	5	0	5	5	1	1	5	0	5	0	0	1	1	0	NULL	NULL	3	0