You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by jh...@apache.org on 2022/12/08 04:06:05 UTC

[calcite] branch main updated: [CALCITE-5389] Add STARTS_WITH and ENDS_WITH functions (enabled in BigQuery library)

This is an automated email from the ASF dual-hosted git repository.

jhyde pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new 742d47795b [CALCITE-5389] Add STARTS_WITH and ENDS_WITH functions (enabled in BigQuery library)
742d47795b is described below

commit 742d47795b878576f8cbdab5f62b26d95559792a
Author: Oliver Lee <ol...@google.com>
AuthorDate: Tue Nov 22 21:49:24 2022 +0000

    [CALCITE-5389] Add STARTS_WITH and ENDS_WITH functions (enabled in BigQuery library)
    
    Close apache/calcite#2980
---
 babel/src/test/resources/sql/big-query.iq          | 59 ++++++++++++++++++++++
 .../calcite/adapter/enumerable/RexImpTable.java    |  4 ++
 .../org/apache/calcite/runtime/SqlFunctions.java   | 22 ++++++++
 .../calcite/sql/fun/SqlLibraryOperators.java       | 12 +++++
 .../org/apache/calcite/util/BuiltInMethod.java     |  2 +
 site/_docs/reference.md                            |  4 +-
 .../org/apache/calcite/test/SqlOperatorTest.java   | 48 ++++++++++++++++++
 7 files changed, 150 insertions(+), 1 deletion(-)

diff --git a/babel/src/test/resources/sql/big-query.iq b/babel/src/test/resources/sql/big-query.iq
index 7795f54a2b..281604d291 100755
--- a/babel/src/test/resources/sql/big-query.iq
+++ b/babel/src/test/resources/sql/big-query.iq
@@ -2537,4 +2537,63 @@ SELECT PARSE_TIMESTAMP("%c", "Thu Dec 25 07:30:00 2008") AS parsed;
 !ok
 !}
 
+#####################################################################
+# STRING and BYTES functions ########################################
+
+#####################################################################
+# ENDS_WITH(value1, value2)
+#
+# Takes two STRING or BYTES values. Returns TRUE if the second
+# value is a suffix of the first.
+#
+# This function supports specifying collation.
+WITH items AS (
+  SELECT '1234' as item UNION ALL
+  SELECT '123' UNION ALL
+  SELECT '' UNION ALL
+  SELECT NULL
+)
+SELECT
+  item, ENDS_WITH(item, '34') as example
+FROM items;
++------+---------+
+| item | example |
++------+---------+
+|      | false   |
+| 123  | false   |
+| 1234 | true    |
+|      |         |
++------+---------+
+(4 rows)
+
+!ok
+
+#####################################################################
+# STARTS_WITH(value1, value2)
+#
+# Takes two STRING or BYTES values. Returns TRUE if the second
+# value is a prefix of the first.
+#
+# This function supports specifying collation.
+WITH items AS (
+  SELECT 'foo' as item UNION ALL
+  SELECT 'bar' UNION ALL
+  SELECT 'baz' UNION ALL
+  SELECT NULL
+)
+SELECT
+  item, STARTS_WITH(item, 'b') as example
+FROM items;
++------+---------+
+| item | example |
++------+---------+
+| bar  | true    |
+| baz  | true    |
+| foo  | false   |
+|      |         |
++------+---------+
+(4 rows)
+
+!ok
+
 # End big-query.iq
diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index 396f640786..4f7e62d65d 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -127,6 +127,7 @@ import static org.apache.calcite.sql.fun.SqlLibraryOperators.DATEADD;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.DATE_FROM_UNIX_DATE;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.DAYNAME;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.DIFFERENCE;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.ENDS_WITH;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXISTS_NODE;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_VALUE;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_XML;
@@ -153,6 +154,7 @@ import static org.apache.calcite.sql.fun.SqlLibraryOperators.SHA1;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.SINH;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.SOUNDEX;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.SPACE;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.STARTS_WITH;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.STRCMP;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.TANH;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.TIMESTAMP_MICROS;
@@ -393,6 +395,8 @@ public class RexImpTable {
       defineMethod(SUBSTRING, BuiltInMethod.SUBSTRING.method, NullPolicy.STRICT);
       defineMethod(LEFT, BuiltInMethod.LEFT.method, NullPolicy.ANY);
       defineMethod(RIGHT, BuiltInMethod.RIGHT.method, NullPolicy.ANY);
+      defineMethod(STARTS_WITH, BuiltInMethod.STARTS_WITH.method, NullPolicy.STRICT);
+      defineMethod(ENDS_WITH, BuiltInMethod.ENDS_WITH.method, NullPolicy.STRICT);
       defineMethod(REPLACE, BuiltInMethod.REPLACE.method, NullPolicy.STRICT);
       defineMethod(TRANSLATE3, BuiltInMethod.TRANSLATE3.method, NullPolicy.STRICT);
       defineMethod(CHR, BuiltInMethod.CHAR_FROM_UTF8.method, NullPolicy.STRICT);
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index aa1ed8a926..41ef55a8d6 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -270,6 +270,28 @@ public class SqlFunctions {
     return flags;
   }
 
+  /** SQL {@code ENDS_WITH(string, string)} function. */
+  public static boolean endsWith(String s0, String s1) {
+    return s0.endsWith(s1);
+  }
+
+  /** SQL {@code ENDS_WITH(binary, binary)} function. */
+  public static boolean endsWith(ByteString s0, ByteString s1) {
+    return s0.length() >= s1.length()
+        && s0.substring(s0.length() - s1.length()).equals(s1);
+  }
+
+  /** SQL {@code STARTS_WITH(string, string)} function. */
+  public static boolean startsWith(String s0, String s1) {
+    return s0.startsWith(s1);
+  }
+
+  /** SQL {@code STARTS_WITH(binary, binary)} function. */
+  public static boolean startsWith(ByteString s0, ByteString s1) {
+    return s0.length() >= s1.length()
+        && s0.substring(0, s1.length()).equals(s1);
+  }
+
   /** SQL SUBSTRING(string FROM ...) function. */
   public static String substring(String c, int s) {
     final int s0 = s - 1;
diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index f8ea6c7821..c7fb48a1a2 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -212,6 +212,18 @@ public abstract class SqlLibraryOperators {
           OperandTypes.STRING_INTEGER_OPTIONAL_INTEGER,
           SqlFunctionCategory.STRING);
 
+  /** The "ENDS_WITH(value1, value2)" function (BigQuery). */
+  @LibraryOperator(libraries = {BIG_QUERY})
+  public static final SqlFunction ENDS_WITH =
+      SqlBasicFunction.create("ENDS_WITH", ReturnTypes.BOOLEAN_NULLABLE,
+          OperandTypes.STRING_SAME_SAME, SqlFunctionCategory.STRING);
+
+  /** The "STARTS_WITH(value1, value2)" function (BigQuery). */
+  @LibraryOperator(libraries = {BIG_QUERY})
+  public static final SqlFunction STARTS_WITH =
+      SqlBasicFunction.create("STARTS_WITH", ReturnTypes.BOOLEAN_NULLABLE,
+          OperandTypes.STRING_SAME_SAME, SqlFunctionCategory.STRING);
+
   /** BigQuery's "SUBSTR(string, position [, substringLength ])" function. */
   @LibraryOperator(libraries = {BIG_QUERY})
   public static final SqlFunction SUBSTR_BIG_QUERY =
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index 55d2f07b94..ba4c6d0f0c 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -397,6 +397,8 @@ public enum BuiltInMethod {
   INITCAP(SqlFunctions.class, "initcap", String.class),
   SUBSTRING(SqlFunctions.class, "substring", String.class, int.class,
       int.class),
+  STARTS_WITH(SqlFunctions.class, "startsWith", String.class, String.class),
+  ENDS_WITH(SqlFunctions.class, "endsWith", String.class, String.class),
   OCTET_LENGTH(SqlFunctions.class, "octetLength", ByteString.class),
   CHAR_LENGTH(SqlFunctions.class, "charLength", String.class),
   STRING_CONCAT(SqlFunctions.class, "concat", String.class, String.class),
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index c81ed03159..00e2c2048c 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -2617,6 +2617,7 @@ semantics.
 | p | DATE_PART(timeUnit, datetime)                  | Equivalent to `EXTRACT(timeUnit FROM  datetime)`
 | o | DECODE(value, value1, result1 [, valueN, resultN ]* [, default ]) | Compares *value* to each *valueN* value one by one; if *value* is equal to a *valueN*, returns the corresponding *resultN*, else returns *default*, or NULL if *default* is not specified
 | p | DIFFERENCE(string, string)                     | Returns a measure of the similarity of two strings, namely the number of character positions that their `SOUNDEX` values have in common: 4 if the `SOUNDEX` values are same and 0 if the `SOUNDEX` values are totally different
+| b | ENDS_WITH(string1, string2)                    | Returns whether *string2* is a suffix of *string1*
 | o | EXTRACT(xml, xpath, [, namespaces ])           | Returns the xml fragment of the element or elements matched by the XPath expression. The optional namespace value that specifies a default mapping or namespace mapping for prefixes, which is used when evaluating the XPath expression
 | o | EXISTSNODE(xml, xpath, [, namespaces ])        | Determines whether traversal of a XML document using a specified xpath results in any nodes. Returns 0 if no nodes remain after applying the XPath traversal on the document fragment of the element or elements matched by the XPath expression. Returns 1 if any nodes remain. The optional namespace value that specifies a default mapping or namespace mapping for prefixes, which is used when evaluating the XPath expression.
 | m | EXTRACTVALUE(xml, xpathExpr))                  | Returns the text of the first text node which is a child of the element or elements matched by the XPath expression.
@@ -2650,8 +2651,9 @@ semantics.
 | b o | SINH(numeric)                                | Returns the hyperbolic sine of *numeric*
 | b m o p | SOUNDEX(string)                          | Returns the phonetic representation of *string*; throws if *string* is encoded with multi-byte encoding such as UTF-8
 | m | SPACE(integer)                                 | Returns a string of *integer* spaces; returns an empty string if *integer* is less than 1
-| b m o p | SUBSTR(string, position [, substringLength ]) | Returns a portion of *string*, beginning at character *position*, *substringLength* characters long. SUBSTR calculates lengths using characters as defined by the input character set
+| b | STARTS_WITH(string1, string2)                  | Returns whether *string2* is a prefix of *string1*
 | m | STRCMP(string, string)                         | Returns 0 if both of the strings are same and returns -1 when the first argument is smaller than the second and 1 when the second one is smaller than the first one
+| b m o p | SUBSTR(string, position [, substringLength ]) | Returns a portion of *string*, beginning at character *position*, *substringLength* characters long. SUBSTR calculates lengths using characters as defined by the input character set
 | b o | TANH(numeric)                                | Returns the hyperbolic tangent of *numeric*
 | b | TIMESTAMP_ADD(timestamp, interval int64 date_part) | Adds int64_expression units of date_part to the timestamp, independent of any time zone.
 | b | TIMESTAMP_MICROS(integer)                      | Returns the TIMESTAMP that is *integer* microseconds after 1970-01-01 00:00:00
diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
index ec03be71e3..2151ad08f4 100644
--- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
+++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
@@ -5847,6 +5847,54 @@ public class SqlOperatorTest {
     f.checkNull("last_day(cast(null as timestamp))");
   }
 
+  @Test void testStartsWithFunction() {
+    final SqlOperatorFixture f = fixture().withLibrary(SqlLibrary.BIG_QUERY);
+    f.setFor(SqlLibraryOperators.STARTS_WITH);
+    f.checkBoolean("starts_with('12345', '123')", true);
+    f.checkBoolean("starts_with('12345', '1243')", false);
+    f.checkBoolean("starts_with(x'11', x'11')", true);
+    f.checkBoolean("starts_with(x'112211', x'33')", false);
+    f.checkFails("^starts_with('aabbcc', x'aa')^",
+        "Cannot apply 'STARTS_WITH' to arguments of type "
+            + "'STARTS_WITH\\(<CHAR\\(6\\)>, <BINARY\\(1\\)>\\)'\\. Supported "
+            + "form\\(s\\): 'STARTS_WITH\\(<STRING>, <STRING>\\)'",
+        false);
+    f.checkNull("starts_with(null, null)");
+    f.checkNull("starts_with('12345', null)");
+    f.checkNull("starts_with(null, '123')");
+    f.checkBoolean("starts_with('', '123')", false);
+    f.checkBoolean("starts_with('', '')", true);
+    f.checkNull("starts_with(x'aa', null)");
+    f.checkNull("starts_with(null, x'aa')");
+    f.checkBoolean("starts_with(x'1234', x'')", true);
+    f.checkBoolean("starts_with(x'', x'123456')", false);
+    f.checkBoolean("starts_with(x'', x'')", true);
+  }
+
+  @Test void testEndsWithFunction() {
+    final SqlOperatorFixture f = fixture().withLibrary(SqlLibrary.BIG_QUERY);
+    f.setFor(SqlLibraryOperators.ENDS_WITH);
+    f.checkBoolean("ends_with('12345', '345')", true);
+    f.checkBoolean("ends_with('12345', '123')", false);
+    f.checkBoolean("ends_with(x'11', x'11')", true);
+    f.checkBoolean("ends_with(x'112211', x'33')", false);
+    f.checkFails("^ends_with('aabbcc', x'aa')^",
+        "Cannot apply 'ENDS_WITH' to arguments of type "
+            + "'ENDS_WITH\\(<CHAR\\(6\\)>, <BINARY\\(1\\)>\\)'\\. Supported "
+            + "form\\(s\\): 'ENDS_WITH\\(<STRING>, <STRING>\\)'",
+        false);
+    f.checkNull("ends_with(null, null)");
+    f.checkNull("ends_with('12345', null)");
+    f.checkNull("ends_with(null, '123')");
+    f.checkBoolean("ends_with('', '123')", false);
+    f.checkBoolean("ends_with('', '')", true);
+    f.checkNull("ends_with(x'aa', null)");
+    f.checkNull("ends_with(null, x'aa')");
+    f.checkBoolean("ends_with(x'1234', x'')", true);
+    f.checkBoolean("ends_with(x'', x'123456')", false);
+    f.checkBoolean("ends_with(x'', x'')", true);
+  }
+
   /** Tests the {@code SUBSTRING} operator. Many test cases that used to be
    * have been moved to {@link SubFunChecker#assertSubFunReturns}, and are
    * called for both {@code SUBSTRING} and {@code SUBSTR}. */