You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by ch...@apache.org on 2019/05/17 04:26:48 UTC
[calcite] branch master updated: [CALCITE-2965] Implement string
functions: REPEAT, SPACE, SOUNDEX, DIFFERENCE
This is an automated email from the ASF dual-hosted git repository.
chunwei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push:
new 4d04773 [CALCITE-2965] Implement string functions: REPEAT, SPACE, SOUNDEX, DIFFERENCE
4d04773 is described below
commit 4d04773054c5bda3b43c3dbeb2be01df19cbe3e3
Author: Chunwei Lei <ch...@alibaba-inc.com>
AuthorDate: Fri Mar 15 21:55:40 2019 +0800
[CALCITE-2965] Implement string functions: REPEAT, SPACE, SOUNDEX, DIFFERENCE
---
core/pom.xml | 4 ++
.../calcite/adapter/enumerable/RexImpTable.java | 8 +++
.../org/apache/calcite/runtime/SqlFunctions.java | 38 ++++++++++++
.../calcite/sql/fun/SqlLibraryOperators.java | 40 +++++++++++++
.../org/apache/calcite/sql/type/OperandTypes.java | 3 +
.../org/apache/calcite/sql/type/ReturnTypes.java | 21 +++++++
.../org/apache/calcite/util/BuiltInMethod.java | 4 ++
.../calcite/sql/test/SqlOperatorBaseTest.java | 69 ++++++++++++++++++++++
pom.xml | 6 ++
site/_docs/reference.md | 10 ++--
10 files changed, 198 insertions(+), 5 deletions(-)
diff --git a/core/pom.xml b/core/pom.xml
index a8fbb69..0502bbb 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -38,6 +38,10 @@ limitations under the License.
<!-- Sorted by groupId, artifactId; calcite dependencies first. Put versions
in dependencyManagement in the root POM, not here. -->
<dependency>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ </dependency>
+ <dependency>
<groupId>org.apache.calcite.avatica</groupId>
<artifactId>avatica-core</artifactId>
</dependency>
diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index f1fe432..8159dc5 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -91,12 +91,16 @@ import static org.apache.calcite.linq4j.tree.ExpressionType.NotEqual;
import static org.apache.calcite.linq4j.tree.ExpressionType.OrElse;
import static org.apache.calcite.linq4j.tree.ExpressionType.Subtract;
import static org.apache.calcite.linq4j.tree.ExpressionType.UnaryPlus;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.DIFFERENCE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.JSON_DEPTH;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.JSON_KEYS;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.JSON_LENGTH;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.JSON_PRETTY;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.JSON_REMOVE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.JSON_TYPE;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.REPEAT;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.SOUNDEX;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.SPACE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.TRANSLATE3;
import static org.apache.calcite.sql.fun.SqlStdOperatorTable.ABS;
import static org.apache.calcite.sql.fun.SqlStdOperatorTable.ACOS;
@@ -286,6 +290,10 @@ public class RexImpTable {
defineMethod(OVERLAY, BuiltInMethod.OVERLAY.method, NullPolicy.STRICT);
defineMethod(POSITION, BuiltInMethod.POSITION.method, NullPolicy.STRICT);
defineMethod(ASCII, BuiltInMethod.ASCII.method, NullPolicy.STRICT);
+ defineMethod(REPEAT, BuiltInMethod.REPEAT.method, NullPolicy.STRICT);
+ defineMethod(SPACE, BuiltInMethod.SPACE.method, NullPolicy.STRICT);
+ defineMethod(SOUNDEX, BuiltInMethod.SOUNDEX.method, NullPolicy.STRICT);
+ defineMethod(DIFFERENCE, BuiltInMethod.DIFFERENCE.method, NullPolicy.STRICT);
final TrimImplementor trimImplementor = new TrimImplementor();
defineImplementor(TRIM, NullPolicy.STRICT, trimImplementor, false);
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index 99fd3ff..24b02fc 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -39,6 +39,10 @@ import org.apache.calcite.util.TimeWithTimeZoneString;
import org.apache.calcite.util.TimestampWithTimeZoneString;
import org.apache.calcite.util.Util;
+import org.apache.commons.codec.language.Soundex;
+
+import com.google.common.base.Strings;
+
import java.lang.reflect.Field;
import java.math.BigDecimal;
import java.math.BigInteger;
@@ -84,6 +88,10 @@ public class SqlFunctions {
private static final TimeZone LOCAL_TZ = TimeZone.getDefault();
+ private static final Soundex SOUNDEX = new Soundex();
+
+ private static final int SOUNDEX_LENGTH = 4;
+
private static final Function1<List<Object>, Enumerable<Object>> LIST_AS_ENUMERABLE =
Linq4j::asEnumerable;
@@ -212,6 +220,36 @@ public class SqlFunctions {
? 0 : s.codePointAt(0);
}
+ /** SQL REPEAT(string, int) function. */
+ public static String repeat(String s, int n) {
+ if (n < 1) {
+ return "";
+ }
+ return Strings.repeat(s, n);
+ }
+
+ /** SQL SPACE(int) function. */
+ public static String space(int n) {
+ return repeat(" ", n);
+ }
+
+ /** SQL SOUNDEX(string) function. */
+ public static String soundex(String s) {
+ return SOUNDEX.soundex(s);
+ }
+
+ /** SQL DIFFERENCE(string, string) function. */
+ public static int difference(String s0, String s1) {
+ String result0 = soundex(s0);
+ String result1 = soundex(s1);
+ for (int i = 0; i < SOUNDEX_LENGTH; i++) {
+ if (result0.charAt(i) != result1.charAt(i)) {
+ return i;
+ }
+ }
+ return SOUNDEX_LENGTH;
+ }
+
/** SQL CHARACTER_LENGTH(string) function. */
public static int charLength(String s) {
return s.length();
diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index c48387a..1afbc24 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -149,6 +149,46 @@ public abstract class SqlLibraryOperators {
@LibraryOperator(libraries = {MYSQL})
public static final SqlFunction JSON_REMOVE = new SqlJsonRemoveFunction();
+
+ @LibraryOperator(libraries = {MYSQL, POSTGRESQL})
+ public static final SqlFunction REPEAT =
+ new SqlFunction(
+ "REPEAT",
+ SqlKind.OTHER_FUNCTION,
+ ReturnTypes.ARG0_NULLABLE_VARYING,
+ null,
+ OperandTypes.STRING_INTEGER,
+ SqlFunctionCategory.STRING);
+
+ @LibraryOperator(libraries = {MYSQL})
+ public static final SqlFunction SPACE =
+ new SqlFunction(
+ "SPACE",
+ SqlKind.OTHER_FUNCTION,
+ ReturnTypes.VARCHAR_2000_NULLABLE,
+ null,
+ OperandTypes.INTEGER,
+ SqlFunctionCategory.STRING);
+
+ @LibraryOperator(libraries = {MYSQL, POSTGRESQL, ORACLE})
+ public static final SqlFunction SOUNDEX =
+ new SqlFunction(
+ "SOUNDEX",
+ SqlKind.OTHER_FUNCTION,
+ ReturnTypes.VARCHAR_4_NULLABLE,
+ null,
+ OperandTypes.CHARACTER,
+ SqlFunctionCategory.STRING);
+
+ @LibraryOperator(libraries = {POSTGRESQL})
+ public static final SqlFunction DIFFERENCE =
+ new SqlFunction(
+ "DIFFERENCE",
+ SqlKind.OTHER_FUNCTION,
+ ReturnTypes.INTEGER_NULLABLE,
+ null,
+ OperandTypes.STRING_STRING,
+ SqlFunctionCategory.STRING);
}
// End SqlLibraryOperators.java
diff --git a/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java b/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java
index 91f2246..5d12e12 100644
--- a/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java
+++ b/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java
@@ -405,6 +405,9 @@ public abstract class OperandTypes {
family(SqlTypeFamily.STRING, SqlTypeFamily.STRING,
SqlTypeFamily.INTEGER, SqlTypeFamily.INTEGER);
+ public static final SqlSingleOperandTypeChecker STRING_INTEGER =
+ family(SqlTypeFamily.STRING, SqlTypeFamily.INTEGER);
+
/**
* Operand type-checking strategy where two operands must both be in the
* same string type family and last type is INTEGER.
diff --git a/core/src/main/java/org/apache/calcite/sql/type/ReturnTypes.java b/core/src/main/java/org/apache/calcite/sql/type/ReturnTypes.java
index a700a11..e05623f 100644
--- a/core/src/main/java/org/apache/calcite/sql/type/ReturnTypes.java
+++ b/core/src/main/java/org/apache/calcite/sql/type/ReturnTypes.java
@@ -283,10 +283,31 @@ public abstract class ReturnTypes {
cascade(BIGINT, SqlTypeTransforms.TO_NULLABLE);
/**
+ * Type-inference strategy that always returns "VARCHAR(4)".
+ */
+ public static final SqlReturnTypeInference VARCHAR_4 =
+ explicit(SqlTypeName.VARCHAR, 4);
+
+ /**
+ * Type-inference strategy that always returns "VARCHAR(4)" with nulls
+ * allowed if any of the operands allow nulls.
+ */
+ public static final SqlReturnTypeInference VARCHAR_4_NULLABLE =
+ cascade(VARCHAR_4, SqlTypeTransforms.TO_NULLABLE);
+
+ /**
* Type-inference strategy that always returns "VARCHAR(2000)".
*/
public static final SqlReturnTypeInference VARCHAR_2000 =
explicit(SqlTypeName.VARCHAR, 2000);
+
+ /**
+ * Type-inference strategy that always returns "VARCHAR(2000)" with nulls
+ * allowed if any of the operands allow nulls.
+ */
+ public static final SqlReturnTypeInference VARCHAR_2000_NULLABLE =
+ cascade(VARCHAR_2000, SqlTypeTransforms.TO_NULLABLE);
+
/**
* Type-inference strategy for Histogram agg support
*/
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index ac5f9bd..7646d4b 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -268,6 +268,10 @@ public enum BuiltInMethod {
UPPER(SqlFunctions.class, "upper", String.class),
LOWER(SqlFunctions.class, "lower", String.class),
ASCII(SqlFunctions.class, "ascii", String.class),
+ REPEAT(SqlFunctions.class, "repeat", String.class, int.class),
+ SPACE(SqlFunctions.class, "space", int.class),
+ SOUNDEX(SqlFunctions.class, "soundex", String.class),
+ DIFFERENCE(SqlFunctions.class, "difference", String.class, String.class),
JSONIZE(JsonFunctions.class, "jsonize", Object.class),
DEJSONIZE(JsonFunctions.class, "dejsonize", String.class),
JSON_VALUE_EXPRESSION(JsonFunctions.class, "jsonValueExpression",
diff --git a/core/src/test/java/org/apache/calcite/sql/test/SqlOperatorBaseTest.java b/core/src/test/java/org/apache/calcite/sql/test/SqlOperatorBaseTest.java
index dedb9a9..7705f58 100644
--- a/core/src/test/java/org/apache/calcite/sql/test/SqlOperatorBaseTest.java
+++ b/core/src/test/java/org/apache/calcite/sql/test/SqlOperatorBaseTest.java
@@ -335,6 +335,19 @@ public abstract class SqlOperatorBaseTest {
.with("conformance", conformance));
}
+ /** Creates a tester with special sql library. */
+ protected SqlTester tester(SqlLibrary library) {
+ return tester.withOperatorTable(
+ SqlLibraryOperatorTableFactory.INSTANCE
+ .getOperatorTable(SqlLibrary.STANDARD, library))
+ .withConnectionFactory(
+ CalciteAssert.EMPTY_CONNECTION_FACTORY
+ .with(new CalciteAssert
+ .AddSchemaSpecPostProcessor(CalciteAssert.SchemaSpec.HR))
+ .with("fun", library.name())
+ );
+ }
+
//--- Tests -----------------------------------------------------------
/**
@@ -4255,6 +4268,62 @@ public abstract class SqlOperatorBaseTest {
tester.checkNull("ASCII(cast(null as varchar(1)))");
}
+ @Test public void testRepeatFunc() {
+ final SqlTester tester1 = tester(SqlLibrary.MYSQL);
+ tester1.setFor(SqlLibraryOperators.REPEAT);
+ tester1.checkString("REPEAT('a', -100)", "", "VARCHAR(1) NOT NULL");
+ tester1.checkString("REPEAT('a', -1)", "", "VARCHAR(1) NOT NULL");
+ tester1.checkString("REPEAT('a', 0)", "", "VARCHAR(1) NOT NULL");
+ tester1.checkString("REPEAT('a', 2)", "aa", "VARCHAR(1) NOT NULL");
+ tester1.checkString("REPEAT('abc', 3)", "abcabcabc", "VARCHAR(3) NOT NULL");
+ tester1.checkNull("REPEAT(cast(null as varchar(1)), -1)");
+ tester1.checkNull("REPEAT(cast(null as varchar(1)), 2)");
+ tester1.checkNull("REPEAT('abc', cast(null as integer))");
+ tester1.checkNull("REPEAT(cast(null as varchar(1)), cast(null as integer))");
+
+ }
+
+ @Test public void testSpaceFunc() {
+ final SqlTester tester1 = tester(SqlLibrary.MYSQL);
+ tester1.setFor(SqlLibraryOperators.SPACE);
+ tester1.checkString("SPACE(-100)", "", "VARCHAR(2000) NOT NULL");
+ tester1.checkString("SPACE(-1)", "", "VARCHAR(2000) NOT NULL");
+ tester1.checkString("SPACE(0)", "", "VARCHAR(2000) NOT NULL");
+ tester1.checkString("SPACE(2)", " ", "VARCHAR(2000) NOT NULL");
+ tester1.checkString("SPACE(5)", " ", "VARCHAR(2000) NOT NULL");
+ tester1.checkNull("SPACE(cast(null as integer))");
+ }
+
+ @Test public void testSoundexFunc() {
+ final SqlTester tester1 = oracleTester();
+ tester1.setFor(SqlLibraryOperators.SOUNDEX);
+ tester1.checkString("SOUNDEX('TECH ON THE NET')", "T253", "VARCHAR(4) NOT NULL");
+ tester1.checkString("SOUNDEX('Miller')", "M460", "VARCHAR(4) NOT NULL");
+ tester1.checkString("SOUNDEX('miler')", "M460", "VARCHAR(4) NOT NULL");
+ tester1.checkString("SOUNDEX('myller')", "M460", "VARCHAR(4) NOT NULL");
+ tester1.checkString("SOUNDEX('muller')", "M460", "VARCHAR(4) NOT NULL");
+ tester1.checkString("SOUNDEX('m')", "M000", "VARCHAR(4) NOT NULL");
+ tester1.checkString("SOUNDEX('mu')", "M000", "VARCHAR(4) NOT NULL");
+ tester1.checkString("SOUNDEX('mile')", "M400", "VARCHAR(4) NOT NULL");
+ tester1.checkNull("SOUNDEX(cast(null as varchar(1)))");
+ tester1.checkFails("SOUNDEX(_UTF8'\u5B57\u5B57')", "The character is not mapped.*", true);
+ }
+
+ @Test public void testDifferenceFunc() {
+ final SqlTester tester1 = tester(SqlLibrary.POSTGRESQL);
+ tester1.setFor(SqlLibraryOperators.DIFFERENCE);
+ tester1.checkScalarExact("DIFFERENCE('Miller', 'miller')", "4");
+ tester1.checkScalarExact("DIFFERENCE('Miller', 'myller')", "4");
+ tester1.checkScalarExact("DIFFERENCE('muller', 'miller')", "4");
+ tester1.checkScalarExact("DIFFERENCE('muller', 'miller')", "4");
+ tester1.checkScalarExact("DIFFERENCE('muller', 'milk')", "2");
+ tester1.checkScalarExact("DIFFERENCE('muller', 'mile')", "2");
+ tester1.checkScalarExact("DIFFERENCE('muller', 'm')", "1");
+ tester1.checkScalarExact("DIFFERENCE('muller', 'lee')", "0");
+ tester1.checkNull("DIFFERENCE('muller', cast(null as varchar(1)))");
+ tester1.checkNull("DIFFERENCE(cast(null as varchar(1)), 'muller')");
+ }
+
@Test public void testUpperFunc() {
tester.setFor(SqlStdOperatorTable.UPPER);
tester.checkString("upper('a')", "A", "CHAR(1) NOT NULL");
diff --git a/pom.xml b/pom.xml
index b5f6161..e48816c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -72,6 +72,7 @@ limitations under the License.
<cassandra.version>3.11.2</cassandra.version>
<checksum-maven-plugin.version>1.2</checksum-maven-plugin.version>
<chinook-data-hsqldb.version>0.1</chinook-data-hsqldb.version>
+ <commons-codec.version>1.12</commons-codec.version>
<commons-dbcp2.version>2.5.0</commons-dbcp2.version>
<commons-lang3.version>3.8</commons-lang3.version>
<commons-pool2.version>2.6.0</commons-pool2.version>
@@ -237,6 +238,11 @@ limitations under the License.
<!-- Now third-party dependencies, sorted by groupId and artifactId. -->
<dependency>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ <version>${commons-codec.version}</version>
+ </dependency>
+ <dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-dbcp2</artifactId>
<version>${commons-dbcp2.version}</version>
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index 17c9588..f36d5fd 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -1487,12 +1487,8 @@ period:
Not implemented:
* {fn CHAR(string)}
-* {fn DIFFERENCE(string, string)}
* {fn LEFT(string, integer)}
-* {fn REPEAT(string, integer)}
* {fn RIGHT(string, integer)}
-* {fn SOUNDEX(string)}
-* {fn SPACE(integer)}
#### Date/time
@@ -2108,7 +2104,11 @@ semantics.
| o | SUBSTR(string, position [, substring_length ]) | Returns a portion of *string*, beginning at character *position*, *substring_length* characters long. SUBSTR calculates lengths using characters as defined by the input character set
| o | GREATEST(expr [, expr ]*) | Returns the greatest of the expressions
| o | LEAST(expr [, expr ]* ) | Returns the least of the expressions
-| o p | TRANSLATE(expr, fromString, toString) | Returns *expr* with all occurrences of each character in *fromString* replaced by its corresponding character in *toString*. Characters in *expr* that are not in *fromString* are not replaced
+| o p | TRANSLATE(expr, fromString, toString) | Returns *expr* with all occurrences of each character in *fromString* replaced by its corresponding character in *toString*. Characters in *expr* that are not in *fromString* are not replaced
+| m o p | SOUNDEX(string) | Returns the phonetic representation of *string*. Throws exception when *string* is encoded with multi-byte encoding such as UTF-8
+| p | DIFFERENCE(string, string) | Returns the difference between the SOUNDEX values of two character expressions as an integer. For example, returns 4 if the SOUNDEX values are same and returns 0 if the SOUNDEX values are totally different.
+| m p | REPEAT(string, integer) | Returns a string of *integer* times *string*; Returns an empty string if *integer* is less than 1
+| m | SPACE(integer) | Returns a string of *integer* spaces; Returns an empty string if *integer* is less than 1
Note: