You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by mm...@apache.org on 2019/07/02 17:44:11 UTC
[calcite] branch master updated: [CALCITE-3063] Parse and process
PostgreSQL posix regular expressions
This is an automated email from the ASF dual-hosted git repository.
mmior pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push:
new adf4cc4 [CALCITE-3063] Parse and process PostgreSQL posix regular expressions
adf4cc4 is described below
commit adf4cc4dc5cdb9f5e49c85d10f46a2fdcd831ccf
Author: Muhammad Gelbana <m....@gmail.com>
AuthorDate: Sat May 11 18:05:58 2019 +0200
[CALCITE-3063] Parse and process PostgreSQL posix regular expressions
---
babel/src/main/codegen/config.fmpp | 1 +
babel/src/main/codegen/includes/parserImpls.ftl | 8 ++
core/src/main/codegen/config.fmpp | 1 +
core/src/main/codegen/templates/Parser.jj | 20 ++-
.../calcite/adapter/enumerable/RexImpTable.java | 12 ++
.../calcite/rex/RexSqlStandardConvertletTable.java | 4 +
.../org/apache/calcite/runtime/SqlFunctions.java | 18 +++
.../main/java/org/apache/calcite/sql/SqlKind.java | 10 ++
.../calcite/sql/fun/SqlPosixRegexOperator.java | 136 +++++++++++++++++++++
.../calcite/sql/fun/SqlStdOperatorTable.java | 14 +++
.../org/apache/calcite/util/BuiltInMethod.java | 1 +
core/src/test/codegen/config.fmpp | 1 +
.../org/apache/calcite/test/SqlFunctionsTest.java | 21 ++++
.../org/apache/calcite/test/SqlValidatorTest.java | 4 +
server/src/main/codegen/config.fmpp | 1 +
15 files changed, 247 insertions(+), 5 deletions(-)
diff --git a/babel/src/main/codegen/config.fmpp b/babel/src/main/codegen/config.fmpp
index d46f8ca..630e2a0 100644
--- a/babel/src/main/codegen/config.fmpp
+++ b/babel/src/main/codegen/config.fmpp
@@ -865,6 +865,7 @@ data: {
"parserImpls.ftl"
]
+ includePosixOperators: true
includeCompoundIdentifier: true
includeBraces: true
includeAdditionalDeclarations: false
diff --git a/babel/src/main/codegen/includes/parserImpls.ftl b/babel/src/main/codegen/includes/parserImpls.ftl
index 934830a..3cbc702 100644
--- a/babel/src/main/codegen/includes/parserImpls.ftl
+++ b/babel/src/main/codegen/includes/parserImpls.ftl
@@ -42,4 +42,12 @@ SqlNode DateFunctionCall() :
}
}
+/* Extra operators */
+
+<DEFAULT, DQID, BTID> TOKEN :
+{
+ < NEGATE: "!" >
+| < TILDE: "~" >
+}
+
// End parserImpls.ftl
diff --git a/core/src/main/codegen/config.fmpp b/core/src/main/codegen/config.fmpp
index 9687338..8fc9cce 100644
--- a/core/src/main/codegen/config.fmpp
+++ b/core/src/main/codegen/config.fmpp
@@ -408,6 +408,7 @@ data: {
"parserImpls.ftl"
]
+ includePosixOperators: false
includeCompoundIdentifier: true
includeBraces: true
includeAdditionalDeclarations: false
diff --git a/core/src/main/codegen/templates/Parser.jj b/core/src/main/codegen/templates/Parser.jj
index 2ff4c72..0344aeb 100644
--- a/core/src/main/codegen/templates/Parser.jj
+++ b/core/src/main/codegen/templates/Parser.jj
@@ -3190,16 +3190,26 @@ List<Object> Expression2(ExprContext exprContext) :
s.clear().add(this);
}
(
- <NOT>
(
- <LIKE> { op = SqlStdOperatorTable.NOT_LIKE; }
+ <NOT>
+ (
+ <LIKE> { op = SqlStdOperatorTable.NOT_LIKE; }
+ |
+ <SIMILAR> <TO> { op = SqlStdOperatorTable.NOT_SIMILAR_TO; }
+ )
|
- <SIMILAR> <TO> { op = SqlStdOperatorTable.NOT_SIMILAR_TO; }
+ <LIKE> { op = SqlStdOperatorTable.LIKE; }
+ |
+ <SIMILAR> <TO> { op = SqlStdOperatorTable.SIMILAR_TO; }
)
+ <#if parser.includePosixOperators>
|
- <LIKE> { op = SqlStdOperatorTable.LIKE; }
+ <NEGATE> <TILDE> { op = SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_SENSITIVE; }
+ [ <STAR> { op = SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_INSENSITIVE; } ]
|
- <SIMILAR> <TO> { op = SqlStdOperatorTable.SIMILAR_TO; }
+ <TILDE> { op = SqlStdOperatorTable.POSIX_REGEX_CASE_SENSITIVE; }
+ [ <STAR> { op = SqlStdOperatorTable.POSIX_REGEX_CASE_INSENSITIVE; } ]
+ </#if>
)
list2 = Expression2(ExprContext.ACCEPT_SUB_QUERY) {
list.add(new SqlParserUtil.ToTreeListItem(op, s.pos()));
diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index 21baf7a..5178a5c 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -416,6 +416,18 @@ public class RexImpTable {
defineImplementor(NOT_SIMILAR_TO, NullPolicy.STRICT,
NotImplementor.of(similarImplementor), false);
+ // POSIX REGEX
+ final MethodImplementor posixRegexImplementor =
+ new MethodImplementor(BuiltInMethod.POSIX_REGEX.method);
+ defineImplementor(SqlStdOperatorTable.POSIX_REGEX_CASE_INSENSITIVE, NullPolicy.STRICT,
+ posixRegexImplementor, false);
+ defineImplementor(SqlStdOperatorTable.POSIX_REGEX_CASE_SENSITIVE, NullPolicy.STRICT,
+ posixRegexImplementor, false);
+ defineImplementor(SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_INSENSITIVE, NullPolicy.STRICT,
+ NotImplementor.of(posixRegexImplementor), false);
+ defineImplementor(SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_SENSITIVE, NullPolicy.STRICT,
+ NotImplementor.of(posixRegexImplementor), false);
+
// Multisets & arrays
defineMethod(CARDINALITY, BuiltInMethod.COLLECTION_SIZE.method,
NullPolicy.STRICT);
diff --git a/core/src/main/java/org/apache/calcite/rex/RexSqlStandardConvertletTable.java b/core/src/main/java/org/apache/calcite/rex/RexSqlStandardConvertletTable.java
index 310a6b4..f25f0d0 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexSqlStandardConvertletTable.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexSqlStandardConvertletTable.java
@@ -58,6 +58,10 @@ public class RexSqlStandardConvertletTable
registerEquivOp(SqlStdOperatorTable.NOT_LIKE);
registerEquivOp(SqlStdOperatorTable.SIMILAR_TO);
registerEquivOp(SqlStdOperatorTable.NOT_SIMILAR_TO);
+ registerEquivOp(SqlStdOperatorTable.POSIX_REGEX_CASE_SENSITIVE);
+ registerEquivOp(SqlStdOperatorTable.POSIX_REGEX_CASE_INSENSITIVE);
+ registerEquivOp(SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_SENSITIVE);
+ registerEquivOp(SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_INSENSITIVE);
registerEquivOp(SqlStdOperatorTable.PLUS);
registerEquivOp(SqlStdOperatorTable.MINUS);
registerEquivOp(SqlStdOperatorTable.MULTIPLY);
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index a0c6587..bea0558 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -97,6 +97,11 @@ public class SqlFunctions {
private static final Function1<List<Object>, Enumerable<Object>> LIST_AS_ENUMERABLE =
Linq4j::asEnumerable;
+ // "XDigit" must be listed before "Digit": the posixRegex method rewrites the
+ // names in array order, and rewriting "digit" first would also clobber "xdigit".
+ private static final String[] POSIX_CHARACTER_CLASSES = new String[] { "Lower", "Upper", "ASCII",
+ "Alpha", "XDigit", "Digit", "Alnum", "Punct", "Graph", "Print", "Blank", "Cntrl", "Space" };
+
private static final Function1<Object[], Enumerable<Object[]>> ARRAY_CARTESIAN_PRODUCT =
lists -> {
final List<Enumerator<Object>> enumerators = new ArrayList<>();
@@ -461,6 +466,19 @@ public class SqlFunctions {
return Pattern.matches(regex, s);
}
+  /**
+   * POSIX regular expression match, the runtime implementation behind the
+   * SQL {@code ~}, {@code ~*}, {@code !~} and {@code !~*} operators.
+   *
+   * <p>POSIX bracket classes such as {@code [:xdigit:]} are translated to the
+   * equivalent Java classes (here {@code \p{XDigit}}) before the pattern is
+   * compiled. Only the complete {@code [:name:]} token is translated:
+   * rewriting the bare word would corrupt ordinary literals (the
+   * {@code lower} in {@code flower}) and would leave the two colons inside
+   * the enclosing bracket expression, so that {@code [[:xdigit:]]} would
+   * also match ':'.
+   *
+   * @param s String to search
+   * @param regex POSIX regular expression
+   * @param caseSensitive Whether the match is case-sensitive; must not be
+   *     null, because it is unboxed
+   * @return Whether {@code regex} matches anywhere within {@code s}
+   */
+  public static boolean posixRegex(String s, String regex, Boolean caseSensitive) {
+    for (String v : POSIX_CHARACTER_CLASSES) {
+      // Literal (non-regex) replacement: neither the brackets in the search
+      // text nor the backslash in the replacement need escaping.
+      regex = regex.replace("[:" + v.toLowerCase(Locale.ROOT) + ":]", "\\p{" + v + "}");
+    }
+
+    int flags = caseSensitive ? 0 : Pattern.CASE_INSENSITIVE;
+    return Pattern.compile(regex, flags).matcher(s).find();
+  }
+
// =
/** SQL <code>=</code> operator applied to BigDecimal values (neither may be
diff --git a/core/src/main/java/org/apache/calcite/sql/SqlKind.java b/core/src/main/java/org/apache/calcite/sql/SqlKind.java
index 78c427d..8fa3311 100644
--- a/core/src/main/java/org/apache/calcite/sql/SqlKind.java
+++ b/core/src/main/java/org/apache/calcite/sql/SqlKind.java
@@ -417,6 +417,16 @@ public enum SqlKind {
SIMILAR,
/**
+ * The "~" operator.
+ */
+ POSIX_REGEX_CASE_SENSITIVE,
+
+ /**
+ * The "~*" operator.
+ */
+ POSIX_REGEX_CASE_INSENSITIVE,
+
+ /**
* The "BETWEEN" operator.
*/
BETWEEN,
diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlPosixRegexOperator.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlPosixRegexOperator.java
new file mode 100644
index 0000000..4bd8853
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlPosixRegexOperator.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.sql.fun;
+
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.sql.SqlBasicCall;
+import org.apache.calcite.sql.SqlBinaryOperator;
+import org.apache.calcite.sql.SqlCall;
+import org.apache.calcite.sql.SqlCallBinding;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlLiteral;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.calcite.sql.SqlOperandCountRange;
+import org.apache.calcite.sql.SqlWriter;
+import org.apache.calcite.sql.parser.SqlParserPos;
+import org.apache.calcite.sql.type.InferTypes;
+import org.apache.calcite.sql.type.OperandTypes;
+import org.apache.calcite.sql.type.ReturnTypes;
+import org.apache.calcite.sql.type.SqlOperandCountRanges;
+import org.apache.calcite.sql.type.SqlTypeUtil;
+
+import java.util.Arrays;
+
+/**
+ * An operator describing the POSIX regular expression match operators
+ * <code>~</code>, <code>~*</code>, <code>!~</code> and <code>!~*</code>.
+ *
+ * <p>Syntax: <code>src-value [!] ~ [*] pattern-value</code>
+ */
+public class SqlPosixRegexOperator extends SqlBinaryOperator {
+  // ~ Instance fields --------------------------------------------------------
+
+  private final boolean caseSensitive;
+  private final boolean negated;
+
+  // ~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a SqlPosixRegexOperator.
+   *
+   * @param name Operator name
+   * @param kind Kind
+   * @param caseSensitive Whether this is '~' or '!~' (as opposed to '~*' or
+   *     '!~*')
+   * @param negated Whether this is '!~' or '!~*'
+   */
+  SqlPosixRegexOperator(
+      String name,
+      SqlKind kind,
+      boolean caseSensitive,
+      boolean negated) {
+    super(
+        name,
+        kind,
+        32,
+        true,
+        ReturnTypes.BOOLEAN_NULLABLE,
+        InferTypes.FIRST_KNOWN,
+        OperandTypes.STRING_SAME_SAME_SAME);
+    this.caseSensitive = caseSensitive;
+    this.negated = negated;
+  }
+
+  // ~ Methods ----------------------------------------------------------------
+
+  /**
+   * Accepts two operands (source and pattern) or three (source, pattern and
+   * the case-sensitivity literal appended by {@link #createCall}).
+   */
+  @Override public SqlOperandCountRange getOperandCountRange() {
+    return SqlOperandCountRanges.between(2, 3);
+  }
+
+  /**
+   * Creates a call to this operator, appending a boolean literal that carries
+   * this operator's case sensitivity as the last operand.
+   *
+   * <p>The literal is appended only when fewer than three operands are
+   * supplied. Re-creating a call from the three operands of an existing call
+   * would otherwise yield four operands, which is outside
+   * {@link #getOperandCountRange()}.
+   * NOTE(review): this assumes a third operand, when present, is the flag
+   * added by an earlier invocation of this method.
+   */
+  @Override public SqlCall createCall(
+      SqlLiteral functionQualifier,
+      SqlParserPos pos,
+      SqlNode... operands) {
+    pos = pos.plusAll(Arrays.asList(operands));
+    if (operands.length < 3) {
+      operands = Arrays.copyOf(operands, operands.length + 1);
+      operands[operands.length - 1] = SqlLiteral.createBoolean(caseSensitive, SqlParserPos.ZERO);
+    }
+    return new SqlBasicCall(this, operands, pos, false, functionQualifier);
+  }
+
+  /**
+   * Checks that the first two operands (source and pattern) are comparable
+   * character types. A third operand, if present, is not inspected.
+   *
+   * @param callBinding Call being validated
+   * @param throwOnFailure Whether to throw rather than return false when the
+   *     operand types are not acceptable
+   * @return Whether the operand types are valid
+   */
+  @Override public boolean checkOperandTypes(
+      SqlCallBinding callBinding,
+      boolean throwOnFailure) {
+    int operandCount = callBinding.getOperandCount();
+    if (operandCount != 2 && operandCount != 3) {
+      throw new AssertionError(
+          "Unexpected number of args to " + callBinding.getCall() + ": " + operandCount);
+    }
+
+    RelDataType op1Type = callBinding.getOperandType(0);
+    RelDataType op2Type = callBinding.getOperandType(1);
+
+    if (!SqlTypeUtil.isComparable(op1Type, op2Type)) {
+      // Mismatched operand types are an error in the query, not a broken
+      // internal invariant: report a signature error and honor throwOnFailure
+      // instead of raising AssertionError.
+      if (throwOnFailure) {
+        throw callBinding.newValidationSignatureError();
+      }
+      return false;
+    }
+
+    return SqlTypeUtil.isCharTypeComparable(
+        callBinding,
+        callBinding.operands().subList(0, 2),
+        throwOnFailure);
+  }
+
+  /**
+   * Writes the call as <code>operand0 [!]~[*] operand1</code>; the
+   * case-sensitivity operand is expressed by the presence or absence of
+   * <code>*</code> and is not written as an operand.
+   */
+  @Override public void unparse(
+      SqlWriter writer,
+      SqlCall call,
+      int leftPrec,
+      int rightPrec) {
+    final SqlWriter.Frame frame = writer.startList("", "");
+    call.operand(0).unparse(writer, getLeftPrec(), getRightPrec());
+
+    if (this.negated) {
+      writer.print("!");
+    }
+    writer.print("~");
+    if (!this.caseSensitive) {
+      writer.print("*");
+    }
+    writer.print(" ");
+
+    call.operand(1).unparse(writer, getLeftPrec(), getRightPrec());
+    writer.endList(frame);
+  }
+}
+
+// End SqlPosixRegexOperator.java
diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlStdOperatorTable.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlStdOperatorTable.java
index 34a862a..b033ea2 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlStdOperatorTable.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlStdOperatorTable.java
@@ -1369,6 +1369,20 @@ public class SqlStdOperatorTable extends ReflectiveSqlOperatorTable {
public static final SqlSpecialOperator SIMILAR_TO =
new SqlLikeOperator("SIMILAR TO", SqlKind.SIMILAR, false);
+ public static final SqlBinaryOperator POSIX_REGEX_CASE_SENSITIVE = new SqlPosixRegexOperator(
+ "POSIX REGEX CASE SENSITIVE", SqlKind.POSIX_REGEX_CASE_SENSITIVE, true, false);
+
+ public static final SqlBinaryOperator POSIX_REGEX_CASE_INSENSITIVE = new SqlPosixRegexOperator(
+ "POSIX REGEX CASE INSENSITIVE", SqlKind.POSIX_REGEX_CASE_INSENSITIVE, false, false);
+
+ public static final SqlBinaryOperator NEGATED_POSIX_REGEX_CASE_SENSITIVE =
+ new SqlPosixRegexOperator("NEGATED POSIX REGEX CASE SENSITIVE",
+ SqlKind.POSIX_REGEX_CASE_SENSITIVE, true, true);
+
+ public static final SqlBinaryOperator NEGATED_POSIX_REGEX_CASE_INSENSITIVE =
+ new SqlPosixRegexOperator("NEGATED POSIX REGEX CASE INSENSITIVE",
+ SqlKind.POSIX_REGEX_CASE_INSENSITIVE, false, true);
+
/**
* Internal operator used to represent the ESCAPE clause of a LIKE or
* SIMILAR TO expression.
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index c84bfe7..4e9c242 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -348,6 +348,7 @@ public enum BuiltInMethod {
RTRIM(SqlFunctions.class, "rtrim", String.class),
LIKE(SqlFunctions.class, "like", String.class, String.class),
SIMILAR(SqlFunctions.class, "similar", String.class, String.class),
+ POSIX_REGEX(SqlFunctions.class, "posixRegex", String.class, String.class, Boolean.class),
IS_TRUE(SqlFunctions.class, "isTrue", Boolean.class),
IS_NOT_FALSE(SqlFunctions.class, "isNotFalse", Boolean.class),
NOT(SqlFunctions.class, "not", Boolean.class),
diff --git a/core/src/test/codegen/config.fmpp b/core/src/test/codegen/config.fmpp
index 8a451e3..330bb37 100644
--- a/core/src/test/codegen/config.fmpp
+++ b/core/src/test/codegen/config.fmpp
@@ -389,6 +389,7 @@ data: {
"parserImpls.ftl"
]
+ includePosixOperators: false
includeCompoundIdentifier: true
includeBraces: true
includeAdditionalDeclarations: false
diff --git a/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java b/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
index 55d8811..fa91f34 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
@@ -39,6 +39,7 @@ import static org.apache.calcite.runtime.SqlFunctions.initcap;
import static org.apache.calcite.runtime.SqlFunctions.lesser;
import static org.apache.calcite.runtime.SqlFunctions.lower;
import static org.apache.calcite.runtime.SqlFunctions.ltrim;
+import static org.apache.calcite.runtime.SqlFunctions.posixRegex;
import static org.apache.calcite.runtime.SqlFunctions.rtrim;
import static org.apache.calcite.runtime.SqlFunctions.subtractMonths;
import static org.apache.calcite.runtime.SqlFunctions.trim;
@@ -72,6 +73,26 @@ public class SqlFunctionsTest {
assertEquals("nullb", concat(null, "b"));
}
+  /**
+   * Tests the posixRegex runtime function: plain patterns in case-sensitive
+   * and case-insensitive mode, and POSIX bracket character classes.
+   */
+  @Test public void testPosixRegex() {
+    // Case-sensitive matching
+    assertEquals(true, posixRegex("abc", "abc", true));
+    assertEquals(true, posixRegex("abc", "^a", true));
+    assertEquals(true, posixRegex("abc", "(b|d)", true));
+    assertEquals(false, posixRegex("abc", "^(b|c)", true));
+
+    // Case-sensitive matching must reject a pattern that differs only in case
+    assertEquals(false, posixRegex("abc", "ABC", true));
+    assertEquals(false, posixRegex("abc", "^A", true));
+
+    // Case-insensitive matching
+    assertEquals(true, posixRegex("abc", "ABC", false));
+    assertEquals(true, posixRegex("abc", "^A", false));
+    assertEquals(true, posixRegex("abc", "(B|D)", false));
+    assertEquals(false, posixRegex("abc", "^(B|C)", false));
+
+    // Anchored character classes
+    assertEquals(false, posixRegex("abc", "^[[:xdigit:]]$", false));
+    assertEquals(true, posixRegex("abc", "^[[:xdigit:]]+$", false));
+    assertEquals(false, posixRegex("abcq", "^[[:xdigit:]]+$", false));
+
+    // Unanchored character classes match anywhere in the string
+    assertEquals(true, posixRegex("abc", "[[:xdigit:]]", false));
+    assertEquals(true, posixRegex("abc", "[[:xdigit:]]+", false));
+    assertEquals(true, posixRegex("abcq", "[[:xdigit:]]", false));
+
+    // Character classes other than xdigit
+    assertEquals(false, posixRegex("abc", "[[:digit:]]", true));
+    assertEquals(true, posixRegex("a1c", "[[:digit:]]", true));
+    assertEquals(false, posixRegex("123", "[[:alpha:]]", true));
+  }
+
@Test public void testLower() {
assertEquals("a bcd iijk", lower("A bCd Iijk"));
}
diff --git a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
index e09881c..33bbbdd 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
@@ -8967,11 +8967,15 @@ public class SqlValidatorTest extends SqlValidatorTestCase {
+ "BETWEEN SYMMETRIC -\n"
+ "IN left\n"
+ "LIKE -\n"
+ + "NEGATED POSIX REGEX CASE INSENSITIVE left\n"
+ + "NEGATED POSIX REGEX CASE SENSITIVE left\n"
+ "NOT BETWEEN ASYMMETRIC -\n"
+ "NOT BETWEEN SYMMETRIC -\n"
+ "NOT IN left\n"
+ "NOT LIKE -\n"
+ "NOT SIMILAR TO -\n"
+ + "POSIX REGEX CASE INSENSITIVE left\n"
+ + "POSIX REGEX CASE SENSITIVE left\n"
+ "SIMILAR TO -\n"
+ "\n"
+ "$IS_DIFFERENT_FROM left\n"
diff --git a/server/src/main/codegen/config.fmpp b/server/src/main/codegen/config.fmpp
index 0fbb139..9c7e8f3 100644
--- a/server/src/main/codegen/config.fmpp
+++ b/server/src/main/codegen/config.fmpp
@@ -416,6 +416,7 @@ data: {
"parserImpls.ftl"
]
+ includePosixOperators: false
includeCompoundIdentifier: true
includeBraces: true
includeAdditionalDeclarations: false