You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by mm...@apache.org on 2019/07/02 17:44:11 UTC

[calcite] branch master updated: [CALCITE-3063] Parse and process PostgreSQL posix regular expressions

This is an automated email from the ASF dual-hosted git repository.

mmior pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/master by this push:
     new adf4cc4  [CALCITE-3063] Parse and process PostgreSQL posix regular expressions
adf4cc4 is described below

commit adf4cc4dc5cdb9f5e49c85d10f46a2fdcd831ccf
Author: Muhammad Gelbana <m....@gmail.com>
AuthorDate: Sat May 11 18:05:58 2019 +0200

    [CALCITE-3063] Parse and process PostgreSQL posix regular expressions
---
 babel/src/main/codegen/config.fmpp                 |   1 +
 babel/src/main/codegen/includes/parserImpls.ftl    |   8 ++
 core/src/main/codegen/config.fmpp                  |   1 +
 core/src/main/codegen/templates/Parser.jj          |  20 ++-
 .../calcite/adapter/enumerable/RexImpTable.java    |  12 ++
 .../calcite/rex/RexSqlStandardConvertletTable.java |   4 +
 .../org/apache/calcite/runtime/SqlFunctions.java   |  18 +++
 .../main/java/org/apache/calcite/sql/SqlKind.java  |  10 ++
 .../calcite/sql/fun/SqlPosixRegexOperator.java     | 136 +++++++++++++++++++++
 .../calcite/sql/fun/SqlStdOperatorTable.java       |  14 +++
 .../org/apache/calcite/util/BuiltInMethod.java     |   1 +
 core/src/test/codegen/config.fmpp                  |   1 +
 .../org/apache/calcite/test/SqlFunctionsTest.java  |  21 ++++
 .../org/apache/calcite/test/SqlValidatorTest.java  |   4 +
 server/src/main/codegen/config.fmpp                |   1 +
 15 files changed, 247 insertions(+), 5 deletions(-)

diff --git a/babel/src/main/codegen/config.fmpp b/babel/src/main/codegen/config.fmpp
index d46f8ca..630e2a0 100644
--- a/babel/src/main/codegen/config.fmpp
+++ b/babel/src/main/codegen/config.fmpp
@@ -865,6 +865,7 @@ data: {
         "parserImpls.ftl"
       ]
 
+      includePosixOperators: true
       includeCompoundIdentifier: true
       includeBraces: true
       includeAdditionalDeclarations: false
diff --git a/babel/src/main/codegen/includes/parserImpls.ftl b/babel/src/main/codegen/includes/parserImpls.ftl
index 934830a..3cbc702 100644
--- a/babel/src/main/codegen/includes/parserImpls.ftl
+++ b/babel/src/main/codegen/includes/parserImpls.ftl
@@ -42,4 +42,12 @@ SqlNode DateFunctionCall() :
     }
 }
 
+/* Extra operators: tokens for the POSIX regular expression operators (~, ~*, !~, !~*) */
+
+<DEFAULT, DQID, BTID> TOKEN :
+{
+    < NEGATE: "!" >
+|   < TILDE: "~" >
+}
+
 // End parserImpls.ftl
diff --git a/core/src/main/codegen/config.fmpp b/core/src/main/codegen/config.fmpp
index 9687338..8fc9cce 100644
--- a/core/src/main/codegen/config.fmpp
+++ b/core/src/main/codegen/config.fmpp
@@ -408,6 +408,7 @@ data: {
       "parserImpls.ftl"
     ]
 
+    includePosixOperators: false
     includeCompoundIdentifier: true
     includeBraces: true
     includeAdditionalDeclarations: false
diff --git a/core/src/main/codegen/templates/Parser.jj b/core/src/main/codegen/templates/Parser.jj
index 2ff4c72..0344aeb 100644
--- a/core/src/main/codegen/templates/Parser.jj
+++ b/core/src/main/codegen/templates/Parser.jj
@@ -3190,16 +3190,26 @@ List<Object> Expression2(ExprContext exprContext) :
                     s.clear().add(this);
                 }
                 (
-                    <NOT>
                     (
-                        <LIKE> { op = SqlStdOperatorTable.NOT_LIKE; }
+                        <NOT>
+                        (
+                            <LIKE> { op = SqlStdOperatorTable.NOT_LIKE; }
+                        |
+                            <SIMILAR> <TO> { op = SqlStdOperatorTable.NOT_SIMILAR_TO; }
+                        )
                     |
-                        <SIMILAR> <TO> { op = SqlStdOperatorTable.NOT_SIMILAR_TO; }
+                        <LIKE> { op = SqlStdOperatorTable.LIKE; }
+                    |
+                        <SIMILAR> <TO> { op = SqlStdOperatorTable.SIMILAR_TO; }
                     )
+                <#if parser.includePosixOperators>
                 |
-                    <LIKE> { op = SqlStdOperatorTable.LIKE; }
+                    <NEGATE> <TILDE> { op = SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_SENSITIVE; }
+                    [ <STAR> { op = SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_INSENSITIVE; } ]
                 |
-                    <SIMILAR> <TO> { op = SqlStdOperatorTable.SIMILAR_TO; }
+                    <TILDE> { op = SqlStdOperatorTable.POSIX_REGEX_CASE_SENSITIVE; }
+                    [ <STAR> { op = SqlStdOperatorTable.POSIX_REGEX_CASE_INSENSITIVE; } ]
+                </#if>
                 )
                 list2 = Expression2(ExprContext.ACCEPT_SUB_QUERY) {
                     list.add(new SqlParserUtil.ToTreeListItem(op, s.pos()));
diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index 21baf7a..5178a5c 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -416,6 +416,18 @@ public class RexImpTable {
     defineImplementor(NOT_SIMILAR_TO, NullPolicy.STRICT,
         NotImplementor.of(similarImplementor), false);
 
+    // POSIX REGEX
+    final MethodImplementor posixRegexImplementor =
+        new MethodImplementor(BuiltInMethod.POSIX_REGEX.method);
+    defineImplementor(SqlStdOperatorTable.POSIX_REGEX_CASE_INSENSITIVE, NullPolicy.STRICT,
+        posixRegexImplementor, false);
+    defineImplementor(SqlStdOperatorTable.POSIX_REGEX_CASE_SENSITIVE, NullPolicy.STRICT,
+        posixRegexImplementor, false);
+    defineImplementor(SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_INSENSITIVE, NullPolicy.STRICT,
+        NotImplementor.of(posixRegexImplementor), false);
+    defineImplementor(SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_SENSITIVE, NullPolicy.STRICT,
+        NotImplementor.of(posixRegexImplementor), false);
+
     // Multisets & arrays
     defineMethod(CARDINALITY, BuiltInMethod.COLLECTION_SIZE.method,
         NullPolicy.STRICT);
diff --git a/core/src/main/java/org/apache/calcite/rex/RexSqlStandardConvertletTable.java b/core/src/main/java/org/apache/calcite/rex/RexSqlStandardConvertletTable.java
index 310a6b4..f25f0d0 100644
--- a/core/src/main/java/org/apache/calcite/rex/RexSqlStandardConvertletTable.java
+++ b/core/src/main/java/org/apache/calcite/rex/RexSqlStandardConvertletTable.java
@@ -58,6 +58,10 @@ public class RexSqlStandardConvertletTable
     registerEquivOp(SqlStdOperatorTable.NOT_LIKE);
     registerEquivOp(SqlStdOperatorTable.SIMILAR_TO);
     registerEquivOp(SqlStdOperatorTable.NOT_SIMILAR_TO);
+    registerEquivOp(SqlStdOperatorTable.POSIX_REGEX_CASE_SENSITIVE);
+    registerEquivOp(SqlStdOperatorTable.POSIX_REGEX_CASE_INSENSITIVE);
+    registerEquivOp(SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_SENSITIVE);
+    registerEquivOp(SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_INSENSITIVE);
     registerEquivOp(SqlStdOperatorTable.PLUS);
     registerEquivOp(SqlStdOperatorTable.MINUS);
     registerEquivOp(SqlStdOperatorTable.MULTIPLY);
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index a0c6587..bea0558 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -97,6 +97,11 @@ public class SqlFunctions {
   private static final Function1<List<Object>, Enumerable<Object>> LIST_AS_ENUMERABLE =
       Linq4j::asEnumerable;
 
+  // POSIX character class names that posixRegex rewrites to Java \p{...} classes. XDigit must
+  // precede Digit: names are substituted in array order and "digit" is a substring of "xdigit".
+  private static final String[] POSIX_CHARACTER_CLASSES = new String[] { "Lower", "Upper", "ASCII",
+      "Alpha", "XDigit", "Digit", "Alnum", "Punct", "Graph", "Print", "Blank", "Cntrl", "Space" };
+
   private static final Function1<Object[], Enumerable<Object[]>> ARRAY_CARTESIAN_PRODUCT =
       lists -> {
         final List<Enumerator<Object>> enumerators = new ArrayList<>();
@@ -461,6 +466,19 @@ public class SqlFunctions {
     return Pattern.matches(regex, s);
   }
 
+  /** SQL POSIX regex match ({@code ~} family): whether {@code regex} is found anywhere in
+   * {@code s}, ignoring case unless {@code caseSensitive} is true. */
+  public static boolean posixRegex(String s, String regex, Boolean caseSensitive) {
+    // Translate only bracketed POSIX classes such as [:digit:] to Java's \p{Digit}. A bare
+    // word like "lower" or "space" in the pattern is literal text and must be left alone.
+    for (String v : POSIX_CHARACTER_CLASSES) {
+      regex = regex.replace("[:" + v.toLowerCase(Locale.ROOT) + ":]", "\\p{" + v + "}");
+    }
+
+    int flags = caseSensitive ? 0 : Pattern.CASE_INSENSITIVE;
+    return Pattern.compile(regex, flags).matcher(s).find();
+  }
+
   // =
 
   /** SQL <code>=</code> operator applied to BigDecimal values (neither may be
diff --git a/core/src/main/java/org/apache/calcite/sql/SqlKind.java b/core/src/main/java/org/apache/calcite/sql/SqlKind.java
index 78c427d..8fa3311 100644
--- a/core/src/main/java/org/apache/calcite/sql/SqlKind.java
+++ b/core/src/main/java/org/apache/calcite/sql/SqlKind.java
@@ -417,6 +417,16 @@ public enum SqlKind {
   SIMILAR,
 
   /**
+   * The "~" operator.
+   */
+  POSIX_REGEX_CASE_SENSITIVE,
+
+  /**
+   * The "~*" operator.
+   */
+  POSIX_REGEX_CASE_INSENSITIVE,
+
+  /**
    * The "BETWEEN" operator.
    */
   BETWEEN,
diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlPosixRegexOperator.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlPosixRegexOperator.java
new file mode 100644
index 0000000..4bd8853
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlPosixRegexOperator.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.sql.fun;
+
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.sql.SqlBasicCall;
+import org.apache.calcite.sql.SqlBinaryOperator;
+import org.apache.calcite.sql.SqlCall;
+import org.apache.calcite.sql.SqlCallBinding;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlLiteral;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.calcite.sql.SqlOperandCountRange;
+import org.apache.calcite.sql.SqlWriter;
+import org.apache.calcite.sql.parser.SqlParserPos;
+import org.apache.calcite.sql.type.InferTypes;
+import org.apache.calcite.sql.type.OperandTypes;
+import org.apache.calcite.sql.type.ReturnTypes;
+import org.apache.calcite.sql.type.SqlOperandCountRanges;
+import org.apache.calcite.sql.type.SqlTypeUtil;
+
+import java.util.Arrays;
+
+/**
+ * Operator for the PostgreSQL POSIX regular expression match operators
+ * <code>~</code>, <code>~*</code>, <code>!~</code> and <code>!~*</code>.
+ *
+ * <p>Syntax: <code>src-value [!] ~ [*] pattern-value</code>
+ */
+public class SqlPosixRegexOperator extends SqlBinaryOperator {
+  // ~ Instance fields --------------------------------------------------------
+
+  private final boolean caseSensitive;
+  private final boolean negated;
+
+  // ~ Constructors -----------------------------------------------------------
+
+  /**
+   * Creates a SqlPosixRegexOperator.
+   *
+   * @param name          Operator name
+   * @param kind          Kind
+   * @param caseSensitive Whether this is '~' or '!~' rather than '~*' or '!~*'
+   * @param negated       Whether this is '!~' or '!~*'
+   */
+  SqlPosixRegexOperator(String name, SqlKind kind, boolean caseSensitive, boolean negated) {
+    super(name, kind, 32, true, ReturnTypes.BOOLEAN_NULLABLE, InferTypes.FIRST_KNOWN,
+        OperandTypes.STRING_SAME_SAME_SAME);
+    this.caseSensitive = caseSensitive;
+    this.negated = negated;
+  }
+
+  // ~ Methods ----------------------------------------------------------------
+
+  /** Accepts two operands as written, or three once {@link #createCall} has
+   * appended the case-sensitivity literal. */
+  @Override public SqlOperandCountRange getOperandCountRange() {
+    return SqlOperandCountRanges.between(2, 3);
+  }
+
+  /** Creates the call, appending a boolean literal that carries this
+   * operator's case sensitivity as an extra trailing operand. */
+  @Override public SqlCall createCall(SqlLiteral functionQualifier, SqlParserPos pos,
+      SqlNode... operands) {
+    pos = pos.plusAll(Arrays.asList(operands));
+    operands = Arrays.copyOf(operands, operands.length + 1);
+    operands[operands.length - 1] = SqlLiteral.createBoolean(caseSensitive, SqlParserPos.ZERO);
+    return new SqlBasicCall(this, operands, pos, false, functionQualifier);
+  }
+
+  /**
+   * Checks the first two operands (value and pattern); the trailing
+   * case-sensitivity operand, if present, is not examined.
+   *
+   * <p>Operand types that are not comparable are a user error, so they are
+   * reported as directed by {@code throwOnFailure}, not as an AssertionError.
+   */
+  @Override public boolean checkOperandTypes(SqlCallBinding callBinding,
+      boolean throwOnFailure) {
+    int operandCount = callBinding.getOperandCount();
+    if (operandCount != 2 && operandCount != 3) {
+      throw new AssertionError(
+          "Unexpected number of args to " + callBinding.getCall() + ": " + operandCount);
+    }
+
+    RelDataType op1Type = callBinding.getOperandType(0);
+    RelDataType op2Type = callBinding.getOperandType(1);
+
+    if (!SqlTypeUtil.isComparable(op1Type, op2Type)) {
+      if (throwOnFailure) {
+        throw callBinding.newValidationSignatureError();
+      }
+      return false;
+    }
+
+    return SqlTypeUtil.isCharTypeComparable(
+        callBinding,
+        callBinding.operands().subList(0, 2),
+        throwOnFailure);
+  }
+
+  /** Writes the call as <code>left [!]~[*] right</code>; only the first two
+   * operands are printed. */
+  @Override public void unparse(SqlWriter writer, SqlCall call, int leftPrec, int rightPrec) {
+    final SqlWriter.Frame frame = writer.startList("", "");
+    call.operand(0).unparse(writer, getLeftPrec(), getRightPrec());
+
+    if (this.negated) {
+      writer.print("!");
+    }
+    writer.print("~");
+    if (!this.caseSensitive) {
+      writer.print("*");
+    }
+    writer.print(" ");
+
+    call.operand(1).unparse(writer, getLeftPrec(), getRightPrec());
+    writer.endList(frame);
+  }
+}
+
+// End SqlPosixRegexOperator.java
diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlStdOperatorTable.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlStdOperatorTable.java
index 34a862a..b033ea2 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlStdOperatorTable.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlStdOperatorTable.java
@@ -1369,6 +1369,20 @@ public class SqlStdOperatorTable extends ReflectiveSqlOperatorTable {
   public static final SqlSpecialOperator SIMILAR_TO =
       new SqlLikeOperator("SIMILAR TO", SqlKind.SIMILAR, false);
 
+  public static final SqlBinaryOperator POSIX_REGEX_CASE_SENSITIVE = new SqlPosixRegexOperator(
+      "POSIX REGEX CASE SENSITIVE", SqlKind.POSIX_REGEX_CASE_SENSITIVE, true, false); // "~"
+
+  public static final SqlBinaryOperator POSIX_REGEX_CASE_INSENSITIVE = new SqlPosixRegexOperator(
+      "POSIX REGEX CASE INSENSITIVE", SqlKind.POSIX_REGEX_CASE_INSENSITIVE, false, false); // "~*"
+
+  public static final SqlBinaryOperator NEGATED_POSIX_REGEX_CASE_SENSITIVE =
+      new SqlPosixRegexOperator("NEGATED POSIX REGEX CASE SENSITIVE",
+          SqlKind.POSIX_REGEX_CASE_SENSITIVE, true, true); // "!~"; same kind as "~"
+
+  public static final SqlBinaryOperator NEGATED_POSIX_REGEX_CASE_INSENSITIVE =
+      new SqlPosixRegexOperator("NEGATED POSIX REGEX CASE INSENSITIVE",
+          SqlKind.POSIX_REGEX_CASE_INSENSITIVE, false, true); // "!~*"; same kind as "~*"
+
   /**
    * Internal operator used to represent the ESCAPE clause of a LIKE or
    * SIMILAR TO expression.
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index c84bfe7..4e9c242 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -348,6 +348,7 @@ public enum BuiltInMethod {
   RTRIM(SqlFunctions.class, "rtrim", String.class),
   LIKE(SqlFunctions.class, "like", String.class, String.class),
   SIMILAR(SqlFunctions.class, "similar", String.class, String.class),
+  POSIX_REGEX(SqlFunctions.class, "posixRegex", String.class, String.class, Boolean.class),
   IS_TRUE(SqlFunctions.class, "isTrue", Boolean.class),
   IS_NOT_FALSE(SqlFunctions.class, "isNotFalse", Boolean.class),
   NOT(SqlFunctions.class, "not", Boolean.class),
diff --git a/core/src/test/codegen/config.fmpp b/core/src/test/codegen/config.fmpp
index 8a451e3..330bb37 100644
--- a/core/src/test/codegen/config.fmpp
+++ b/core/src/test/codegen/config.fmpp
@@ -389,6 +389,7 @@ data: {
         "parserImpls.ftl"
       ]
 
+      includePosixOperators: false
       includeCompoundIdentifier: true
       includeBraces: true
       includeAdditionalDeclarations: false
diff --git a/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java b/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
index 55d8811..fa91f34 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlFunctionsTest.java
@@ -39,6 +39,7 @@ import static org.apache.calcite.runtime.SqlFunctions.initcap;
 import static org.apache.calcite.runtime.SqlFunctions.lesser;
 import static org.apache.calcite.runtime.SqlFunctions.lower;
 import static org.apache.calcite.runtime.SqlFunctions.ltrim;
+import static org.apache.calcite.runtime.SqlFunctions.posixRegex;
 import static org.apache.calcite.runtime.SqlFunctions.rtrim;
 import static org.apache.calcite.runtime.SqlFunctions.subtractMonths;
 import static org.apache.calcite.runtime.SqlFunctions.trim;
@@ -72,6 +73,26 @@ public class SqlFunctionsTest {
     assertEquals("nullb", concat(null, "b"));
   }
 
+  @Test public void testPosixRegex() {
+    assertEquals(true, posixRegex("abc", "abc", true));
+    assertEquals(true, posixRegex("abc", "^a", true));
+    assertEquals(true, posixRegex("abc", "(b|d)", true));
+    assertEquals(false, posixRegex("abc", "^(b|c)", true));
+    // Upper-case patterns against a lower-case value, matched case-insensitively
+    assertEquals(true, posixRegex("abc", "ABC", false));
+    assertEquals(true, posixRegex("abc", "^A", false));
+    assertEquals(true, posixRegex("abc", "(B|D)", false));
+    assertEquals(false, posixRegex("abc", "^(B|C)", false));
+    // POSIX character class anchored at both ends: the whole value must match
+    assertEquals(false, posixRegex("abc", "^[[:xdigit:]]$", false));
+    assertEquals(true, posixRegex("abc", "^[[:xdigit:]]+$", false));
+    assertEquals(false, posixRegex("abcq", "^[[:xdigit:]]+$", false));
+    // Same class unanchored: one matching character anywhere is enough
+    assertEquals(true, posixRegex("abc", "[[:xdigit:]]", false));
+    assertEquals(true, posixRegex("abc", "[[:xdigit:]]+", false));
+    assertEquals(true, posixRegex("abcq", "[[:xdigit:]]", false));
+  }
+
   @Test public void testLower() {
     assertEquals("a bcd iijk", lower("A bCd Iijk"));
   }
diff --git a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
index e09881c..33bbbdd 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlValidatorTest.java
@@ -8967,11 +8967,15 @@ public class SqlValidatorTest extends SqlValidatorTestCase {
         + "BETWEEN SYMMETRIC -\n"
         + "IN left\n"
         + "LIKE -\n"
+        + "NEGATED POSIX REGEX CASE INSENSITIVE left\n"
+        + "NEGATED POSIX REGEX CASE SENSITIVE left\n"
         + "NOT BETWEEN ASYMMETRIC -\n"
         + "NOT BETWEEN SYMMETRIC -\n"
         + "NOT IN left\n"
         + "NOT LIKE -\n"
         + "NOT SIMILAR TO -\n"
+        + "POSIX REGEX CASE INSENSITIVE left\n"
+        + "POSIX REGEX CASE SENSITIVE left\n"
         + "SIMILAR TO -\n"
         + "\n"
         + "$IS_DIFFERENT_FROM left\n"
diff --git a/server/src/main/codegen/config.fmpp b/server/src/main/codegen/config.fmpp
index 0fbb139..9c7e8f3 100644
--- a/server/src/main/codegen/config.fmpp
+++ b/server/src/main/codegen/config.fmpp
@@ -416,6 +416,7 @@ data: {
         "parserImpls.ftl"
       ]
 
+      includePosixOperators: false
       includeCompoundIdentifier: true
       includeBraces: true
       includeAdditionalDeclarations: false