Posted to issues@flink.apache.org by GitBox <gi...@apache.org> on 2022/04/21 08:01:53 UTC

[GitHub] [flink] matriv commented on a diff in pull request #19543: [FLINK-22857][table] Support evaluating expressions in SpecializedFunction

matriv commented on code in PR #19543:
URL: https://github.com/apache/flink/pull/19543#discussion_r854873873


##########
flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/calcite/SqlToRexConverter.java:
##########
@@ -48,56 +42,50 @@ public class SqlExprToRexConverterImpl implements SqlExprToRexConverter {
 
     private final @Nullable RelDataType outputType;
 
-    public SqlExprToRexConverterImpl(
-            FrameworkConfig config,
-            FlinkTypeFactory typeFactory,
-            RelOptCluster cluster,
+    public SqlToRexConverter(
+            FlinkPlannerImpl planner,
             SqlDialect sqlDialect,
             RelDataType inputRowType,
             @Nullable RelDataType outputType) {
-        this.planner =
-                new FlinkPlannerImpl(
-                        config,
-                        (isLenient) -> createEmptyCatalogReader(typeFactory),
-                        typeFactory,
-                        cluster);
+        this.planner = planner;
         this.sqlDialect = sqlDialect;
         this.inputRowType = inputRowType;
         this.outputType = outputType;
     }
 
-    @Override
+    /**
+     * Converts the given SQL expression string to an expanded string with fully qualified function
+     * calls and escaped identifiers.
+     *
+     * <p>E.g. {@code my_udf(f0) + 1} to {@code `my_catalog`.`my_database`.`my_udf`(`f0`) + 1}
+     */
     public String expand(String expr) {
         final CalciteParser parser = planner.parser();
         final SqlNode node = parser.parseExpression(expr);
         final SqlNode validated = planner.validateExpression(node, inputRowType, outputType);
         return validated.toSqlString(sqlDialect).getSql();
     }
 
-    @Override
+    /**
+     * Converts a SQL expression to a {@link RexNode} expression.
+     *
+     * @param expr a SQL expression e.g. {@code `my_catalog`.`my_database`.`my_udf`(`f0`) + 1}
+     */
     public RexNode convertToRexNode(String expr) {
         final CalciteParser parser = planner.parser();
         return planner.rex(parser.parseExpression(expr), inputRowType, outputType);
     }
 
-    @Override
+    /**
+     * Converts an array of SQL expressions to an array of {@link RexNode} expressions.
+     *
+     * @param exprs a SQL expression e.g. {@code `my_catalog`.`my_database`.`my_udf`(`f0`) + 1}

Review Comment:
   ```suggestion
        * @param exprs an SQL expression e.g. {@code `my_catalog`.`my_database`.`my_udf`(`f0`) + 1}
   ```



##########
flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/calcite/SqlToRexConverter.java:
##########
@@ -48,56 +42,50 @@ public class SqlExprToRexConverterImpl implements SqlExprToRexConverter {
 
     private final @Nullable RelDataType outputType;
 
-    public SqlExprToRexConverterImpl(
-            FrameworkConfig config,
-            FlinkTypeFactory typeFactory,
-            RelOptCluster cluster,
+    public SqlToRexConverter(
+            FlinkPlannerImpl planner,
             SqlDialect sqlDialect,
             RelDataType inputRowType,
             @Nullable RelDataType outputType) {
-        this.planner =
-                new FlinkPlannerImpl(
-                        config,
-                        (isLenient) -> createEmptyCatalogReader(typeFactory),
-                        typeFactory,
-                        cluster);
+        this.planner = planner;
         this.sqlDialect = sqlDialect;
         this.inputRowType = inputRowType;
         this.outputType = outputType;
     }
 
-    @Override
+    /**
+     * Converts the given SQL expression string to an expanded string with fully qualified function
+     * calls and escaped identifiers.
+     *
+     * <p>E.g. {@code my_udf(f0) + 1} to {@code `my_catalog`.`my_database`.`my_udf`(`f0`) + 1}
+     */
     public String expand(String expr) {
         final CalciteParser parser = planner.parser();
         final SqlNode node = parser.parseExpression(expr);
         final SqlNode validated = planner.validateExpression(node, inputRowType, outputType);
         return validated.toSqlString(sqlDialect).getSql();
     }
 
-    @Override
+    /**
+     * Converts a SQL expression to a {@link RexNode} expression.
+     *
+     * @param expr a SQL expression e.g. {@code `my_catalog`.`my_database`.`my_udf`(`f0`) + 1}

Review Comment:
   ```suggestion
        * @param expr an SQL expression e.g. {@code `my_catalog`.`my_database`.`my_udf`(`f0`) + 1}
   ```



##########
flink-table/flink-table-planner/src/main/scala/org/apache/flink/table/planner/codegen/ExprCodeGenerator.scala:
##########
@@ -351,6 +351,15 @@ class ExprCodeGenerator(ctx: CodeGeneratorContext, nullableInput: Boolean)
   }
 
   override def visitInputRef(inputRef: RexInputRef): GeneratedExpression = {
+    // for specific custom code generation
+    if (input1Type == null) {

Review Comment:
   Is this check safe enough to conclude that this is custom code generation? It feels a bit "hacky".



##########
flink-table/flink-table-planner/src/main/scala/org/apache/flink/table/planner/codegen/calls/BridgingFunctionGenUtil.scala:
##########
@@ -396,4 +405,183 @@ object BridgingFunctionGenUtil {
     val outputClass = outputDataType.map(_.getConversionClass).getOrElse(classOf[Unit])
     validateClassForRuntime(udf.getClass, methodName, argumentClasses, outputClass, functionName)
   }
+
+  class DefaultExpressionEvaluatorFactory(tableConfig: ReadableConfig, rexFactory: RexFactory)
+    extends ExpressionEvaluatorFactory {
+
+    override def createEvaluator(
+        function: BuiltInFunctionDefinition,
+        outputDataType: DataType,
+        args: DataType*): ExpressionEvaluator = {
+      val (argFields, call) = function match {
+        case BuiltInFunctionDefinitions.CAST | BuiltInFunctionDefinitions.TRY_CAST =>
+          Preconditions.checkArgument(args.length == 1, "Casting expects one argument.", Seq(): _*)
+          val field = DataTypes.FIELD("arg0", args.head)
+          (
+            Seq(field),
+            unresolvedCall(function, unresolvedRef(field.getName), typeLiteral(outputDataType)))
+        case _ =>
+          val fields = args.zipWithIndex
+            .map { case (dataType, i) => DataTypes.FIELD(s"arg$i", dataType) }
+          val argRefs = fields.map(arg => unresolvedRef(arg.getName))
+          (fields, unresolvedCall(function, argRefs: _*))
+      }
+
+      createEvaluator(call, outputDataType, argFields: _*)
+    }
+
+    override def createEvaluator(
+        sqlExpression: String,
+        outputDataType: DataType,
+        args: DataTypes.Field*): ExpressionEvaluator = {
+      createEvaluator(callSql(sqlExpression), outputDataType, args: _*)
+    }
+
+    override def createEvaluator(
+        expression: Expression,
+        outputDataType: DataType,
+        args: DataTypes.Field*): ExpressionEvaluator = {
+      args.foreach(f => validateInputDataType(f.getDataType))
+      validateOutputDataType(outputDataType)
+
+      try {
+        createEvaluatorOrError(expression, outputDataType, args)
+      } catch {
+        case t: Throwable =>
+          throw new TableException(
+            s"Unable to create an expression evaluator for expression: $expression",
+            t)
+      }
+    }
+
+    private def createEvaluatorOrError(

Review Comment:
   It would be great to have some sample code in the comments here, similar to what we have for the casting functions.
   Maybe refer to the example in `FunctionITCase` with the `RowEqualityScalarFunction` custom function?
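   For instance, a minimal caller-side sketch of the kind of sample I mean, mirroring the `ArrayContainsFunction` further down in this PR (identifiers such as `elementType`, `functionContext`, `element`, and `needle` are illustrative):
   ```java
   // At specialization time: request a compiled evaluator for `element = needle`.
   final ExpressionEvaluator equalityEvaluator =
           context.createEvaluator(
                   $("element").isEqual($("needle")),
                   DataTypes.BOOLEAN(),
                   DataTypes.FIELD("element", elementType),
                   DataTypes.FIELD("needle", elementType));

   // In open(FunctionContext): compile the generated code into a MethodHandle.
   equalityHandle = equalityEvaluator.open(functionContext);

   // Per record: MethodHandle#invoke declares Throwable, so callers need to wrap it.
   final boolean equal = (boolean) equalityHandle.invoke(element, needle);
   ```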



##########
flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/functions/CollectionFunctionsITCase.java:
##########
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.planner.functions;
+
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.functions.BuiltInFunctionDefinitions;
+import org.apache.flink.types.Row;
+
+import java.time.LocalDate;
+import java.util.stream.Stream;
+
+import static org.apache.flink.table.api.Expressions.$;
+import static org.apache.flink.table.api.Expressions.row;
+
+/** Tests for {@link BuiltInFunctionDefinitions} around arrays. */
+class CollectionFunctionsITCase extends BuiltInFunctionTestBase {
+
+    @Override
+    Stream<TestSetSpec> getTestSetSpecs() {
+        return Stream.of(
+                TestSetSpec.forFunction(BuiltInFunctionDefinitions.ARRAY_CONTAINS)
+                        .onFieldsWithData(
+                                new Integer[] {1, 2, 3},
+                                null,

Review Comment:
   Can we also have an array with null values, e.g. `new Integer[] {null, null, null}`,
   and one more like `new Integer[] {1, null, 2, null}`?
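   A sketch of what I have in mind, next to the existing fields (the expected value assumes the explicit null handling of the new function):
   ```java
   .onFieldsWithData(
           new Integer[] {1, 2, 3},
           null,
           new Integer[] {null, null, null},
           new Integer[] {1, null, 2, null})
   .andDataTypes(
           DataTypes.ARRAY(DataTypes.INT()),
           DataTypes.ARRAY(DataTypes.INT()),
           DataTypes.ARRAY(DataTypes.INT()),
           DataTypes.ARRAY(DataTypes.INT()))
   // a needle that is only "found" through the null check
   .testResult(
           $("f3").arrayContains(null),
           "ARRAY_CONTAINS(f3, NULL)",
           true,
           DataTypes.BOOLEAN().nullable())
   ```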



##########
flink-table/flink-table-runtime/src/main/java/org/apache/flink/table/runtime/functions/scalar/ArrayContainsFunction.java:
##########
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.runtime.functions.scalar;
+
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.data.ArrayData;
+import org.apache.flink.table.functions.BuiltInFunctionDefinitions;
+import org.apache.flink.table.functions.FunctionContext;
+import org.apache.flink.table.functions.SpecializedFunction.ExpressionEvaluator;
+import org.apache.flink.table.functions.SpecializedFunction.SpecializedContext;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.util.FlinkRuntimeException;
+
+import javax.annotation.Nullable;
+
+import java.lang.invoke.MethodHandle;
+
+import static org.apache.flink.table.api.Expressions.$;
+
+/** Implementation of {@link BuiltInFunctionDefinitions#ARRAY_CONTAINS}. */
+@Internal
+public class ArrayContainsFunction extends BuiltInScalarFunction {
+
+    private final ArrayData.ElementGetter elementGetter;
+    private final ExpressionEvaluator equalityEvaluator;
+    private transient MethodHandle equalityHandle;
+
+    public ArrayContainsFunction(SpecializedContext context) {
+        super(BuiltInFunctionDefinitions.ARRAY_CONTAINS, context);
+        final DataType needleDataType = context.getCallContext().getArgumentDataTypes().get(1);
+        elementGetter = ArrayData.createElementGetter(needleDataType.getLogicalType());
+        equalityEvaluator =
+                context.createEvaluator(
+                        $("element").isEqual($("needle")),
+                        DataTypes.BOOLEAN(),
+                        DataTypes.FIELD("element", needleDataType.notNull().toInternal()),
+                        DataTypes.FIELD("needle", needleDataType.notNull().toInternal()));
+    }
+
+    @Override
+    public void open(FunctionContext context) throws Exception {
+        equalityHandle = equalityEvaluator.open(context);
+    }
+
+    public @Nullable Boolean eval(ArrayData haystack, Object needle) {
+        try {
+            if (haystack == null) {
+                return null;
+            }
+            final int size = haystack.size();
+            for (int pos = 0; pos < size; pos++) {
+                final Object element = elementGetter.getElementOrNull(haystack, pos);
+                // handle nullability before to avoid SQL three-value logic for equality

Review Comment:
   I don't get this comment. Could you please explain what you mean by `before`? Where else could we handle this check?



##########
flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/functions/CollectionFunctionsITCase.java:
##########
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.planner.functions;
+
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.functions.BuiltInFunctionDefinitions;
+import org.apache.flink.types.Row;
+
+import java.time.LocalDate;
+import java.util.stream.Stream;
+
+import static org.apache.flink.table.api.Expressions.$;
+import static org.apache.flink.table.api.Expressions.row;
+
+/** Tests for {@link BuiltInFunctionDefinitions} around arrays. */
+class CollectionFunctionsITCase extends BuiltInFunctionTestBase {
+
+    @Override
+    Stream<TestSetSpec> getTestSetSpecs() {
+        return Stream.of(
+                TestSetSpec.forFunction(BuiltInFunctionDefinitions.ARRAY_CONTAINS)
+                        .onFieldsWithData(
+                                new Integer[] {1, 2, 3},
+                                null,
+                                new String[] {"Hello", "World"},
+                                new Row[] {
+                                    Row.of(true, LocalDate.of(2022, 4, 20)),
+                                    Row.of(true, LocalDate.of(1990, 10, 14))
+                                })
+                        .andDataTypes(
+                                DataTypes.ARRAY(DataTypes.INT()),
+                                DataTypes.ARRAY(DataTypes.INT()),
+                                DataTypes.ARRAY(DataTypes.STRING()).notNull(),
+                                DataTypes.ARRAY(
+                                        DataTypes.ROW(DataTypes.BOOLEAN(), DataTypes.DATE())))
+                        // ARRAY<INT>
+                        .testResult(
+                                $("f0").arrayContains(2),
+                                "ARRAY_CONTAINS(f0, 2)",
+                                true,
+                                DataTypes.BOOLEAN().nullable())
+                        .testResult(
+                                $("f0").arrayContains(42),
+                                "ARRAY_CONTAINS(f0, 42)",
+                                false,
+                                DataTypes.BOOLEAN().nullable())
+                        .testResult(
+                                $("f0").arrayContains(null),
+                                "ARRAY_CONTAINS(f0, NULL)",
+                                false,
+                                DataTypes.BOOLEAN().nullable())
+                        // ARRAY<INT> of null value
+                        .testResult(
+                                $("f1").arrayContains(12),

Review Comment:
   Can we also test `$("f1").arrayContains(null)`?
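   Something like the following, given the `haystack == null` branch in `ArrayContainsFunction#eval` (a null array should yield NULL rather than false):
   ```java
   .testResult(
           $("f1").arrayContains(null),
           "ARRAY_CONTAINS(f1, NULL)",
           null,
           DataTypes.BOOLEAN().nullable())
   ```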



##########
flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/functions/CollectionFunctionsITCase.java:
##########
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.planner.functions;
+
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.functions.BuiltInFunctionDefinitions;
+import org.apache.flink.types.Row;
+
+import java.time.LocalDate;
+import java.util.stream.Stream;
+
+import static org.apache.flink.table.api.Expressions.$;
+import static org.apache.flink.table.api.Expressions.row;
+
+/** Tests for {@link BuiltInFunctionDefinitions} around arrays. */
+class CollectionFunctionsITCase extends BuiltInFunctionTestBase {
+
+    @Override
+    Stream<TestSetSpec> getTestSetSpecs() {
+        return Stream.of(
+                TestSetSpec.forFunction(BuiltInFunctionDefinitions.ARRAY_CONTAINS)
+                        .onFieldsWithData(
+                                new Integer[] {1, 2, 3},
+                                null,
+                                new String[] {"Hello", "World"},
+                                new Row[] {

Review Comment:
   Maybe it's also worth testing a `Row[]` which is null, and one more with null values, similar to the comment above about the `Integer[]`.
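   E.g. a sketch, reusing the existing sample rows (one NULL array plus one array with a null element, with two matching `DataTypes.ARRAY(DataTypes.ROW(DataTypes.BOOLEAN(), DataTypes.DATE()))` entries in `andDataTypes`):
   ```java
   null,
   new Row[] {
       Row.of(true, LocalDate.of(2022, 4, 20)),
       null
   }
   ```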



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@flink.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org