You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2021/03/09 01:42:33 UTC

[incubator-pinot] branch master updated: Support identifier/literal as the derived column transform function (#6657)

This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 6644968  Support identifier/literal as the derived column transform function (#6657)
6644968 is described below

commit 66449688de744a119e32b114cd458beef17f7908
Author: Xiaotian (Jackie) Jiang <17...@users.noreply.github.com>
AuthorDate: Mon Mar 8 17:42:19 2021 -0800

    Support identifier/literal as the derived column transform function (#6657)
    
    This is useful for:
    - Changing the column name in source data
    - Filling the value for a column (can also be achieved via the default value)
---
 .../data/function/InbuiltFunctionEvaluator.java    | 76 +++++++++-------------
 .../context/utils/QueryContextConverterUtils.java  | 28 +++++---
 .../function/InbuiltFunctionEvaluatorTest.java     | 30 ++++++++-
 .../processing/framework/PartitionerTest.java      |  4 +-
 .../processing/framework/RecordFilterTest.java     |  4 +-
 .../framework/RecordTransformerTest.java           |  7 +-
 6 files changed, 83 insertions(+), 66 deletions(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/function/InbuiltFunctionEvaluator.java b/pinot-core/src/main/java/org/apache/pinot/core/data/function/InbuiltFunctionEvaluator.java
index 15800b8..ab6775d 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/function/InbuiltFunctionEvaluator.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/function/InbuiltFunctionEvaluator.java
@@ -31,21 +31,13 @@ import org.apache.pinot.spi.data.readers.GenericRow;
 
 
 /**
- * Evaluates a function expression.
- * <p>This is optimized for evaluating the an expression multiple times with different inputs.
- * <p>Overall idea
+ * Evaluates an expression.
+ * <p>This is optimized for evaluating an expression multiple times with different inputs.
+ * <p>Overall idea: parse the expression into an ExecutableNode, where an ExecutableNode can be:
  * <ul>
- *   <li>Parse the function expression into an expression tree</li>
- *   <li>Convert each node in the expression tree into and ExecutableNode</li>
- * </ul>
- * <p>An ExecutableNode can be a
- * <ul>
- *   <li>FunctionNode - executes another function</li>
+ *   <li>FunctionNode - executes a function</li>
  *   <li>ColumnNode - fetches the value of the column from the input GenericRow</li>
- *   <li>
- *     ConstantNode - returns the same value
- *     <p>Typically constant function arguments are represented using a ConstantNode
- *   </li>
+ *   <li>ConstantNode - returns the literal value</li>
  * </ul>
  */
 public class InbuiltFunctionEvaluator implements FunctionEvaluator {
@@ -56,42 +48,34 @@ public class InbuiltFunctionEvaluator implements FunctionEvaluator {
   public InbuiltFunctionEvaluator(String functionExpression) {
     _arguments = new ArrayList<>();
     ExpressionContext expression = QueryContextConverterUtils.getExpression(functionExpression);
-    Preconditions
-        .checkArgument(expression.getType() == ExpressionContext.Type.FUNCTION, "Invalid function expression: %s",
-            functionExpression);
-    _rootNode = planExecution(expression.getFunction());
+    _rootNode = planExecution(expression);
   }
 
-  private FunctionExecutionNode planExecution(FunctionContext function) {
-    List<ExpressionContext> arguments = function.getArguments();
-    int numArguments = arguments.size();
-    ExecutableNode[] childNodes = new ExecutableNode[numArguments];
-    for (int i = 0; i < numArguments; i++) {
-      ExpressionContext argument = arguments.get(i);
-      ExecutableNode childNode;
-      switch (argument.getType()) {
-        case FUNCTION:
-          childNode = planExecution(argument.getFunction());
-          break;
-        case IDENTIFIER:
-          String columnName = argument.getIdentifier();
-          childNode = new ColumnExecutionNode(columnName, _arguments.size());
-          _arguments.add(columnName);
-          break;
-        case LITERAL:
-          childNode = new ConstantExecutionNode(argument.getLiteral());
-          break;
-        default:
-          throw new IllegalStateException();
-      }
-      childNodes[i] = childNode;
-    }
-
-    FunctionInfo functionInfo = FunctionRegistry.getFunctionInfo(function.getFunctionName(), numArguments);
-    Preconditions
-        .checkState(functionInfo != null, "Unsupported function: %s with %s parameters", function.getFunctionName(),
+  private ExecutableNode planExecution(ExpressionContext expression) {
+    switch (expression.getType()) {
+      case LITERAL:
+        return new ConstantExecutionNode(expression.getLiteral());
+      case IDENTIFIER:
+        String columnName = expression.getIdentifier();
+        ColumnExecutionNode columnExecutionNode = new ColumnExecutionNode(columnName, _arguments.size());
+        _arguments.add(columnName);
+        return columnExecutionNode;
+      case FUNCTION:
+        FunctionContext function = expression.getFunction();
+        List<ExpressionContext> arguments = function.getArguments();
+        int numArguments = arguments.size();
+        ExecutableNode[] childNodes = new ExecutableNode[numArguments];
+        for (int i = 0; i < numArguments; i++) {
+          childNodes[i] = planExecution(arguments.get(i));
+        }
+        String functionName = function.getFunctionName();
+        FunctionInfo functionInfo = FunctionRegistry.getFunctionInfo(functionName, numArguments);
+        Preconditions.checkState(functionInfo != null, "Unsupported function: %s with %s parameters", functionName,
             numArguments);
-    return new FunctionExecutionNode(functionInfo, childNodes);
+        return new FunctionExecutionNode(functionInfo, childNodes);
+      default:
+        throw new IllegalStateException();
+    }
   }
 
   @Override
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/utils/QueryContextConverterUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/utils/QueryContextConverterUtils.java
index c99868c..6829c83 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/utils/QueryContextConverterUtils.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/utils/QueryContextConverterUtils.java
@@ -131,11 +131,15 @@ public class QueryContextConverterUtils {
         FunctionDefinitionRegistry.isAggFunc(functionName) ? FunctionContext.Type.AGGREGATION
             : FunctionContext.Type.TRANSFORM;
     List<Expression> operands = thriftFunction.getOperands();
-    List<ExpressionContext> arguments = new ArrayList<>(operands.size());
-    for (Expression operand : operands) {
-      arguments.add(getExpression(operand));
+    if (operands != null) {
+      List<ExpressionContext> arguments = new ArrayList<>(operands.size());
+      for (Expression operand : operands) {
+        arguments.add(getExpression(operand));
+      }
+      return new FunctionContext(functionType, functionName, arguments);
+    } else {
+      return new FunctionContext(functionType, functionName, Collections.emptyList());
     }
-    return new FunctionContext(functionType, functionName, arguments);
   }
 
   /**
@@ -152,11 +156,15 @@ public class QueryContextConverterUtils {
         FunctionDefinitionRegistry.isAggFunc(functionName) ? FunctionContext.Type.AGGREGATION
             : FunctionContext.Type.TRANSFORM;
     List<? extends AstNode> children = astNode.getChildren();
-    List<ExpressionContext> arguments = new ArrayList<>(children.size());
-    for (AstNode child : children) {
-      arguments.add(getExpression(child));
+    if (children != null) {
+      List<ExpressionContext> arguments = new ArrayList<>(children.size());
+      for (AstNode child : children) {
+        arguments.add(getExpression(child));
+      }
+      return new FunctionContext(functionType, functionName, arguments);
+    } else {
+      return new FunctionContext(functionType, functionName, Collections.emptyList());
     }
-    return new FunctionContext(functionType, functionName, arguments);
   }
 
   /**
@@ -232,8 +240,8 @@ public class QueryContextConverterUtils {
         return new FilterContext(FilterContext.Type.PREDICATE, null,
             new TextMatchPredicate(getExpression(operands.get(0)), getStringValue(operands.get(1))));
       case JSON_MATCH:
-          return new FilterContext(FilterContext.Type.PREDICATE, null,
-              new JsonMatchPredicate(getExpression(operands.get(0)), getStringValue(operands.get(1))));
+        return new FilterContext(FilterContext.Type.PREDICATE, null,
+            new JsonMatchPredicate(getExpression(operands.get(0)), getStringValue(operands.get(1))));
       case IS_NULL:
         return new FilterContext(FilterContext.Type.PREDICATE, null,
             new IsNullPredicate(getExpression(operands.get(0))));
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/data/function/InbuiltFunctionEvaluatorTest.java b/pinot-core/src/test/java/org/apache/pinot/core/data/function/InbuiltFunctionEvaluatorTest.java
index c25b35b..9ea053e 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/data/function/InbuiltFunctionEvaluatorTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/data/function/InbuiltFunctionEvaluatorTest.java
@@ -30,7 +30,31 @@ import static org.testng.Assert.assertTrue;
 public class InbuiltFunctionEvaluatorTest {
 
   @Test
-  public void testExpressionWithColumn() {
+  public void testColumnExpression() {
+    String expression = "testColumn";
+    InbuiltFunctionEvaluator evaluator = new InbuiltFunctionEvaluator(expression);
+    assertEquals(evaluator.getArguments(), Collections.singletonList("testColumn"));
+    GenericRow row = new GenericRow();
+    for (int i = 0; i < 5; i++) {
+      String value = "testValue" + i;
+      row.putValue("testColumn", value);
+      assertEquals(evaluator.evaluate(row), value);
+    }
+  }
+
+  @Test
+  public void testLiteralExpression() {
+    String expression = "'testValue'";
+    InbuiltFunctionEvaluator evaluator = new InbuiltFunctionEvaluator(expression);
+    assertTrue(evaluator.getArguments().isEmpty());
+    GenericRow row = new GenericRow();
+    for (int i = 0; i < 5; i++) {
+      assertEquals(evaluator.evaluate(row), "testValue");
+    }
+  }
+
+  @Test
+  public void testFunctionWithColumn() {
     String expression = "reverse(testColumn)";
     InbuiltFunctionEvaluator evaluator = new InbuiltFunctionEvaluator(expression);
     assertEquals(evaluator.getArguments(), Collections.singletonList("testColumn"));
@@ -43,7 +67,7 @@ public class InbuiltFunctionEvaluatorTest {
   }
 
   @Test
-  public void testExpressionWithConstant() {
+  public void testFunctionWithLiteral() {
     String expression = "reverse(12345)";
     InbuiltFunctionEvaluator evaluator = new InbuiltFunctionEvaluator(expression);
     assertTrue(evaluator.getArguments().isEmpty());
@@ -52,7 +76,7 @@ public class InbuiltFunctionEvaluatorTest {
   }
 
   @Test
-  public void testMultiFunctionExpression() {
+  public void testNestedFunction() {
     String expression = "reverse(reverse(testColumn))";
     InbuiltFunctionEvaluator evaluator = new InbuiltFunctionEvaluator(expression);
     assertEquals(evaluator.getArguments(), Collections.singletonList("testColumn"));
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/PartitionerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/PartitionerTest.java
index 046af76..fee53e6 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/PartitionerTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/PartitionerTest.java
@@ -158,11 +158,11 @@ public class PartitionerTest {
     }
     partitionerConfig =
         new PartitionerConfig.Builder().setPartitionerType(PartitionerFactory.PartitionerType.TRANSFORM_FUNCTION)
-            .setTransformFunction("bad function").build();
+            .setTransformFunction("badFunction()").build();
     try {
       PartitionerFactory.getPartitioner(partitionerConfig);
       fail("Should not create TRANSFORM_FUNCTION Partitioner for invalid transform function");
-    } catch (IllegalArgumentException e) {
+    } catch (IllegalStateException e) {
       // expected
     }
 
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/RecordFilterTest.java b/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/RecordFilterTest.java
index be5cd02..3b49f31 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/RecordFilterTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/RecordFilterTest.java
@@ -45,11 +45,11 @@ public class RecordFilterTest {
 
     recordFilterConfig =
         new RecordFilterConfig.Builder().setRecordFilterType(RecordFilterFactory.RecordFilterType.FILTER_FUNCTION)
-            .setFilterFunction("bad function").build();
+            .setFilterFunction("badFunction()").build();
     try {
       RecordFilterFactory.getRecordFilter(recordFilterConfig);
       fail("Should not pass for invalid filter function");
-    } catch (IllegalArgumentException e) {
+    } catch (IllegalStateException e) {
       // expected
     }
     recordFilterConfig =
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/RecordTransformerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/RecordTransformerTest.java
index 6633e1a..5b1753c 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/RecordTransformerTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/RecordTransformerTest.java
@@ -54,12 +54,12 @@ public class RecordTransformerTest {
     recordTransformer = RecordTransformerFactory.getRecordTransformer(config);
     assertEquals(recordTransformer.getClass(), TransformFunctionRecordTransformer.class);
 
-    transformFunctionMap.put("bar", "bad function");
+    transformFunctionMap.put("bar", "badFunction()");
     config = new RecordTransformerConfig.Builder().setTransformFunctionsMap(transformFunctionMap).build();
     try {
       RecordTransformerFactory.getRecordTransformer(config);
       fail("Should not create record transformer with invalid transform function");
-    } catch (IllegalArgumentException e) {
+    } catch (IllegalStateException e) {
       // expected
     }
   }
@@ -70,7 +70,8 @@ public class RecordTransformerTest {
     transformFunctionMap.put("foo", "toEpochDays(foo)");
     transformFunctionMap.put("bar", "Groovy({bar + \"_\" + zoo}, bar, zoo)");
     transformFunctionMap.put("dMv", "Groovy({dMv.findAll { it > 1}}, dMv)");
-    RecordTransformerConfig config = new RecordTransformerConfig.Builder().setTransformFunctionsMap(transformFunctionMap).build();
+    RecordTransformerConfig config =
+        new RecordTransformerConfig.Builder().setTransformFunctionsMap(transformFunctionMap).build();
     RecordTransformer recordTransformer = RecordTransformerFactory.getRecordTransformer(config);
     GenericRow row = new GenericRow();
     row.putValue("foo", 1587410614000L);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org