You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by "ankitsultana (via GitHub)" <gi...@apache.org> on 2023/02/17 20:01:10 UTC

[GitHub] [pinot] ankitsultana commented on a diff in pull request #10286: [multistage] Initial (phase 1) Query runtime for window functions - empty OVER() and OVER(PARTITION BY)

ankitsultana commented on code in PR #10286:
URL: https://github.com/apache/pinot/pull/10286#discussion_r1110298242


##########
pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/WindowAggregateOperator.java:
##########
@@ -0,0 +1,431 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.query.runtime.operator;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import javax.annotation.Nullable;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.pinot.common.datablock.DataBlock;
+import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.core.data.table.Key;
+import org.apache.pinot.query.planner.logical.RexExpression;
+import org.apache.pinot.query.runtime.blocks.TransferableBlock;
+import org.apache.pinot.query.runtime.blocks.TransferableBlockUtils;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * The WindowAggregateOperator is used to compute window function aggregations over a set of optional
+ * PARTITION BY keys, ORDER BY keys and a FRAME clause. The output data will include the projected
+ * columns and in addition will add the aggregation columns to the output data.
+ * [input columns, aggregate result1, ... aggregate resultN]
+ *
+ * The window functions supported today are SUM/COUNT/MIN/MAX aggregations. Window functions also include
+ * other types of functions such as rank and value functions.
+ *
+ * Unlike the AggregateOperator which will output one row per group, the WindowAggregateOperator
+ * will output as many rows as input rows.
+ *
+ * TODO:
+ *     1. Add support for OVER() clause with ORDER BY only or PARTITION BY ORDER BY
+ *     2. Add support for rank window functions
+ *     3. Add support for value window functions
+ *     4. Add support for custom frames
+ */
+public class WindowAggregateOperator extends MultiStageOperator {
+  private static final String EXPLAIN_NAME = "WINDOW";
+  private static final Logger LOGGER = LoggerFactory.getLogger(WindowAggregateOperator.class);
+
+  private final MultiStageOperator _inputOperator;
+  private final List<RexExpression> _groupSet;
+  private final OrderSetInfo _orderSetInfo;
+  private final WindowFrame _windowFrame;
+  private final List<RexExpression.FunctionCall> _aggCalls;
+  private final List<RexExpression> _constants;
+  private final DataSchema _resultSchema;
+  private final WindowAccumulator[] _windowAccumulators;
+  private final Map<Key, List<Object[]>> _partitionRows;
+
+  private TransferableBlock _upstreamErrorBlock;
+
+  private int _numRows;
+  private boolean _readyToConstruct;
+  private boolean _hasReturnedWindowAggregateBlock;
+
+  public WindowAggregateOperator(MultiStageOperator inputOperator, List<RexExpression> groupSet,
+      List<RexExpression> orderSet, List<RelFieldCollation.Direction> orderSetDirection,
+      List<RelFieldCollation.NullDirection> orderSetNullDirection, List<RexExpression> aggCalls, int lowerBound,
+      int upperBound, boolean isRows, List<RexExpression> constants, DataSchema resultSchema, DataSchema inputSchema,
+      long requestId, int stageId) {
+    this(inputOperator, groupSet, orderSet, orderSetDirection, orderSetNullDirection, aggCalls, lowerBound,
+        upperBound, isRows, constants, resultSchema, inputSchema, WindowAccumulator.WINDOW_MERGERS,
+        requestId, stageId);
+  }
+
+  @VisibleForTesting
+  public WindowAggregateOperator(MultiStageOperator inputOperator, List<RexExpression> groupSet,
+      List<RexExpression> orderSet, List<RelFieldCollation.Direction> orderSetDirection,
+      List<RelFieldCollation.NullDirection> orderSetNullDirection, List<RexExpression> aggCalls, int lowerBound,
+      int upperBound, boolean isRows, List<RexExpression> constants, DataSchema resultSchema, DataSchema inputSchema,
+      Map<String, Function<DataSchema.ColumnDataType, WindowMerger>> mergers, long requestId, int stageId) {
+    super(requestId, stageId);
+
+    boolean isPartitionByOnly = isPartitionByOnlyQuery(groupSet, orderSet, orderSetDirection, orderSetNullDirection);
+    // TODO: add support for ORDER BY in the OVER() clause
+    Preconditions.checkState(orderSet == null || orderSet.isEmpty() || isPartitionByOnly,
+        "Order by is not yet supported in window functions");
+
+    _inputOperator = inputOperator;
+    _groupSet = groupSet;
+    _orderSetInfo = new OrderSetInfo(orderSet, orderSetDirection, orderSetNullDirection);
+    _windowFrame = new WindowFrame(lowerBound, upperBound, isRows);
+
+    // TODO: add support for custom frames, and for ORDER BY default frame (upperBound => currentRow)
+    Preconditions.checkState(!_windowFrame.isRows(), "Only RANGE type frames are supported at present");
+    Preconditions.checkState(_windowFrame.isUnboundedPreceding(),
+        "Only default frame is supported, lowerBound must be UNBOUNDED PRECEDING");
+    Preconditions.checkState(_windowFrame.isUnboundedFollowing()
+            || (_windowFrame.isUpperBoundCurrentRow() && isPartitionByOnly),
+        "Only default frame is supported, upperBound must be UNBOUNDED FOLLOWING or CURRENT ROW");
+
+    // we expect all agg calls to be aggregate function calls
+    _aggCalls = aggCalls.stream().map(RexExpression.FunctionCall.class::cast).collect(Collectors.toList());
+    _constants = constants;
+    _resultSchema = resultSchema;
+
+    // TODO: Not all window functions (e.g. ROW_NUMBER, LAG, etc) need aggregations. Such functions should be handled
+    //       differently.
+    _windowAccumulators = new WindowAccumulator[_aggCalls.size()];
+    for (int i = 0; i < _aggCalls.size(); i++) {
+      RexExpression.FunctionCall agg = _aggCalls.get(i);
+      String functionName = agg.getFunctionName();
+      if (!mergers.containsKey(functionName)) {
+        throw new IllegalStateException("Unexpected value: " + functionName);
+      }
+      _windowAccumulators[i] = new WindowAccumulator(agg, mergers, functionName, inputSchema);
+    }
+
+    _partitionRows = new HashMap<>();
+
+    _numRows = 0;
+    _readyToConstruct = false;
+    _hasReturnedWindowAggregateBlock = false;
+  }
+
+  @Override
+  public List<MultiStageOperator> getChildOperators() {
+    return ImmutableList.of(_inputOperator);
+  }
+
+  @Nullable
+  @Override
+  public String toExplainString() {
+    return EXPLAIN_NAME;
+  }
+
+  @Override
+  protected TransferableBlock getNextBlock() {
+    try {
+      if (!_readyToConstruct && !consumeInputBlocks()) {
+        return TransferableBlockUtils.getNoOpTransferableBlock();
+      }
+
+      if (_upstreamErrorBlock != null) {
+        return _upstreamErrorBlock;
+      }
+
+      if (!_hasReturnedWindowAggregateBlock) {
+        return produceWindowAggregateBlock();
+      } else {
+        // TODO: Move to close call.
+        return TransferableBlockUtils.getEndOfStreamTransferableBlock();
+      }
+    } catch (Exception e) {
+      return TransferableBlockUtils.getErrorTransferableBlock(e);
+    }
+  }
+
+  private boolean isPartitionByOnlyQuery(List<RexExpression> groupSet, List<RexExpression> orderSet,
+      List<RelFieldCollation.Direction> orderSetDirection,
+      List<RelFieldCollation.NullDirection> orderSetNullDirection) {
+    if (orderSet == null || orderSet.isEmpty()) {
+      return true;
+    }
+
+    if (groupSet == null || groupSet.isEmpty() || (groupSet.size() != orderSet.size())) {
+      return false;
+    }
+
+    Set<Integer> partitionByInputRefIndexes = new HashSet<>();
+    Set<Integer> orderByInputRefIndexes = new HashSet<>();
+    for (int i = 0; i < groupSet.size(); i++) {
+      partitionByInputRefIndexes.add(((RexExpression.InputRef) groupSet.get(i)).getIndex());
+      orderByInputRefIndexes.add(((RexExpression.InputRef) orderSet.get(i)).getIndex());
+    }
+
+    boolean isPartitionByOnly = partitionByInputRefIndexes.equals(orderByInputRefIndexes);
+    if (isPartitionByOnly) {
+      // Check the direction and null direction to ensure default ordering on the order by keys, which are:
+      // Direction: ASC
+      // Null Direction: LAST
+      for (int i = 0; i < orderSet.size(); i++) {
+        if (orderSetDirection.get(i) == RelFieldCollation.Direction.DESCENDING
+            || orderSetNullDirection.get(i) == RelFieldCollation.NullDirection.FIRST) {
+          isPartitionByOnly = false;
+          break;
+        }
+      }
+    }
+    return isPartitionByOnly;
+  }
+
+  private TransferableBlock produceWindowAggregateBlock() {
+    List<Object[]> rows = new ArrayList<>(_numRows);
+    for (Map.Entry<Key, List<Object[]>> e : _partitionRows.entrySet()) {
+      Key partitionKey = e.getKey();
+      List<Object[]> rowList = e.getValue();
+      for (Object[] existingRow : rowList) {
+        Object[] row = new Object[existingRow.length + _aggCalls.size()];
+        System.arraycopy(existingRow, 0, row, 0, existingRow.length);
+        for (int i = 0; i < _windowAccumulators.length; i++) {
+          row[i + existingRow.length] = _windowAccumulators[i]._results.get(partitionKey);
+        }
+        rows.add(row);
+      }
+    }
+    _hasReturnedWindowAggregateBlock = true;
+    if (rows.size() == 0) {

Review Comment:
   If there are no rows, I think we should still return a regular `TransferableBlock` (not EOS) so that the upstream operators have at least one block to process; this may be needed to handle scenarios like aggregations without group-by.



##########
pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/WindowAggregateOperator.java:
##########
@@ -0,0 +1,431 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.query.runtime.operator;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import javax.annotation.Nullable;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.pinot.common.datablock.DataBlock;
+import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.core.data.table.Key;
+import org.apache.pinot.query.planner.logical.RexExpression;
+import org.apache.pinot.query.runtime.blocks.TransferableBlock;
+import org.apache.pinot.query.runtime.blocks.TransferableBlockUtils;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * The WindowAggregateOperator is used to compute window function aggregations over a set of optional
+ * PARTITION BY keys, ORDER BY keys and a FRAME clause. The output data will include the projected
+ * columns and in addition will add the aggregation columns to the output data.
+ * [input columns, aggregate result1, ... aggregate resultN]
+ *
+ * The window functions supported today are SUM/COUNT/MIN/MAX aggregations. Window functions also include
+ * other types of functions such as rank and value functions.
+ *
+ * Unlike the AggregateOperator which will output one row per group, the WindowAggregateOperator
+ * will output as many rows as input rows.
+ *
+ * TODO:
+ *     1. Add support for OVER() clause with ORDER BY only or PARTITION BY ORDER BY
+ *     2. Add support for rank window functions
+ *     3. Add support for value window functions
+ *     4. Add support for custom frames
+ */
+public class WindowAggregateOperator extends MultiStageOperator {
+  private static final String EXPLAIN_NAME = "WINDOW";
+  private static final Logger LOGGER = LoggerFactory.getLogger(WindowAggregateOperator.class);
+
+  private final MultiStageOperator _inputOperator;
+  private final List<RexExpression> _groupSet;
+  private final OrderSetInfo _orderSetInfo;
+  private final WindowFrame _windowFrame;
+  private final List<RexExpression.FunctionCall> _aggCalls;
+  private final List<RexExpression> _constants;
+  private final DataSchema _resultSchema;
+  private final WindowAccumulator[] _windowAccumulators;
+  private final Map<Key, List<Object[]>> _partitionRows;
+
+  private TransferableBlock _upstreamErrorBlock;
+
+  private int _numRows;
+  private boolean _readyToConstruct;
+  private boolean _hasReturnedWindowAggregateBlock;
+
+  public WindowAggregateOperator(MultiStageOperator inputOperator, List<RexExpression> groupSet,
+      List<RexExpression> orderSet, List<RelFieldCollation.Direction> orderSetDirection,
+      List<RelFieldCollation.NullDirection> orderSetNullDirection, List<RexExpression> aggCalls, int lowerBound,
+      int upperBound, boolean isRows, List<RexExpression> constants, DataSchema resultSchema, DataSchema inputSchema,
+      long requestId, int stageId) {
+    this(inputOperator, groupSet, orderSet, orderSetDirection, orderSetNullDirection, aggCalls, lowerBound,
+        upperBound, isRows, constants, resultSchema, inputSchema, WindowAccumulator.WINDOW_MERGERS,
+        requestId, stageId);
+  }
+
+  @VisibleForTesting
+  public WindowAggregateOperator(MultiStageOperator inputOperator, List<RexExpression> groupSet,
+      List<RexExpression> orderSet, List<RelFieldCollation.Direction> orderSetDirection,
+      List<RelFieldCollation.NullDirection> orderSetNullDirection, List<RexExpression> aggCalls, int lowerBound,
+      int upperBound, boolean isRows, List<RexExpression> constants, DataSchema resultSchema, DataSchema inputSchema,
+      Map<String, Function<DataSchema.ColumnDataType, WindowMerger>> mergers, long requestId, int stageId) {
+    super(requestId, stageId);
+
+    boolean isPartitionByOnly = isPartitionByOnlyQuery(groupSet, orderSet, orderSetDirection, orderSetNullDirection);
+    // TODO: add support for ORDER BY in the OVER() clause
+    Preconditions.checkState(orderSet == null || orderSet.isEmpty() || isPartitionByOnly,
+        "Order by is not yet supported in window functions");
+
+    _inputOperator = inputOperator;
+    _groupSet = groupSet;
+    _orderSetInfo = new OrderSetInfo(orderSet, orderSetDirection, orderSetNullDirection);
+    _windowFrame = new WindowFrame(lowerBound, upperBound, isRows);
+
+    // TODO: add support for custom frames, and for ORDER BY default frame (upperBound => currentRow)
+    Preconditions.checkState(!_windowFrame.isRows(), "Only RANGE type frames are supported at present");
+    Preconditions.checkState(_windowFrame.isUnboundedPreceding(),
+        "Only default frame is supported, lowerBound must be UNBOUNDED PRECEDING");
+    Preconditions.checkState(_windowFrame.isUnboundedFollowing()
+            || (_windowFrame.isUpperBoundCurrentRow() && isPartitionByOnly),
+        "Only default frame is supported, upperBound must be UNBOUNDED FOLLOWING or CURRENT ROW");
+
+    // we expect all agg calls to be aggregate function calls
+    _aggCalls = aggCalls.stream().map(RexExpression.FunctionCall.class::cast).collect(Collectors.toList());
+    _constants = constants;
+    _resultSchema = resultSchema;
+
+    // TODO: Not all window functions (e.g. ROW_NUMBER, LAG, etc) need aggregations. Such functions should be handled
+    //       differently.
+    _windowAccumulators = new WindowAccumulator[_aggCalls.size()];
+    for (int i = 0; i < _aggCalls.size(); i++) {
+      RexExpression.FunctionCall agg = _aggCalls.get(i);
+      String functionName = agg.getFunctionName();
+      if (!mergers.containsKey(functionName)) {
+        throw new IllegalStateException("Unexpected value: " + functionName);
+      }
+      _windowAccumulators[i] = new WindowAccumulator(agg, mergers, functionName, inputSchema);
+    }
+
+    _partitionRows = new HashMap<>();
+
+    _numRows = 0;
+    _readyToConstruct = false;
+    _hasReturnedWindowAggregateBlock = false;
+  }
+
+  @Override
+  public List<MultiStageOperator> getChildOperators() {
+    return ImmutableList.of(_inputOperator);
+  }
+
+  @Nullable
+  @Override
+  public String toExplainString() {
+    return EXPLAIN_NAME;
+  }
+
+  @Override
+  protected TransferableBlock getNextBlock() {
+    try {
+      if (!_readyToConstruct && !consumeInputBlocks()) {
+        return TransferableBlockUtils.getNoOpTransferableBlock();
+      }
+
+      if (_upstreamErrorBlock != null) {
+        return _upstreamErrorBlock;
+      }
+
+      if (!_hasReturnedWindowAggregateBlock) {
+        return produceWindowAggregateBlock();
+      } else {
+        // TODO: Move to close call.
+        return TransferableBlockUtils.getEndOfStreamTransferableBlock();
+      }
+    } catch (Exception e) {
+      return TransferableBlockUtils.getErrorTransferableBlock(e);
+    }
+  }
+
+  private boolean isPartitionByOnlyQuery(List<RexExpression> groupSet, List<RexExpression> orderSet,
+      List<RelFieldCollation.Direction> orderSetDirection,
+      List<RelFieldCollation.NullDirection> orderSetNullDirection) {
+    if (orderSet == null || orderSet.isEmpty()) {

Review Comment:
   nit: use `CollectionUtils.isEmpty(orderSet)` here instead of the explicit null/empty check.



##########
pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/WindowAggregateOperator.java:
##########
@@ -0,0 +1,431 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.query.runtime.operator;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import javax.annotation.Nullable;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.pinot.common.datablock.DataBlock;
+import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.core.data.table.Key;
+import org.apache.pinot.query.planner.logical.RexExpression;
+import org.apache.pinot.query.runtime.blocks.TransferableBlock;
+import org.apache.pinot.query.runtime.blocks.TransferableBlockUtils;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * The WindowAggregateOperator is used to compute window function aggregations over a set of optional
+ * PARTITION BY keys, ORDER BY keys and a FRAME clause. The output data will include the projected
+ * columns and in addition will add the aggregation columns to the output data.
+ * [input columns, aggregate result1, ... aggregate resultN]
+ *
+ * The window functions supported today are SUM/COUNT/MIN/MAX aggregations. Window functions also include
+ * other types of functions such as rank and value functions.
+ *
+ * Unlike the AggregateOperator which will output one row per group, the WindowAggregateOperator
+ * will output as many rows as input rows.
+ *
+ * TODO:
+ *     1. Add support for OVER() clause with ORDER BY only or PARTITION BY ORDER BY
+ *     2. Add support for rank window functions
+ *     3. Add support for value window functions
+ *     4. Add support for custom frames
+ */
+public class WindowAggregateOperator extends MultiStageOperator {
+  private static final String EXPLAIN_NAME = "WINDOW";
+  private static final Logger LOGGER = LoggerFactory.getLogger(WindowAggregateOperator.class);
+
+  private final MultiStageOperator _inputOperator;
+  private final List<RexExpression> _groupSet;
+  private final OrderSetInfo _orderSetInfo;
+  private final WindowFrame _windowFrame;
+  private final List<RexExpression.FunctionCall> _aggCalls;
+  private final List<RexExpression> _constants;
+  private final DataSchema _resultSchema;
+  private final WindowAccumulator[] _windowAccumulators;
+  private final Map<Key, List<Object[]>> _partitionRows;
+
+  private TransferableBlock _upstreamErrorBlock;
+
+  private int _numRows;
+  private boolean _readyToConstruct;
+  private boolean _hasReturnedWindowAggregateBlock;
+
+  public WindowAggregateOperator(MultiStageOperator inputOperator, List<RexExpression> groupSet,
+      List<RexExpression> orderSet, List<RelFieldCollation.Direction> orderSetDirection,
+      List<RelFieldCollation.NullDirection> orderSetNullDirection, List<RexExpression> aggCalls, int lowerBound,
+      int upperBound, boolean isRows, List<RexExpression> constants, DataSchema resultSchema, DataSchema inputSchema,
+      long requestId, int stageId) {
+    this(inputOperator, groupSet, orderSet, orderSetDirection, orderSetNullDirection, aggCalls, lowerBound,
+        upperBound, isRows, constants, resultSchema, inputSchema, WindowAccumulator.WINDOW_MERGERS,
+        requestId, stageId);
+  }
+
+  @VisibleForTesting
+  public WindowAggregateOperator(MultiStageOperator inputOperator, List<RexExpression> groupSet,
+      List<RexExpression> orderSet, List<RelFieldCollation.Direction> orderSetDirection,
+      List<RelFieldCollation.NullDirection> orderSetNullDirection, List<RexExpression> aggCalls, int lowerBound,
+      int upperBound, boolean isRows, List<RexExpression> constants, DataSchema resultSchema, DataSchema inputSchema,
+      Map<String, Function<DataSchema.ColumnDataType, WindowMerger>> mergers, long requestId, int stageId) {
+    super(requestId, stageId);
+
+    boolean isPartitionByOnly = isPartitionByOnlyQuery(groupSet, orderSet, orderSetDirection, orderSetNullDirection);
+    // TODO: add support for ORDER BY in the OVER() clause
+    Preconditions.checkState(orderSet == null || orderSet.isEmpty() || isPartitionByOnly,
+        "Order by is not yet supported in window functions");
+
+    _inputOperator = inputOperator;
+    _groupSet = groupSet;
+    _orderSetInfo = new OrderSetInfo(orderSet, orderSetDirection, orderSetNullDirection);
+    _windowFrame = new WindowFrame(lowerBound, upperBound, isRows);
+
+    // TODO: add support for custom frames, and for ORDER BY default frame (upperBound => currentRow)
+    Preconditions.checkState(!_windowFrame.isRows(), "Only RANGE type frames are supported at present");
+    Preconditions.checkState(_windowFrame.isUnboundedPreceding(),
+        "Only default frame is supported, lowerBound must be UNBOUNDED PRECEDING");
+    Preconditions.checkState(_windowFrame.isUnboundedFollowing()
+            || (_windowFrame.isUpperBoundCurrentRow() && isPartitionByOnly),
+        "Only default frame is supported, upperBound must be UNBOUNDED FOLLOWING or CURRENT ROW");
+
+    // we expect all agg calls to be aggregate function calls
+    _aggCalls = aggCalls.stream().map(RexExpression.FunctionCall.class::cast).collect(Collectors.toList());
+    _constants = constants;
+    _resultSchema = resultSchema;
+
+    // TODO: Not all window functions (e.g. ROW_NUMBER, LAG, etc) need aggregations. Such functions should be handled
+    //       differently.
+    _windowAccumulators = new WindowAccumulator[_aggCalls.size()];
+    for (int i = 0; i < _aggCalls.size(); i++) {
+      RexExpression.FunctionCall agg = _aggCalls.get(i);
+      String functionName = agg.getFunctionName();
+      if (!mergers.containsKey(functionName)) {
+        throw new IllegalStateException("Unexpected value: " + functionName);
+      }
+      _windowAccumulators[i] = new WindowAccumulator(agg, mergers, functionName, inputSchema);
+    }
+
+    _partitionRows = new HashMap<>();
+
+    _numRows = 0;
+    _readyToConstruct = false;
+    _hasReturnedWindowAggregateBlock = false;
+  }
+
+  @Override
+  public List<MultiStageOperator> getChildOperators() {
+    return ImmutableList.of(_inputOperator);
+  }
+
+  @Nullable
+  @Override
+  public String toExplainString() {
+    return EXPLAIN_NAME;
+  }
+
+  @Override
+  protected TransferableBlock getNextBlock() {
+    try {
+      if (!_readyToConstruct && !consumeInputBlocks()) {
+        return TransferableBlockUtils.getNoOpTransferableBlock();
+      }
+
+      if (_upstreamErrorBlock != null) {
+        return _upstreamErrorBlock;
+      }
+
+      if (!_hasReturnedWindowAggregateBlock) {
+        return produceWindowAggregateBlock();
+      } else {
+        // TODO: Move to close call.
+        return TransferableBlockUtils.getEndOfStreamTransferableBlock();
+      }
+    } catch (Exception e) {

Review Comment:
   Can you also log the exception here?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org