You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2020/07/07 03:30:08 UTC

[GitHub] [incubator-pinot] kishoreg commented on a change in pull request #5661: Optimize selection order-by when not all selected expressions are ordered

kishoreg commented on a change in pull request #5661:
URL: https://github.com/apache/incubator-pinot/pull/5661#discussion_r450590367



##########
File path: pinot-core/src/main/java/org/apache/pinot/core/operator/query/SelectionOrderByOperator.java
##########
@@ -143,24 +167,134 @@ public SelectionOrderByOperator(IndexSegment indexSegment, QueryContext queryCon
 
   @Override
   protected IntermediateResultsBlock getNextBlock() {
-    TransformBlock transformBlock;
-    while ((transformBlock = _transformOperator.nextBlock()) != null) {
-      int numExpressions = _expressions.size();
+    int numExpressions = _expressions.size();
+    int numOrderByExpressions = _orderByExpressions.size();
+
+    if (numExpressions == numOrderByExpressions) {
+      // All selected expressions are ordered
+
       BlockValSet[] blockValSets = new BlockValSet[numExpressions];
+      TransformBlock transformBlock;
+      while ((transformBlock = _transformOperator.nextBlock()) != null) {
+        for (int i = 0; i < numExpressions; i++) {
+          ExpressionContext expression = _expressions.get(i);
+          blockValSets[i] = transformBlock.getBlockValueSet(expression);
+        }
+        RowBasedBlockValueFetcher blockValueFetcher = new RowBasedBlockValueFetcher(blockValSets);
+        int numDocsFetched = transformBlock.getNumDocs();
+        _numDocsScanned += numDocsFetched;
+        for (int i = 0; i < numDocsFetched; i++) {
+          SelectionOperatorUtils.addToPriorityQueue(blockValueFetcher.getRow(i), _rows, _numRowsToKeep);
+        }
+      }
+      _numEntriesScannedPostFilter = (long) _numDocsScanned * _transformOperator.getNumColumnsProjected();
+
+      String[] columnNames = new String[numExpressions];
+      DataSchema.ColumnDataType[] columnDataTypes = new DataSchema.ColumnDataType[numExpressions];
       for (int i = 0; i < numExpressions; i++) {
-        ExpressionContext expression = _expressions.get(i);
-        blockValSets[i] = transformBlock.getBlockValueSet(expression);
+        columnNames[i] = _expressions.get(i).toString();
+        TransformResultMetadata expressionMetadata = _orderByExpressionMetadata[i];
+        columnDataTypes[i] = DataSchema.ColumnDataType
+            .fromDataType(expressionMetadata.getDataType(), expressionMetadata.isSingleValue());
       }
-      RowBasedBlockValueFetcher blockValueFetcher = new RowBasedBlockValueFetcher(blockValSets);
+      return new IntermediateResultsBlock(new DataSchema(columnNames, columnDataTypes), _rows);
+    } else {

Review comment:
       can we extract the if and else into separate methods for readability

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/operator/query/SelectionOrderByOperator.java
##########
@@ -143,24 +167,134 @@ public SelectionOrderByOperator(IndexSegment indexSegment, QueryContext queryCon
 
   @Override
   protected IntermediateResultsBlock getNextBlock() {
-    TransformBlock transformBlock;
-    while ((transformBlock = _transformOperator.nextBlock()) != null) {
-      int numExpressions = _expressions.size();
+    int numExpressions = _expressions.size();
+    int numOrderByExpressions = _orderByExpressions.size();
+
+    if (numExpressions == numOrderByExpressions) {

Review comment:
       not sure if this a valid check. what if the expressions are functions? 

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/operator/query/SelectionOrderByOperator.java
##########
@@ -20,72 +20,96 @@
 
 import java.util.ArrayList;
 import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.PriorityQueue;
+import java.util.Set;
+import org.apache.pinot.common.utils.CommonConstants.Segment.BuiltInVirtualColumn;
 import org.apache.pinot.common.utils.DataSchema;
 import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.common.DataSource;
 import org.apache.pinot.core.common.RowBasedBlockValueFetcher;
 import org.apache.pinot.core.indexsegment.IndexSegment;
 import org.apache.pinot.core.operator.BaseOperator;
 import org.apache.pinot.core.operator.ExecutionStatistics;
+import org.apache.pinot.core.operator.ProjectionOperator;
+import org.apache.pinot.core.operator.blocks.DocIdSetBlock;
 import org.apache.pinot.core.operator.blocks.IntermediateResultsBlock;
 import org.apache.pinot.core.operator.blocks.TransformBlock;
 import org.apache.pinot.core.operator.transform.TransformOperator;
 import org.apache.pinot.core.operator.transform.TransformResultMetadata;
+import org.apache.pinot.core.plan.DocIdSetPlanNode;
 import org.apache.pinot.core.query.request.context.ExpressionContext;
 import org.apache.pinot.core.query.request.context.OrderByExpressionContext;
 import org.apache.pinot.core.query.request.context.QueryContext;
 import org.apache.pinot.core.query.selection.SelectionOperatorUtils;
 import org.apache.pinot.spi.data.FieldSpec.DataType;
 import org.apache.pinot.spi.utils.ByteArray;
+import org.roaringbitmap.IntIterator;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
 
 
+/**
+ * Operator for selection order-by queries.
+ * <p>The operator uses a priority queue to sort the rows and return the top rows based on the order-by expressions.
+ * <p>It is optimized to fetch only the values needed for the ordering purpose and the final result:
+ * <ul>
+ *   <li>
+ *     When all the selected expressions are ordered, the operator fetches all the expressions and insert them into the

Review comment:
       can we refer to them as output expressions and orderBy expressions?
   
   the two cases output expressions == orderby expressions
   
   orderby expression is small part of output expressions




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org