You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by si...@apache.org on 2022/02/02 06:00:15 UTC

[pinot] branch master updated: [7867] Handle Select * with Extra Columns (#7959)

This is an automated email from the ASF dual-hosted git repository.

siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new ea2f0aa  [7867] Handle Select * with Extra Columns (#7959)
ea2f0aa is described below

commit ea2f0aa641e17301293662c8e79dfd94d8568438
Author: Prashant Pandey <84...@users.noreply.github.com>
AuthorDate: Wed Feb 2 11:29:49 2022 +0530

    [7867] Handle Select * with Extra Columns (#7959)
    
    * Expand '*' with other cols
    
    * added license header
    
    * Additional UTs
    
    * Additional UTs for happy case
    
    * linter
    
    * Checkstyle
    
    * Fix failing UT
    
    * Use imperative code instead of stream APIs for performance while expanding '*'
    
    * Expand * in natural ordering of columns
    
    * Linter fixes
    
    * Minor refactoring
    
    * Don't dedup columns, expand '*' request without other columns on broker
    
    * Add test description
    
    * Renamed from `expandStarExpressionToActualColumns` to `expandStarExpressionsToActualColumns`
    
    * Create new selections list instead of modifying the original
    
    * Update pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
    
    Co-authored-by: Xiaotian (Jackie) Jiang <17...@users.noreply.github.com>
    
    Co-authored-by: Xiaotian (Jackie) Jiang <17...@users.noreply.github.com>
---
 .../requesthandler/BaseBrokerRequestHandler.java   |  34 +++
 .../SelectStarWithOtherColsRewriteTest.java        | 276 +++++++++++++++++++++
 2 files changed, 310 insertions(+)

diff --git a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
index 7d026af..192ddd1 100644
--- a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
+++ b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java
@@ -106,6 +106,7 @@ public abstract class BaseBrokerRequestHandler implements BrokerRequestHandler {
   private static final String IN_SUBQUERY = "inSubquery";
   private static final Expression FALSE = RequestUtils.getLiteralExpression(false);
   private static final Expression TRUE = RequestUtils.getLiteralExpression(true);
+  private static final Expression STAR = RequestUtils.getIdentifierExpression("*");
 
   protected final PinotConfiguration _config;
   protected final RoutingManager _routingManager;
@@ -1459,8 +1460,17 @@ public abstract class BaseBrokerRequestHandler implements BrokerRequestHandler {
       Map<String, String> columnNameMap) {
     Map<String, String> aliasMap = new HashMap<>();
     if (pinotQuery != null) {
+      boolean hasStar = false;
       for (Expression expression : pinotQuery.getSelectList()) {
         fixColumnName(rawTableName, expression, columnNameMap, aliasMap, isCaseInsensitive);
+        //check if the select expression is '*'
+        if (!hasStar && expression.equals(STAR)) {
+          hasStar = true;
+        }
+      }
+      //if query has a '*' selection along with other columns
+      if (hasStar) {
+        expandStarExpressionsToActualColumns(pinotQuery, columnNameMap);
       }
       Expression filterExpression = pinotQuery.getFilterExpression();
       if (filterExpression != null) {
@@ -1487,6 +1497,30 @@ public abstract class BaseBrokerRequestHandler implements BrokerRequestHandler {
     }
   }
 
+  private static void expandStarExpressionsToActualColumns(PinotQuery pinotQuery, Map<String, String> columnNameMap) {
+    List<Expression> originalSelections = pinotQuery.getSelectList();
+    //expand '*'
+    List<Expression> expandedSelections = new ArrayList<>();
+    for (String tableCol : columnNameMap.values()) {
+      Expression newSelection = RequestUtils.createIdentifierExpression(tableCol);
+      //we exclude default virtual columns
+      if (tableCol.charAt(0) != '$') {
+        expandedSelections.add(newSelection);
+      }
+    }
+    //sort naturally
+    expandedSelections.sort(null);
+    List<Expression> newSelections = new ArrayList<>();
+    for (Expression originalSelection : originalSelections) {
+      if (originalSelection.equals(STAR)) {
+        newSelections.addAll(expandedSelections);
+      } else {
+        newSelections.add(originalSelection);
+      }
+    }
+    pinotQuery.setSelectList(newSelections);
+  }
+
   /**
    * Fixes the column names to the actual column names in the given broker request.
    */
diff --git a/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/SelectStarWithOtherColsRewriteTest.java b/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/SelectStarWithOtherColsRewriteTest.java
new file mode 100644
index 0000000..90e0f12
--- /dev/null
+++ b/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/SelectStarWithOtherColsRewriteTest.java
@@ -0,0 +1,276 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.pinot.broker.requesthandler;
+
+import com.google.common.collect.ImmutableMap;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import org.apache.pinot.common.request.Expression;
+import org.apache.pinot.common.request.PinotQuery;
+import org.apache.pinot.sql.parsers.CalciteSqlParser;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+
+public class SelectStarWithOtherColsRewriteTest {
+
+  private static final Map<String, String> COL_MAP;
+
+  static {
+    //build table schema
+    ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
+    builder.put("playerID", "playerID");
+    builder.put("homeRuns", "homeRuns");
+    builder.put("playerStint", "playerStint");
+    builder.put("groundedIntoDoublePlays", "groundedIntoDoublePlays");
+    builder.put("G_old", "G_old");
+    builder.put("$segmentName", "$segmentName");
+    builder.put("$docId", "$docId");
+    builder.put("$hostName", "$hostName");
+    COL_MAP = builder.build();
+  }
+
+  /**
+   * When the query contains only '*', it should be expanded into columns.
+   */
+  @Test
+  public void testShouldExpandWhenOnlyStarIsSelected() {
+    String sql = "SELECT * FROM baseballStats";
+    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
+    BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP);
+    List<Expression> newSelections = pinotQuery.getSelectList();
+    Map<String, Integer> countMap = new HashMap<>();
+    for (Expression selection : newSelections) {
+      String col = selection.getIdentifier().getName();
+      countMap.put(col, countMap.getOrDefault(col, 0) + 1);
+    }
+    Assert.assertEquals(countMap.size(), 5, "More new selections than expected");
+    Assert.assertTrue(countMap.keySet().containsAll(COL_MAP.keySet().stream().filter(a -> !a.startsWith("$")).collect(
+        Collectors.toList())), "New selections contain virtual columns");
+    countMap.forEach(
+        (key, value) -> Assert.assertEquals((int) value, 1, key + " has more than one occurrences in new selection"));
+  }
+
+  /**
+   * Expansion should not contain any virtual columns
+   */
+  @Test
+  public void testShouldNotReturnExtraDefaultColumns() {
+    String sql = "SELECT $docId,*,$segmentName FROM baseballStats";
+    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
+    BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP);
+    List<Expression> newSelections = pinotQuery.getSelectList();
+    int docIdCnt = 0;
+    int segmentNameCnt = 0;
+    for (Expression newSelection : newSelections) {
+      String colName = newSelection.getIdentifier().getName();
+      switch (colName) {
+        case "$docId":
+          docIdCnt++;
+          break;
+        case "$segmentName":
+          segmentNameCnt++;
+          break;
+        case "$hostName":
+          throw new RuntimeException("Extra default column returned");
+        default:
+      }
+    }
+    Assert.assertEquals(docIdCnt, 1);
+    Assert.assertEquals(segmentNameCnt, 1);
+  }
+
+  /**
+   * Columns should not be deduped
+   */
+  @Test
+  public void testShouldNotDedupMultipleRequestedColumns() {
+    String sql = "SELECT playerID,*,G_old FROM baseballStats";
+    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
+    BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP);
+    List<Expression> newSelections = pinotQuery.getSelectList();
+    int playerIdCnt = 0;
+    int goldCount = 0;
+    for (Expression newSelection : newSelections) {
+      String colName = newSelection.getIdentifier().getName();
+      switch (colName) {
+        case "playerID":
+          playerIdCnt++;
+          break;
+        case "G_old":
+          goldCount++;
+          break;
+        default:
+      }
+    }
+    Assert.assertEquals(playerIdCnt, 2, "playerID does not occur once");
+    Assert.assertEquals(goldCount, 2, "G_old occurs does not occur once");
+  }
+
+  /**
+   * Selections should be returned in the requested order
+   */
+  @Test
+  public void testSelectionOrder() {
+    String sql = "SELECT playerID,*,G_old FROM baseballStats";
+    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
+    BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP);
+    List<Expression> newSelections = pinotQuery.getSelectList();
+    Assert.assertEquals(newSelections.get(0).getIdentifier().getName(), "playerID");
+    Assert.assertEquals(newSelections.get(newSelections.size() - 1).getIdentifier().getName(), "G_old");
+    //the expanded list should be alphabetically sorted
+    List<Expression> expandedSelections = newSelections.subList(1, newSelections.size() - 1);
+    List<Expression> assumedUnsortedSelections = new ArrayList<>(expandedSelections);
+    //sort alphabetically
+    assumedUnsortedSelections.sort(null);
+    Assert.assertEquals(expandedSelections, assumedUnsortedSelections, "Expanded selections not sorted alphabetically");
+  }
+
+  /**
+   * When the same column is requested twice, once with an alias and once as part of *, then it should be returned twice
+   */
+  @Test
+  public void testAliasing() {
+    String sql = "SELECT playerID as pid,* FROM baseballStats";
+    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
+    BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP);
+    List<Expression> newSelections = pinotQuery.getSelectList();
+    Assert.assertTrue(newSelections.get(0).isSetFunctionCall());
+    Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperator(), "AS");
+    Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(),
+        "playerID");
+    Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(), "pid");
+    boolean playerIdPresent = false;
+    for (int i = 1; i < newSelections.size(); i++) {
+      if (newSelections.get(i).getIdentifier().getName().equals("playerID")) {
+        playerIdPresent = true;
+        break;
+      }
+    }
+    Assert.assertTrue(playerIdPresent, "playerID col is missing");
+  }
+
+  /**
+   * When a function is applied to a column, then that col is returned along with the original column
+   */
+  @Test
+  public void testFuncOnColumns1() {
+    String sql = "SELECT sqrt(homeRuns),* FROM baseballStats";
+    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
+    BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP);
+    List<Expression> newSelections = pinotQuery.getSelectList();
+    Assert.assertTrue(newSelections.get(0).isSetFunctionCall());
+    Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperator(), "SQRT");
+    Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(),
+        "homeRuns");
+    //homeRuns is returned as well
+    int homeRunsCnt = 0;
+    for (Expression selection : newSelections) {
+      if (selection.isSetIdentifier() && selection.getIdentifier().getName().equals("homeRuns")) {
+        homeRunsCnt++;
+      }
+    }
+    Assert.assertEquals(homeRunsCnt, 1);
+  }
+
+  /**
+   * When a function is applied to a column, then that col is returned along with the original column
+   */
+  @Test
+  public void testFuncOnColumns2() {
+    String sql = "SELECT add(homeRuns,groundedIntoDoublePlays),* FROM baseballStats";
+    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
+    BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP);
+    List<Expression> newSelections = pinotQuery.getSelectList();
+    Assert.assertTrue(newSelections.get(0).isSetFunctionCall());
+    Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperator(), "ADD");
+    Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(),
+        "homeRuns");
+    Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(),
+        "groundedIntoDoublePlays");
+    //homeRuns is returned as well
+    int homeRunsCnt = 0;
+    int groundedIntoDoublePlaysCnt = 0;
+    for (Expression selection : newSelections) {
+      if (selection.isSetIdentifier() && selection.getIdentifier().getName().equals("homeRuns")) {
+        homeRunsCnt++;
+      } else if (selection.isSetIdentifier() && selection.getIdentifier().getName().equals("groundedIntoDoublePlays")) {
+        groundedIntoDoublePlaysCnt++;
+      }
+    }
+    Assert.assertEquals(homeRunsCnt, 1);
+    Assert.assertEquals(groundedIntoDoublePlaysCnt, 1);
+  }
+
+  /**
+   * When 'n' no. of unqualified * are present, then each column is returned 'n' times.
+   */
+  @Test
+  public void testMultipleUnqualifiedStars() {
+    String sql = "SELECT *,* FROM baseballStats";
+    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
+    BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP);
+    List<Expression> newSelections = pinotQuery.getSelectList();
+    Assert.assertEquals(newSelections.get(0).getIdentifier().getName(), "G_old");
+    Assert.assertEquals(newSelections.get(1).getIdentifier().getName(), "groundedIntoDoublePlays");
+    Assert.assertEquals(newSelections.get(2).getIdentifier().getName(), "homeRuns");
+    Assert.assertEquals(newSelections.get(3).getIdentifier().getName(), "playerID");
+    Assert.assertEquals(newSelections.get(4).getIdentifier().getName(), "playerStint");
+    Assert.assertEquals(newSelections.get(5).getIdentifier().getName(), "G_old");
+    Assert.assertEquals(newSelections.get(6).getIdentifier().getName(), "groundedIntoDoublePlays");
+    Assert.assertEquals(newSelections.get(7).getIdentifier().getName(), "homeRuns");
+    Assert.assertEquals(newSelections.get(8).getIdentifier().getName(), "playerID");
+    Assert.assertEquals(newSelections.get(9).getIdentifier().getName(), "playerStint");
+  }
+
+  @Test
+  public void testAll() {
+    String sql =
+        "SELECT abs(homeRuns),sqrt(groundedIntoDoublePlays),*,$segmentName,$hostName,playerStint as pstint,playerID  "
+            + "FROM baseballStats";
+    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
+    BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP);
+    List<Expression> newSelections = pinotQuery.getSelectList();
+    Assert.assertTrue(newSelections.get(0).isSetFunctionCall());
+    Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperator(), "ABS");
+    Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(),
+        "homeRuns");
+    Assert.assertTrue(newSelections.get(1).isSetFunctionCall());
+    Assert.assertEquals(newSelections.get(1).getFunctionCall().getOperator(), "SQRT");
+    Assert.assertEquals(newSelections.get(1).getFunctionCall().getOperands().get(0).getIdentifier().getName(),
+        "groundedIntoDoublePlays");
+    Assert.assertEquals(newSelections.get(2).getIdentifier().getName(), "G_old");
+    Assert.assertEquals(newSelections.get(3).getIdentifier().getName(), "groundedIntoDoublePlays");
+    Assert.assertEquals(newSelections.get(4).getIdentifier().getName(), "homeRuns");
+    Assert.assertEquals(newSelections.get(5).getIdentifier().getName(), "playerID");
+    Assert.assertEquals(newSelections.get(6).getIdentifier().getName(), "playerStint");
+    Assert.assertEquals(newSelections.get(7).getIdentifier().getName(), "$segmentName");
+    Assert.assertEquals(newSelections.get(8).getIdentifier().getName(), "$hostName");
+    Assert.assertEquals(newSelections.get(9).getFunctionCall().getOperator(), "AS");
+    Assert.assertEquals(newSelections.get(9).getFunctionCall().getOperands().get(0).getIdentifier().getName(),
+        "playerStint");
+    Assert.assertEquals(newSelections.get(9).getFunctionCall().getOperands().get(1).getIdentifier().getName(),
+        "pstint");
+    Assert.assertEquals(newSelections.get(10).getIdentifier().getName(), "playerID");
+  }
+}

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org