You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by xi...@apache.org on 2020/07/09 04:29:59 UTC

[incubator-pinot] 01/01: Rewrite non-aggregate group by query to distinct query

This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch rewrite-non-groupby-to-distinct
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit b1c65e7746af7e310bf733127be5e84db407fdcd
Author: Xiang Fu <fx...@gmail.com>
AuthorDate: Wed Jul 8 21:29:41 2020 -0700

    Rewrite non-aggregate group by query to distinct query
---
 .../apache/pinot/sql/parsers/CalciteSqlParser.java | 30 ++++++++++++
 .../pinot/sql/parsers/CalciteSqlCompilerTest.java  | 54 ++++++++++++++++++++++
 .../tests/ClusterIntegrationTestUtils.java         | 12 ++++-
 .../tests/OfflineClusterIntegrationTest.java       | 41 ++++++++++++++++
 4 files changed, 136 insertions(+), 1 deletion(-)

diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
index de14a51..fffb5bd 100644
--- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
+++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
@@ -19,6 +19,8 @@
 package org.apache.pinot.sql.parsers;
 
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -331,12 +333,40 @@ public class CalciteSqlParser {
       pinotQuery.setFilterExpression(updatedFilterExpression);
     }
 
+    // Rewrite GroupBy to Distinct
+    rewriteGroupByToDistinct(pinotQuery);
+
     // Update alias
     Map<Identifier, Expression> aliasMap = extractAlias(pinotQuery.getSelectList());
     applyAlias(aliasMap, pinotQuery);
     validate(aliasMap, pinotQuery);
   }
 
+  private static void rewriteGroupByToDistinct(PinotQuery pinotQuery) {
+    boolean hasAggregation = false;
+    for (Expression select : pinotQuery.getSelectList()) {
+      if (isAggregateExpression(select)) {
+        hasAggregation = true;
+      }
+    }
+    if (!hasAggregation && pinotQuery.getGroupByListSize() > 0) {
+      Set<String> selectIdentifiers = extractIdentifiers(pinotQuery.getSelectList());
+      Set<String> groupByIdentifiers = extractIdentifiers(pinotQuery.getGroupByList());
+      if (groupByIdentifiers.containsAll(selectIdentifiers)) {
+        Expression distinctExpression = RequestUtils.getFunctionExpression("DISTINCT");
+        for (Expression select : pinotQuery.getSelectList()) {
+          distinctExpression.getFunctionCall().addToOperands(select);
+        }
+        pinotQuery.setSelectList(Arrays.asList(distinctExpression));
+        pinotQuery.setGroupByList(Collections.emptyList());
+      } else {
+        selectIdentifiers.removeAll(groupByIdentifiers);
+        throw new SqlCompilationException(String.format("For non-aggregation group by query, all the identifiers in select clause should be in groupBys. Found identifier: %s",
+            Arrays.toString(selectIdentifiers.toArray(new String[0]))));
+      }
+    }
+  }
+
   private static void invokeCompileTimeFunctions(PinotQuery pinotQuery) {
     for (int i = 0; i < pinotQuery.getSelectListSize(); i++) {
       Expression expression = invokeCompileTimeFunctionExpression(pinotQuery.getSelectList().get(i));
diff --git a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
index 826741c..3b84937 100644
--- a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
+++ b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
@@ -1724,4 +1724,58 @@ public class CalciteSqlCompilerTest {
     Assert.assertEquals(brokerRequest.getFilterQuery().getOperator(), FilterOperator.IS_NULL);
     Assert.assertEquals(brokerRequest.getFilterQuery().getColumn(), "col");
   }
+
+  @Test
+  public void testNonAggregationGroupByQuery() {
+    PinotQuery2BrokerRequestConverter converter = new PinotQuery2BrokerRequestConverter();
+    String query = "SELECT col1 FROM foo GROUP BY col1";
+    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    BrokerRequest brokerRequest = converter.convert(pinotQuery);
+    Assert.assertEquals(pinotQuery.getSelectListSize(), 1);
+    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator().toUpperCase(), "DISTINCT");
+    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col1");
+
+    Assert.assertEquals(brokerRequest.getAggregationsInfo().size(), 1);
+    Assert.assertEquals(brokerRequest.getAggregationsInfo().get(0).getAggregationType().toUpperCase(), "DISTINCT");
+    Assert.assertEquals(brokerRequest.getAggregationsInfo().get(0).getAggregationParams().get("column"), "col1");
+
+    query = "SELECT col1, col2 FROM foo GROUP BY col1, col2";
+    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    brokerRequest = converter.convert(pinotQuery);
+    Assert.assertEquals(pinotQuery.getSelectListSize(), 1);
+    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator().toUpperCase(), "DISTINCT");
+    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col1");
+    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(), "col2");
+
+    Assert.assertEquals(brokerRequest.getAggregationsInfo().size(), 1);
+    Assert.assertEquals(brokerRequest.getAggregationsInfo().get(0).getAggregationType().toUpperCase(), "DISTINCT");
+    Assert.assertEquals(brokerRequest.getAggregationsInfo().get(0).getAggregationParams().get("column"), "col1:col2");
+
+    query = "SELECT col1+col2*5 FROM foo GROUP BY col1, col2";
+    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    brokerRequest = converter.convert(pinotQuery);
+    Assert.assertEquals(pinotQuery.getSelectListSize(), 1);
+    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator().toUpperCase(), "DISTINCT");
+    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperator(), "PLUS");
+    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col1");
+    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1).getFunctionCall().getOperator(), "TIMES");
+    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col2");
+    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1).getFunctionCall().getOperands().get(1).getLiteral().getLongValue(), 5L);
+
+    Assert.assertEquals(brokerRequest.getAggregationsInfo().size(), 1);
+    Assert.assertEquals(brokerRequest.getAggregationsInfo().get(0).getAggregationType().toUpperCase(), "DISTINCT");
+    Assert.assertEquals(brokerRequest.getAggregationsInfo().get(0).getAggregationParams().get("column"), "plus(col1,times(col2,'5'))");
+  }
+
+  @Test(expectedExceptions = SqlCompilationException.class)
+  public void testInvalidNonAggregationGroupBy() {
+    // Not support Aggregation functions in case statements.
+    try {
+      CalciteSqlParser.compileToPinotQuery("SELECT col1+col2 FROM foo GROUP BY col1");
+    } catch (SqlCompilationException e) {
+      Assert.assertEquals(e.getMessage(),
+          "For non-aggregation group by query, all the identifiers in select clause should be in groupBys. Found identifier: [col2]");
+      throw e;
+    }
+  }
 }
\ No newline at end of file
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterIntegrationTestUtils.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterIntegrationTestUtils.java
index 64b1c47..85c15e7 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterIntegrationTestUtils.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterIntegrationTestUtils.java
@@ -857,7 +857,7 @@ public class ClusterIntegrationTestUtils {
     // compare results
     BrokerRequest brokerRequest =
         PinotQueryParserFactory.get(CommonConstants.Broker.Request.SQL).compileToBrokerRequest(pinotQuery);
-    if (brokerRequest.getSelections() != null) { // selection
+    if (isSelectionQuery(brokerRequest)) { // selection
       // TODO: compare results for selection queries, w/o order by
 
       // Compare results for selection queries, with order by
@@ -962,6 +962,16 @@ public class ClusterIntegrationTestUtils {
     }
   }
 
+  private static boolean isSelectionQuery(BrokerRequest brokerRequest) {
+    if (brokerRequest.getSelections() != null) {
+      return true;
+    }
+    if (brokerRequest.getAggregationsInfo() != null && brokerRequest.getAggregationsInfo().get(0).getAggregationType().equalsIgnoreCase("DISTINCT")) {
+      return true;
+    }
+    return false;
+  }
+
   private static boolean fuzzyCompare(String h2Value, String brokerValue, String connectionValue) {
     // Fuzzy compare expected value and actual value
     boolean error = false;
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
index 3cde6b5..774f209 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
@@ -1074,18 +1074,59 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
     String pql = "SELECT DISTINCT(Carrier) FROM mytable LIMIT 1000000";
     String sql = "SELECT DISTINCT Carrier FROM mytable";
     testQuery(pql, Collections.singletonList(sql));
+    pql = "SELECT DISTINCT Carrier FROM mytable LIMIT 1000000";
+    testSqlQuery(pql, Collections.singletonList(sql));
 
     pql = "SELECT DISTINCT(Carrier, DestAirportID) FROM mytable LIMIT 1000000";
     sql = "SELECT DISTINCT Carrier, DestAirportID FROM mytable";
     testQuery(pql, Collections.singletonList(sql));
+    pql = "SELECT DISTINCT Carrier, DestAirportID FROM mytable LIMIT 1000000";
+    testSqlQuery(pql, Collections.singletonList(sql));
 
     pql = "SELECT DISTINCT(Carrier, DestAirportID, DestStateName) FROM mytable LIMIT 1000000";
     sql = "SELECT DISTINCT Carrier, DestAirportID, DestStateName FROM mytable";
     testQuery(pql, Collections.singletonList(sql));
+    pql = "SELECT DISTINCT Carrier, DestAirportID, DestStateName FROM mytable LIMIT 1000000";
+    testSqlQuery(pql, Collections.singletonList(sql));
 
     pql = "SELECT DISTINCT(Carrier, DestAirportID, DestCityName) FROM mytable LIMIT 1000000";
     sql = "SELECT DISTINCT Carrier, DestAirportID, DestCityName FROM mytable";
     testQuery(pql, Collections.singletonList(sql));
+    pql = "SELECT DISTINCT Carrier, DestAirportID, DestCityName FROM mytable LIMIT 1000000";
+    testSqlQuery(pql, Collections.singletonList(sql));
+  }
+
+  @Test
+  public void testNonAggregationGroupByQuery()
+      throws Exception {
+    // by default 10 rows will be returned, so use high limit
+    String pql = "SELECT Carrier FROM mytable GROUP BY Carrier LIMIT 1000000";
+    String sql = "SELECT Carrier FROM mytable GROUP BY Carrier";
+    testSqlQuery(pql, Collections.singletonList(sql));
+
+    pql = "SELECT Carrier, DestAirportID FROM mytable GROUP BY Carrier, DestAirportID LIMIT 1000000";
+    sql = "SELECT Carrier, DestAirportID FROM mytable GROUP BY Carrier, DestAirportID";
+    testSqlQuery(pql, Collections.singletonList(sql));
+
+    pql = "SELECT Carrier, DestAirportID, DestStateName FROM mytable GROUP BY Carrier, DestAirportID, DestStateName LIMIT 1000000";
+    sql = "SELECT Carrier, DestAirportID, DestStateName FROM mytable GROUP BY Carrier, DestAirportID, DestStateName";
+    testSqlQuery(pql, Collections.singletonList(sql));
+
+    pql = "SELECT Carrier, DestAirportID, DestCityName FROM mytable GROUP BY Carrier, DestAirportID, DestCityName LIMIT 1000000";
+    sql = "SELECT Carrier, DestAirportID, DestCityName FROM mytable GROUP BY Carrier, DestAirportID, DestCityName";
+    testSqlQuery(pql, Collections.singletonList(sql));
+
+    pql = "SELECT ArrTime-DepTime FROM mytable GROUP BY ArrTime, DepTime LIMIT 1000000";
+    sql = "SELECT ArrTime-DepTime FROM mytable GROUP BY ArrTime, DepTime";
+    testSqlQuery(pql, Collections.singletonList(sql));
+
+    pql = "SELECT ArrTime-DepTime,ArrTime/3,DepTime*2 FROM mytable GROUP BY ArrTime, DepTime LIMIT 1000000";
+    sql = "SELECT ArrTime-DepTime,ArrTime/3,DepTime*2 FROM mytable GROUP BY ArrTime, DepTime";
+    testSqlQuery(pql, Collections.singletonList(sql));
+
+    pql = "SELECT ArrTime+DepTime FROM mytable GROUP BY ArrTime + DepTime LIMIT 1000000";
+    sql = "SELECT ArrTime+DepTime FROM mytable GROUP BY ArrTime + DepTime";
+    testSqlQuery(pql, Collections.singletonList(sql));
   }
 
   @Test


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org