You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by xi...@apache.org on 2020/07/09 04:29:59 UTC
[incubator-pinot] 01/01: Rewrite non-aggregate group by query to
distinct query
This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch rewrite-non-groupby-to-distinct
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
commit b1c65e7746af7e310bf733127be5e84db407fdcd
Author: Xiang Fu <fx...@gmail.com>
AuthorDate: Wed Jul 8 21:29:41 2020 -0700
Rewrite non-aggregate group by query to distinct query
---
.../apache/pinot/sql/parsers/CalciteSqlParser.java | 30 ++++++++++++
.../pinot/sql/parsers/CalciteSqlCompilerTest.java | 54 ++++++++++++++++++++++
.../tests/ClusterIntegrationTestUtils.java | 12 ++++-
.../tests/OfflineClusterIntegrationTest.java | 41 ++++++++++++++++
4 files changed, 136 insertions(+), 1 deletion(-)
diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
index de14a51..fffb5bd 100644
--- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
+++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java
@@ -19,6 +19,8 @@
package org.apache.pinot.sql.parsers;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -331,12 +333,40 @@ public class CalciteSqlParser {
pinotQuery.setFilterExpression(updatedFilterExpression);
}
+ // Rewrite GroupBy to Distinct
+ rewriteGroupByToDistinct(pinotQuery);
+
// Update alias
Map<Identifier, Expression> aliasMap = extractAlias(pinotQuery.getSelectList());
applyAlias(aliasMap, pinotQuery);
validate(aliasMap, pinotQuery);
}
+ private static void rewriteGroupByToDistinct(PinotQuery pinotQuery) {
+ boolean hasAggregation = false;
+ for (Expression select : pinotQuery.getSelectList()) {
+ if (isAggregateExpression(select)) {
+ hasAggregation = true;
+ }
+ }
+ if (!hasAggregation && pinotQuery.getGroupByListSize() > 0) {
+ Set<String> selectIdentifiers = extractIdentifiers(pinotQuery.getSelectList());
+ Set<String> groupByIdentifiers = extractIdentifiers(pinotQuery.getGroupByList());
+ if (groupByIdentifiers.containsAll(selectIdentifiers)) {
+ Expression distinctExpression = RequestUtils.getFunctionExpression("DISTINCT");
+ for (Expression select : pinotQuery.getSelectList()) {
+ distinctExpression.getFunctionCall().addToOperands(select);
+ }
+ pinotQuery.setSelectList(Arrays.asList(distinctExpression));
+ pinotQuery.setGroupByList(Collections.emptyList());
+ } else {
+ selectIdentifiers.removeAll(groupByIdentifiers);
+ throw new SqlCompilationException(String.format("For non-aggregation group by query, all the identifiers in select clause should be in groupBys. Found identifier: %s",
+ Arrays.toString(selectIdentifiers.toArray(new String[0]))));
+ }
+ }
+ }
+
private static void invokeCompileTimeFunctions(PinotQuery pinotQuery) {
for (int i = 0; i < pinotQuery.getSelectListSize(); i++) {
Expression expression = invokeCompileTimeFunctionExpression(pinotQuery.getSelectList().get(i));
diff --git a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
index 826741c..3b84937 100644
--- a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
+++ b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java
@@ -1724,4 +1724,58 @@ public class CalciteSqlCompilerTest {
Assert.assertEquals(brokerRequest.getFilterQuery().getOperator(), FilterOperator.IS_NULL);
Assert.assertEquals(brokerRequest.getFilterQuery().getColumn(), "col");
}
+
+ @Test
+ public void testNonAggregationGroupByQuery() {
+ PinotQuery2BrokerRequestConverter converter = new PinotQuery2BrokerRequestConverter();
+ String query = "SELECT col1 FROM foo GROUP BY col1";
+ PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+ BrokerRequest brokerRequest = converter.convert(pinotQuery);
+ Assert.assertEquals(pinotQuery.getSelectListSize(), 1);
+ Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator().toUpperCase(), "DISTINCT");
+ Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col1");
+
+ Assert.assertEquals(brokerRequest.getAggregationsInfo().size(), 1);
+ Assert.assertEquals(brokerRequest.getAggregationsInfo().get(0).getAggregationType().toUpperCase(), "DISTINCT");
+ Assert.assertEquals(brokerRequest.getAggregationsInfo().get(0).getAggregationParams().get("column"), "col1");
+
+ query = "SELECT col1, col2 FROM foo GROUP BY col1, col2";
+ pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+ brokerRequest = converter.convert(pinotQuery);
+ Assert.assertEquals(pinotQuery.getSelectListSize(), 1);
+ Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator().toUpperCase(), "DISTINCT");
+ Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col1");
+ Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(), "col2");
+
+ Assert.assertEquals(brokerRequest.getAggregationsInfo().size(), 1);
+ Assert.assertEquals(brokerRequest.getAggregationsInfo().get(0).getAggregationType().toUpperCase(), "DISTINCT");
+ Assert.assertEquals(brokerRequest.getAggregationsInfo().get(0).getAggregationParams().get("column"), "col1:col2");
+
+ query = "SELECT col1+col2*5 FROM foo GROUP BY col1, col2";
+ pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+ brokerRequest = converter.convert(pinotQuery);
+ Assert.assertEquals(pinotQuery.getSelectListSize(), 1);
+ Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator().toUpperCase(), "DISTINCT");
+ Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperator(), "PLUS");
+ Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col1");
+ Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1).getFunctionCall().getOperator(), "TIMES");
+ Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col2");
+ Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1).getFunctionCall().getOperands().get(1).getLiteral().getLongValue(), 5L);
+
+ Assert.assertEquals(brokerRequest.getAggregationsInfo().size(), 1);
+ Assert.assertEquals(brokerRequest.getAggregationsInfo().get(0).getAggregationType().toUpperCase(), "DISTINCT");
+ Assert.assertEquals(brokerRequest.getAggregationsInfo().get(0).getAggregationParams().get("column"), "plus(col1,times(col2,'5'))");
+ }
+
+ @Test(expectedExceptions = SqlCompilationException.class)
+ public void testInvalidNonAggregationGroupBy() {
+ // Not support Aggregation functions in case statements.
+ try {
+ CalciteSqlParser.compileToPinotQuery("SELECT col1+col2 FROM foo GROUP BY col1");
+ } catch (SqlCompilationException e) {
+ Assert.assertEquals(e.getMessage(),
+ "For non-aggregation group by query, all the identifiers in select clause should be in groupBys. Found identifier: [col2]");
+ throw e;
+ }
+ }
}
\ No newline at end of file
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterIntegrationTestUtils.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterIntegrationTestUtils.java
index 64b1c47..85c15e7 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterIntegrationTestUtils.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ClusterIntegrationTestUtils.java
@@ -857,7 +857,7 @@ public class ClusterIntegrationTestUtils {
// compare results
BrokerRequest brokerRequest =
PinotQueryParserFactory.get(CommonConstants.Broker.Request.SQL).compileToBrokerRequest(pinotQuery);
- if (brokerRequest.getSelections() != null) { // selection
+ if (isSelectionQuery(brokerRequest)) { // selection
// TODO: compare results for selection queries, w/o order by
// Compare results for selection queries, with order by
@@ -962,6 +962,16 @@ public class ClusterIntegrationTestUtils {
}
}
+ private static boolean isSelectionQuery(BrokerRequest brokerRequest) {
+ if (brokerRequest.getSelections() != null) {
+ return true;
+ }
+ if (brokerRequest.getAggregationsInfo() != null && brokerRequest.getAggregationsInfo().get(0).getAggregationType().equalsIgnoreCase("DISTINCT")) {
+ return true;
+ }
+ return false;
+ }
+
private static boolean fuzzyCompare(String h2Value, String brokerValue, String connectionValue) {
// Fuzzy compare expected value and actual value
boolean error = false;
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
index 3cde6b5..774f209 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
@@ -1074,18 +1074,59 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
String pql = "SELECT DISTINCT(Carrier) FROM mytable LIMIT 1000000";
String sql = "SELECT DISTINCT Carrier FROM mytable";
testQuery(pql, Collections.singletonList(sql));
+ pql = "SELECT DISTINCT Carrier FROM mytable LIMIT 1000000";
+ testSqlQuery(pql, Collections.singletonList(sql));
pql = "SELECT DISTINCT(Carrier, DestAirportID) FROM mytable LIMIT 1000000";
sql = "SELECT DISTINCT Carrier, DestAirportID FROM mytable";
testQuery(pql, Collections.singletonList(sql));
+ pql = "SELECT DISTINCT Carrier, DestAirportID FROM mytable LIMIT 1000000";
+ testSqlQuery(pql, Collections.singletonList(sql));
pql = "SELECT DISTINCT(Carrier, DestAirportID, DestStateName) FROM mytable LIMIT 1000000";
sql = "SELECT DISTINCT Carrier, DestAirportID, DestStateName FROM mytable";
testQuery(pql, Collections.singletonList(sql));
+ pql = "SELECT DISTINCT Carrier, DestAirportID, DestStateName FROM mytable LIMIT 1000000";
+ testSqlQuery(pql, Collections.singletonList(sql));
pql = "SELECT DISTINCT(Carrier, DestAirportID, DestCityName) FROM mytable LIMIT 1000000";
sql = "SELECT DISTINCT Carrier, DestAirportID, DestCityName FROM mytable";
testQuery(pql, Collections.singletonList(sql));
+ pql = "SELECT DISTINCT Carrier, DestAirportID, DestCityName FROM mytable LIMIT 1000000";
+ testSqlQuery(pql, Collections.singletonList(sql));
+ }
+
+ @Test
+ public void testNonAggregationGroupByQuery()
+ throws Exception {
+ // by default 10 rows will be returned, so use high limit
+ String pql = "SELECT Carrier FROM mytable GROUP BY Carrier LIMIT 1000000";
+ String sql = "SELECT Carrier FROM mytable GROUP BY Carrier";
+ testSqlQuery(pql, Collections.singletonList(sql));
+
+ pql = "SELECT Carrier, DestAirportID FROM mytable GROUP BY Carrier, DestAirportID LIMIT 1000000";
+ sql = "SELECT Carrier, DestAirportID FROM mytable GROUP BY Carrier, DestAirportID";
+ testSqlQuery(pql, Collections.singletonList(sql));
+
+ pql = "SELECT Carrier, DestAirportID, DestStateName FROM mytable GROUP BY Carrier, DestAirportID, DestStateName LIMIT 1000000";
+ sql = "SELECT Carrier, DestAirportID, DestStateName FROM mytable GROUP BY Carrier, DestAirportID, DestStateName";
+ testSqlQuery(pql, Collections.singletonList(sql));
+
+ pql = "SELECT Carrier, DestAirportID, DestCityName FROM mytable GROUP BY Carrier, DestAirportID, DestCityName LIMIT 1000000";
+ sql = "SELECT Carrier, DestAirportID, DestCityName FROM mytable GROUP BY Carrier, DestAirportID, DestCityName";
+ testSqlQuery(pql, Collections.singletonList(sql));
+
+ pql = "SELECT ArrTime-DepTime FROM mytable GROUP BY ArrTime, DepTime LIMIT 1000000";
+ sql = "SELECT ArrTime-DepTime FROM mytable GROUP BY ArrTime, DepTime";
+ testSqlQuery(pql, Collections.singletonList(sql));
+
+ pql = "SELECT ArrTime-DepTime,ArrTime/3,DepTime*2 FROM mytable GROUP BY ArrTime, DepTime LIMIT 1000000";
+ sql = "SELECT ArrTime-DepTime,ArrTime/3,DepTime*2 FROM mytable GROUP BY ArrTime, DepTime";
+ testSqlQuery(pql, Collections.singletonList(sql));
+
+ pql = "SELECT ArrTime+DepTime FROM mytable GROUP BY ArrTime + DepTime LIMIT 1000000";
+ sql = "SELECT ArrTime+DepTime FROM mytable GROUP BY ArrTime + DepTime";
+ testSqlQuery(pql, Collections.singletonList(sql));
}
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org