You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2022/10/17 17:01:28 UTC

[GitHub] [pinot] walterddr commented on a diff in pull request #9605: Fix NonAggregationGroupByToDistinctQueryRewriter

walterddr commented on code in PR #9605:
URL: https://github.com/apache/pinot/pull/9605#discussion_r997305623


##########
pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriter.java:
##########
@@ -18,58 +18,67 @@
  */
 package org.apache.pinot.sql.parsers.rewriter;
 
-import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.Set;
 import org.apache.pinot.common.request.Expression;
+import org.apache.pinot.common.request.Function;
 import org.apache.pinot.common.request.PinotQuery;
 import org.apache.pinot.common.utils.request.RequestUtils;
 import org.apache.pinot.sql.parsers.CalciteSqlParser;
 import org.apache.pinot.sql.parsers.SqlCompilationException;
 
 
+/**
+ * Rewrite non-aggregation group-by query to distinct query.
+ * The query can be rewritten only if select expression set and group-by expression set are the same.
+ * E.g.
+ * SELECT col1, col2 FROM foo GROUP BY col1, col2 --> SELECT DISTINCT col1, col2 FROM foo
+ * SELECT col1 + col2 FROM foo GROUP BY col1 + col2 --> SELECT DISTINCT col1 + col2 FROM foo
+ * SELECT col1 AS c1 FROM foo GROUP BY col1 --> SELECT DISTINCT col1 AS c1 FROM foo
+ * SELECT col1, col1 AS c1, col2 FROM foo GROUP BY col1, col2 --> SELECT DISTINCT col1, col1 AS ci, col2 FROM foo
+ */
 public class NonAggregationGroupByToDistinctQueryRewriter implements QueryRewriter {
-  /**
-   * Rewrite non-aggregate group by query to distinct query.
-   * E.g.
-   * ```
-   *   SELECT col1+col2*5 FROM foo GROUP BY col1, col2 => SELECT distinct col1+col2*5 FROM foo
-   *   SELECT col1, col2 FROM foo GROUP BY col1, col2 => SELECT distinct col1, col2 FROM foo
-   * ```
-   * @param pinotQuery
-   */
+
   @Override
   public PinotQuery rewrite(PinotQuery pinotQuery) {
-    boolean hasAggregation = false;
+    if (pinotQuery.getGroupByListSize() == 0) {
+      return pinotQuery;
+    }
     for (Expression select : pinotQuery.getSelectList()) {
       if (CalciteSqlParser.isAggregateExpression(select)) {
-        hasAggregation = true;
+        return pinotQuery;
       }
     }
     if (pinotQuery.getOrderByList() != null) {
       for (Expression orderBy : pinotQuery.getOrderByList()) {
         if (CalciteSqlParser.isAggregateExpression(orderBy)) {
-          hasAggregation = true;
+          return pinotQuery;
         }
       }
     }
-    if (!hasAggregation && pinotQuery.getGroupByListSize() > 0) {
-      Set<String> selectIdentifiers = CalciteSqlParser.extractIdentifiers(pinotQuery.getSelectList(), true);
-      Set<String> groupByIdentifiers = CalciteSqlParser.extractIdentifiers(pinotQuery.getGroupByList(), true);
-      if (groupByIdentifiers.containsAll(selectIdentifiers)) {
-        Expression distinctExpression = RequestUtils.getFunctionExpression("distinct");
-        for (Expression select : pinotQuery.getSelectList()) {
-          distinctExpression.getFunctionCall().addToOperands(select);
-        }
-        pinotQuery.setSelectList(Arrays.asList(distinctExpression));
-        pinotQuery.setGroupByList(Collections.emptyList());
+
+    // This rewriter is applied after AliasApplier, so all the alias in group-by are already replaced with expressions
+    Set<Expression> selectExpressions = new HashSet<>();
+    for (Expression select : pinotQuery.getSelectList()) {
+      Function function = select.getFunctionCall();
+      if (function != null && function.getOperator().equals("as")) {
+        selectExpressions.add(function.getOperands().get(0));
       } else {
-        selectIdentifiers.removeAll(groupByIdentifiers);
-        throw new SqlCompilationException(String.format(
-            "For non-aggregation group by query, all the identifiers in select clause should be in groupBys. Found "
-                + "identifier: %s", Arrays.toString(selectIdentifiers.toArray(new String[0]))));
+        selectExpressions.add(select);
       }
     }
-    return pinotQuery;
+    Set<Expression> groupByExpressions = new HashSet<>(pinotQuery.getGroupByList());
+    if (selectExpressions.equals(groupByExpressions)) {
+      Expression distinct = RequestUtils.getFunctionExpression("distinct");
+      distinct.getFunctionCall().setOperands(pinotQuery.getSelectList());
+      pinotQuery.setSelectList(Collections.singletonList(distinct));
+      pinotQuery.setGroupByList(null);

Review Comment:
   what's the difference from setting it to null vs empty?



##########
pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java:
##########
@@ -2217,7 +2214,7 @@ public void testNonAggregationGroupByQuery() {
     Assert.assertEquals(
         pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(), "col2");
 
-    query = "SELECT col1+col2*5 FROM foo GROUP BY col1, col2";
+    query = "SELECT col1+col2*5 FROM foo GROUP BY col1+col2*5";

Review Comment:
   please add a test to catch that the original query now throws exception during rewrite.



##########
pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriter.java:
##########
@@ -18,58 +18,67 @@
  */
 package org.apache.pinot.sql.parsers.rewriter;
 
-import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.Set;
 import org.apache.pinot.common.request.Expression;
+import org.apache.pinot.common.request.Function;
 import org.apache.pinot.common.request.PinotQuery;
 import org.apache.pinot.common.utils.request.RequestUtils;
 import org.apache.pinot.sql.parsers.CalciteSqlParser;
 import org.apache.pinot.sql.parsers.SqlCompilationException;
 
 
+/**
+ * Rewrite non-aggregation group-by query to distinct query.
+ * The query can be rewritten only if select expression set and group-by expression set are the same.
+ * E.g.
+ * SELECT col1, col2 FROM foo GROUP BY col1, col2 --> SELECT DISTINCT col1, col2 FROM foo
+ * SELECT col1 + col2 FROM foo GROUP BY col1 + col2 --> SELECT DISTINCT col1 + col2 FROM foo
+ * SELECT col1 AS c1 FROM foo GROUP BY col1 --> SELECT DISTINCT col1 AS c1 FROM foo
+ * SELECT col1, col1 AS c1, col2 FROM foo GROUP BY col1, col2 --> SELECT DISTINCT col1, col1 AS ci, col2 FROM foo

Review Comment:
   please also add the non-supported rewrites as those removed tests below. 
   they are valid queries just pinot returns the wrong result (with the wrong rewrite), and those queries will not throw exception during rewrite



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org