You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2022/10/15 07:56:06 UTC

[GitHub] [pinot] Jackie-Jiang opened a new pull request, #9605: Fix NonAggregationGroupByToDistinctQueryRewriter

Jackie-Jiang opened a new pull request, #9605:
URL: https://github.com/apache/pinot/pull/9605

   Non-aggregation group-by query can be rewritten to distinct query only when the select expression set and group-by expression set are the same


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[GitHub] [pinot] Jackie-Jiang commented on a diff in pull request #9605: Fix NonAggregationGroupByToDistinctQueryRewriter

Posted by GitBox <gi...@apache.org>.
Jackie-Jiang commented on code in PR #9605:
URL: https://github.com/apache/pinot/pull/9605#discussion_r997361773


##########
pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriter.java:
##########
@@ -18,58 +18,67 @@
  */
 package org.apache.pinot.sql.parsers.rewriter;
 
-import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.Set;
 import org.apache.pinot.common.request.Expression;
+import org.apache.pinot.common.request.Function;
 import org.apache.pinot.common.request.PinotQuery;
 import org.apache.pinot.common.utils.request.RequestUtils;
 import org.apache.pinot.sql.parsers.CalciteSqlParser;
 import org.apache.pinot.sql.parsers.SqlCompilationException;
 
 
+/**
+ * Rewrite non-aggregation group-by query to distinct query.
+ * The query can be rewritten only if select expression set and group-by expression set are the same.
+ * E.g.
+ * SELECT col1, col2 FROM foo GROUP BY col1, col2 --> SELECT DISTINCT col1, col2 FROM foo
+ * SELECT col1 + col2 FROM foo GROUP BY col1 + col2 --> SELECT DISTINCT col1 + col2 FROM foo
+ * SELECT col1 AS c1 FROM foo GROUP BY col1 --> SELECT DISTINCT col1 AS c1 FROM foo
+ * SELECT col1, col1 AS c1, col2 FROM foo GROUP BY col1, col2 --> SELECT DISTINCT col1, col1 AS ci, col2 FROM foo
+ */
 public class NonAggregationGroupByToDistinctQueryRewriter implements QueryRewriter {
-  /**
-   * Rewrite non-aggregate group by query to distinct query.
-   * E.g.
-   * ```
-   *   SELECT col1+col2*5 FROM foo GROUP BY col1, col2 => SELECT distinct col1+col2*5 FROM foo
-   *   SELECT col1, col2 FROM foo GROUP BY col1, col2 => SELECT distinct col1, col2 FROM foo
-   * ```
-   * @param pinotQuery
-   */
+
   @Override
   public PinotQuery rewrite(PinotQuery pinotQuery) {
-    boolean hasAggregation = false;
+    if (pinotQuery.getGroupByListSize() == 0) {
+      return pinotQuery;
+    }
     for (Expression select : pinotQuery.getSelectList()) {
       if (CalciteSqlParser.isAggregateExpression(select)) {
-        hasAggregation = true;
+        return pinotQuery;
       }
     }
     if (pinotQuery.getOrderByList() != null) {
       for (Expression orderBy : pinotQuery.getOrderByList()) {
         if (CalciteSqlParser.isAggregateExpression(orderBy)) {
-          hasAggregation = true;
+          return pinotQuery;
         }
       }
     }
-    if (!hasAggregation && pinotQuery.getGroupByListSize() > 0) {
-      Set<String> selectIdentifiers = CalciteSqlParser.extractIdentifiers(pinotQuery.getSelectList(), true);
-      Set<String> groupByIdentifiers = CalciteSqlParser.extractIdentifiers(pinotQuery.getGroupByList(), true);
-      if (groupByIdentifiers.containsAll(selectIdentifiers)) {
-        Expression distinctExpression = RequestUtils.getFunctionExpression("distinct");
-        for (Expression select : pinotQuery.getSelectList()) {
-          distinctExpression.getFunctionCall().addToOperands(select);
-        }
-        pinotQuery.setSelectList(Arrays.asList(distinctExpression));
-        pinotQuery.setGroupByList(Collections.emptyList());
+
+    // This rewriter is applied after AliasApplier, so all the alias in group-by are already replaced with expressions
+    Set<Expression> selectExpressions = new HashSet<>();
+    for (Expression select : pinotQuery.getSelectList()) {
+      Function function = select.getFunctionCall();
+      if (function != null && function.getOperator().equals("as")) {
+        selectExpressions.add(function.getOperands().get(0));
       } else {
-        selectIdentifiers.removeAll(groupByIdentifiers);
-        throw new SqlCompilationException(String.format(
-            "For non-aggregation group by query, all the identifiers in select clause should be in groupBys. Found "
-                + "identifier: %s", Arrays.toString(selectIdentifiers.toArray(new String[0]))));
+        selectExpressions.add(select);
       }
     }
-    return pinotQuery;
+    Set<Expression> groupByExpressions = new HashSet<>(pinotQuery.getGroupByList());
+    if (selectExpressions.equals(groupByExpressions)) {
+      Expression distinct = RequestUtils.getFunctionExpression("distinct");
+      distinct.getFunctionCall().setOperands(pinotQuery.getSelectList());
+      pinotQuery.setSelectList(Collections.singletonList(distinct));
+      pinotQuery.setGroupByList(null);

Review Comment:
   We should be able to handle both, but ideally we should set it to `null` because directly compiling `DISTINCT` query without `GROUP BY` clause will get `null` for this field



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[GitHub] [pinot] Jackie-Jiang commented on a diff in pull request #9605: Fix NonAggregationGroupByToDistinctQueryRewriter

Posted by GitBox <gi...@apache.org>.
Jackie-Jiang commented on code in PR #9605:
URL: https://github.com/apache/pinot/pull/9605#discussion_r997618184


##########
pinot-common/src/test/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriterTest.java:
##########
@@ -18,155 +18,45 @@
  */
 package org.apache.pinot.sql.parsers.rewriter;
 
-import org.apache.pinot.common.request.PinotQuery;
 import org.apache.pinot.sql.parsers.CalciteSqlParser;
-import org.testng.Assert;
+import org.apache.pinot.sql.parsers.SqlCompilationException;
 import org.testng.annotations.Test;
 
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertThrows;
 
-public class NonAggregationGroupByToDistinctQueryRewriterTest {
 
+public class NonAggregationGroupByToDistinctQueryRewriterTest {
   private static final QueryRewriter QUERY_REWRITER = new NonAggregationGroupByToDistinctQueryRewriter();
 
   @Test
-  public void testQuery1() {
-    final PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT A FROM myTable GROUP BY A");
-    QUERY_REWRITER.rewrite(pinotQuery);
-    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinct");
-    Assert.assertEquals(
-        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "A");
-  }
-
-  @Test
-  // SELECT col1+col2*5 FROM foo GROUP BY col1, col2 => SELECT distinct col1+col2*5 FROM foo
-  public void testQuery2() {
-    final PinotQuery pinotQuery =
-        CalciteSqlParser.compileToPinotQuery("SELECT col1+col2*5 FROM foo GROUP BY col1, col2");
-    QUERY_REWRITER.rewrite(pinotQuery);
-    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinct");
-    Assert.assertEquals(
-        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(0)
-            .getIdentifier().getName(), "col1");
-    Assert.assertEquals(
-        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1)
-            .getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col2");
-    Assert.assertEquals(
-        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1)
-            .getFunctionCall().getOperands().get(1).getLiteral().getLongValue(), 5L);
+  public void testQueryRewrite() {
+    testQueryRewrite("SELECT A FROM myTable GROUP BY A", "SELECT DISTINCT A FROM myTable");
+    testQueryRewrite("SELECT col1, col2 FROM foo GROUP BY col1, col2", "SELECT DISTINCT col1, col2 FROM foo");

Review Comment:
   #9616 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[GitHub] [pinot] codecov-commenter commented on pull request #9605: Fix NonAggregationGroupByToDistinctQueryRewriter

Posted by GitBox <gi...@apache.org>.
codecov-commenter commented on PR #9605:
URL: https://github.com/apache/pinot/pull/9605#issuecomment-1279837806

   # [Codecov](https://codecov.io/gh/apache/pinot/pull/9605?src=pr&el=h1&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) Report
   > Merging [#9605](https://codecov.io/gh/apache/pinot/pull/9605?src=pr&el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (137e643) into [master](https://codecov.io/gh/apache/pinot/commit/0a442b90a337b6dd777a1d55a4361e9654e7e91d?el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) (0a442b9) will **decrease** coverage by `2.64%`.
   > The diff coverage is `100.00%`.
   
   ```diff
   @@             Coverage Diff              @@
   ##             master    #9605      +/-   ##
   ============================================
   - Coverage     70.03%   67.39%   -2.65%     
   + Complexity     5288     5091     -197     
   ============================================
     Files          1937     1435     -502     
     Lines        103498    75109   -28389     
     Branches      15715    12005    -3710     
   ============================================
   - Hits          72487    50616   -21871     
   + Misses        25907    20843    -5064     
   + Partials       5104     3650    -1454     
   ```
   
   | Flag | Coverage Δ | |
   |---|---|---|
   | integration1 | `?` | |
   | integration2 | `?` | |
   | unittests1 | `67.39% <100.00%> (-0.02%)` | :arrow_down: |
   | unittests2 | `?` | |
   
   Flags with carried forward coverage won't be shown. [Click here](https://docs.codecov.io/docs/carryforward-flags?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#carryforward-flags-in-the-pull-request-comment) to find out more.
   
   | [Impacted Files](https://codecov.io/gh/apache/pinot/pull/9605?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | Coverage Δ | |
   |---|---|---|
   | [.../NonAggregationGroupByToDistinctQueryRewriter.java](https://codecov.io/gh/apache/pinot/pull/9605/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9zcWwvcGFyc2Vycy9yZXdyaXRlci9Ob25BZ2dyZWdhdGlvbkdyb3VwQnlUb0Rpc3RpbmN0UXVlcnlSZXdyaXRlci5qYXZh) | `100.00% <100.00%> (ø)` | |
   | [...va/org/apache/pinot/core/routing/RoutingTable.java](https://codecov.io/gh/apache/pinot/pull/9605/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29yZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3QvY29yZS9yb3V0aW5nL1JvdXRpbmdUYWJsZS5qYXZh) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
   | [...va/org/apache/pinot/common/config/NettyConfig.java](https://codecov.io/gh/apache/pinot/pull/9605/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vY29uZmlnL05ldHR5Q29uZmlnLmphdmE=) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
   | [...a/org/apache/pinot/common/metrics/MinionMeter.java](https://codecov.io/gh/apache/pinot/pull/9605/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vbWV0cmljcy9NaW5pb25NZXRlci5qYXZh) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
   | [...g/apache/pinot/common/metrics/ControllerMeter.java](https://codecov.io/gh/apache/pinot/pull/9605/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vbWV0cmljcy9Db250cm9sbGVyTWV0ZXIuamF2YQ==) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
   | [.../apache/pinot/common/metrics/BrokerQueryPhase.java](https://codecov.io/gh/apache/pinot/pull/9605/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vbWV0cmljcy9Ccm9rZXJRdWVyeVBoYXNlLmphdmE=) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
   | [.../apache/pinot/common/metrics/MinionQueryPhase.java](https://codecov.io/gh/apache/pinot/pull/9605/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vbWV0cmljcy9NaW5pb25RdWVyeVBoYXNlLmphdmE=) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
   | [...ache/pinot/server/access/AccessControlFactory.java](https://codecov.io/gh/apache/pinot/pull/9605/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29yZS9zcmMvbWFpbi9qYXZhL29yZy9hcGFjaGUvcGlub3Qvc2VydmVyL2FjY2Vzcy9BY2Nlc3NDb250cm9sRmFjdG9yeS5qYXZh) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
   | [...he/pinot/common/messages/SegmentReloadMessage.java](https://codecov.io/gh/apache/pinot/pull/9605/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vbWVzc2FnZXMvU2VnbWVudFJlbG9hZE1lc3NhZ2UuamF2YQ==) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
   | [...he/pinot/common/messages/TableDeletionMessage.java](https://codecov.io/gh/apache/pinot/pull/9605/diff?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation#diff-cGlub3QtY29tbW9uL3NyYy9tYWluL2phdmEvb3JnL2FwYWNoZS9waW5vdC9jb21tb24vbWVzc2FnZXMvVGFibGVEZWxldGlvbk1lc3NhZ2UuamF2YQ==) | `0.00% <0.00%> (-100.00%)` | :arrow_down: |
   | ... and [748 more](https://codecov.io/gh/apache/pinot/pull/9605/diff?src=pr&el=tree-more&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation) | |
   
   :mega: We’re building smart automated test selection to slash your CI/CD build times. [Learn more](https://about.codecov.io/iterative-testing/?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=The+Apache+Software+Foundation)
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[GitHub] [pinot] siddharthteotia commented on a diff in pull request #9605: Fix NonAggregationGroupByToDistinctQueryRewriter

Posted by GitBox <gi...@apache.org>.
siddharthteotia commented on code in PR #9605:
URL: https://github.com/apache/pinot/pull/9605#discussion_r997602000


##########
pinot-common/src/test/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriterTest.java:
##########
@@ -18,155 +18,45 @@
  */
 package org.apache.pinot.sql.parsers.rewriter;
 
-import org.apache.pinot.common.request.PinotQuery;
 import org.apache.pinot.sql.parsers.CalciteSqlParser;
-import org.testng.Assert;
+import org.apache.pinot.sql.parsers.SqlCompilationException;
 import org.testng.annotations.Test;
 
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertThrows;
 
-public class NonAggregationGroupByToDistinctQueryRewriterTest {
 
+public class NonAggregationGroupByToDistinctQueryRewriterTest {
   private static final QueryRewriter QUERY_REWRITER = new NonAggregationGroupByToDistinctQueryRewriter();
 
   @Test
-  public void testQuery1() {
-    final PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT A FROM myTable GROUP BY A");
-    QUERY_REWRITER.rewrite(pinotQuery);
-    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinct");
-    Assert.assertEquals(
-        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "A");
-  }
-
-  @Test
-  // SELECT col1+col2*5 FROM foo GROUP BY col1, col2 => SELECT distinct col1+col2*5 FROM foo
-  public void testQuery2() {
-    final PinotQuery pinotQuery =
-        CalciteSqlParser.compileToPinotQuery("SELECT col1+col2*5 FROM foo GROUP BY col1, col2");
-    QUERY_REWRITER.rewrite(pinotQuery);
-    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinct");
-    Assert.assertEquals(
-        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(0)
-            .getIdentifier().getName(), "col1");
-    Assert.assertEquals(
-        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1)
-            .getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col2");
-    Assert.assertEquals(
-        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1)
-            .getFunctionCall().getOperands().get(1).getLiteral().getLongValue(), 5L);
+  public void testQueryRewrite() {
+    testQueryRewrite("SELECT A FROM myTable GROUP BY A", "SELECT DISTINCT A FROM myTable");
+    testQueryRewrite("SELECT col1, col2 FROM foo GROUP BY col1, col2", "SELECT DISTINCT col1, col2 FROM foo");

Review Comment:
   How do we handle when the user (intentionally or unintentionally) specifies different order in GROUP BY clause ?
   
   `SELECT col1, col2 FROM FOO GROUP BY col2, col1`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[GitHub] [pinot] walterddr commented on a diff in pull request #9605: Fix NonAggregationGroupByToDistinctQueryRewriter

Posted by GitBox <gi...@apache.org>.
walterddr commented on code in PR #9605:
URL: https://github.com/apache/pinot/pull/9605#discussion_r997305623


##########
pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriter.java:
##########
@@ -18,58 +18,67 @@
  */
 package org.apache.pinot.sql.parsers.rewriter;
 
-import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.Set;
 import org.apache.pinot.common.request.Expression;
+import org.apache.pinot.common.request.Function;
 import org.apache.pinot.common.request.PinotQuery;
 import org.apache.pinot.common.utils.request.RequestUtils;
 import org.apache.pinot.sql.parsers.CalciteSqlParser;
 import org.apache.pinot.sql.parsers.SqlCompilationException;
 
 
+/**
+ * Rewrite non-aggregation group-by query to distinct query.
+ * The query can be rewritten only if select expression set and group-by expression set are the same.
+ * E.g.
+ * SELECT col1, col2 FROM foo GROUP BY col1, col2 --> SELECT DISTINCT col1, col2 FROM foo
+ * SELECT col1 + col2 FROM foo GROUP BY col1 + col2 --> SELECT DISTINCT col1 + col2 FROM foo
+ * SELECT col1 AS c1 FROM foo GROUP BY col1 --> SELECT DISTINCT col1 AS c1 FROM foo
+ * SELECT col1, col1 AS c1, col2 FROM foo GROUP BY col1, col2 --> SELECT DISTINCT col1, col1 AS ci, col2 FROM foo
+ */
 public class NonAggregationGroupByToDistinctQueryRewriter implements QueryRewriter {
-  /**
-   * Rewrite non-aggregate group by query to distinct query.
-   * E.g.
-   * ```
-   *   SELECT col1+col2*5 FROM foo GROUP BY col1, col2 => SELECT distinct col1+col2*5 FROM foo
-   *   SELECT col1, col2 FROM foo GROUP BY col1, col2 => SELECT distinct col1, col2 FROM foo
-   * ```
-   * @param pinotQuery
-   */
+
   @Override
   public PinotQuery rewrite(PinotQuery pinotQuery) {
-    boolean hasAggregation = false;
+    if (pinotQuery.getGroupByListSize() == 0) {
+      return pinotQuery;
+    }
     for (Expression select : pinotQuery.getSelectList()) {
       if (CalciteSqlParser.isAggregateExpression(select)) {
-        hasAggregation = true;
+        return pinotQuery;
       }
     }
     if (pinotQuery.getOrderByList() != null) {
       for (Expression orderBy : pinotQuery.getOrderByList()) {
         if (CalciteSqlParser.isAggregateExpression(orderBy)) {
-          hasAggregation = true;
+          return pinotQuery;
         }
       }
     }
-    if (!hasAggregation && pinotQuery.getGroupByListSize() > 0) {
-      Set<String> selectIdentifiers = CalciteSqlParser.extractIdentifiers(pinotQuery.getSelectList(), true);
-      Set<String> groupByIdentifiers = CalciteSqlParser.extractIdentifiers(pinotQuery.getGroupByList(), true);
-      if (groupByIdentifiers.containsAll(selectIdentifiers)) {
-        Expression distinctExpression = RequestUtils.getFunctionExpression("distinct");
-        for (Expression select : pinotQuery.getSelectList()) {
-          distinctExpression.getFunctionCall().addToOperands(select);
-        }
-        pinotQuery.setSelectList(Arrays.asList(distinctExpression));
-        pinotQuery.setGroupByList(Collections.emptyList());
+
+    // This rewriter is applied after AliasApplier, so all the alias in group-by are already replaced with expressions
+    Set<Expression> selectExpressions = new HashSet<>();
+    for (Expression select : pinotQuery.getSelectList()) {
+      Function function = select.getFunctionCall();
+      if (function != null && function.getOperator().equals("as")) {
+        selectExpressions.add(function.getOperands().get(0));
       } else {
-        selectIdentifiers.removeAll(groupByIdentifiers);
-        throw new SqlCompilationException(String.format(
-            "For non-aggregation group by query, all the identifiers in select clause should be in groupBys. Found "
-                + "identifier: %s", Arrays.toString(selectIdentifiers.toArray(new String[0]))));
+        selectExpressions.add(select);
       }
     }
-    return pinotQuery;
+    Set<Expression> groupByExpressions = new HashSet<>(pinotQuery.getGroupByList());
+    if (selectExpressions.equals(groupByExpressions)) {
+      Expression distinct = RequestUtils.getFunctionExpression("distinct");
+      distinct.getFunctionCall().setOperands(pinotQuery.getSelectList());
+      pinotQuery.setSelectList(Collections.singletonList(distinct));
+      pinotQuery.setGroupByList(null);

Review Comment:
   what's the difference from setting it to null vs empty?



##########
pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java:
##########
@@ -2217,7 +2214,7 @@ public void testNonAggregationGroupByQuery() {
     Assert.assertEquals(
         pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(), "col2");
 
-    query = "SELECT col1+col2*5 FROM foo GROUP BY col1, col2";
+    query = "SELECT col1+col2*5 FROM foo GROUP BY col1+col2*5";

Review Comment:
   please add a test to catch that the original query now throws exception during rewrite.



##########
pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriter.java:
##########
@@ -18,58 +18,67 @@
  */
 package org.apache.pinot.sql.parsers.rewriter;
 
-import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.Set;
 import org.apache.pinot.common.request.Expression;
+import org.apache.pinot.common.request.Function;
 import org.apache.pinot.common.request.PinotQuery;
 import org.apache.pinot.common.utils.request.RequestUtils;
 import org.apache.pinot.sql.parsers.CalciteSqlParser;
 import org.apache.pinot.sql.parsers.SqlCompilationException;
 
 
+/**
+ * Rewrite non-aggregation group-by query to distinct query.
+ * The query can be rewritten only if select expression set and group-by expression set are the same.
+ * E.g.
+ * SELECT col1, col2 FROM foo GROUP BY col1, col2 --> SELECT DISTINCT col1, col2 FROM foo
+ * SELECT col1 + col2 FROM foo GROUP BY col1 + col2 --> SELECT DISTINCT col1 + col2 FROM foo
+ * SELECT col1 AS c1 FROM foo GROUP BY col1 --> SELECT DISTINCT col1 AS c1 FROM foo
+ * SELECT col1, col1 AS c1, col2 FROM foo GROUP BY col1, col2 --> SELECT DISTINCT col1, col1 AS ci, col2 FROM foo

Review Comment:
   please also add the non-supported rewrites as those removed tests below. 
   they are valid queries just pinot returns the wrong result (with the wrong rewrite), and those queries will not throw exception during rewrite



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[GitHub] [pinot] Jackie-Jiang merged pull request #9605: Fix NonAggregationGroupByToDistinctQueryRewriter

Posted by GitBox <gi...@apache.org>.
Jackie-Jiang merged PR #9605:
URL: https://github.com/apache/pinot/pull/9605


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[GitHub] [pinot] Jackie-Jiang commented on a diff in pull request #9605: Fix NonAggregationGroupByToDistinctQueryRewriter

Posted by GitBox <gi...@apache.org>.
Jackie-Jiang commented on code in PR #9605:
URL: https://github.com/apache/pinot/pull/9605#discussion_r997365417


##########
pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriter.java:
##########
@@ -18,58 +18,67 @@
  */
 package org.apache.pinot.sql.parsers.rewriter;
 
-import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.Set;
 import org.apache.pinot.common.request.Expression;
+import org.apache.pinot.common.request.Function;
 import org.apache.pinot.common.request.PinotQuery;
 import org.apache.pinot.common.utils.request.RequestUtils;
 import org.apache.pinot.sql.parsers.CalciteSqlParser;
 import org.apache.pinot.sql.parsers.SqlCompilationException;
 
 
+/**
+ * Rewrite non-aggregation group-by query to distinct query.
+ * The query can be rewritten only if select expression set and group-by expression set are the same.
+ * E.g.
+ * SELECT col1, col2 FROM foo GROUP BY col1, col2 --> SELECT DISTINCT col1, col2 FROM foo
+ * SELECT col1 + col2 FROM foo GROUP BY col1 + col2 --> SELECT DISTINCT col1 + col2 FROM foo
+ * SELECT col1 AS c1 FROM foo GROUP BY col1 --> SELECT DISTINCT col1 AS c1 FROM foo
+ * SELECT col1, col1 AS c1, col2 FROM foo GROUP BY col1, col2 --> SELECT DISTINCT col1, col1 AS ci, col2 FROM foo

Review Comment:
   Added some more comments and tests



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[GitHub] [pinot] Jackie-Jiang commented on a diff in pull request #9605: Fix NonAggregationGroupByToDistinctQueryRewriter

Posted by GitBox <gi...@apache.org>.
Jackie-Jiang commented on code in PR #9605:
URL: https://github.com/apache/pinot/pull/9605#discussion_r997617054


##########
pinot-common/src/test/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriterTest.java:
##########
@@ -18,155 +18,45 @@
  */
 package org.apache.pinot.sql.parsers.rewriter;
 
-import org.apache.pinot.common.request.PinotQuery;
 import org.apache.pinot.sql.parsers.CalciteSqlParser;
-import org.testng.Assert;
+import org.apache.pinot.sql.parsers.SqlCompilationException;
 import org.testng.annotations.Test;
 
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertThrows;
 
-public class NonAggregationGroupByToDistinctQueryRewriterTest {
 
+public class NonAggregationGroupByToDistinctQueryRewriterTest {
   private static final QueryRewriter QUERY_REWRITER = new NonAggregationGroupByToDistinctQueryRewriter();
 
   @Test
-  public void testQuery1() {
-    final PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT A FROM myTable GROUP BY A");
-    QUERY_REWRITER.rewrite(pinotQuery);
-    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinct");
-    Assert.assertEquals(
-        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "A");
-  }
-
-  @Test
-  // SELECT col1+col2*5 FROM foo GROUP BY col1, col2 => SELECT distinct col1+col2*5 FROM foo
-  public void testQuery2() {
-    final PinotQuery pinotQuery =
-        CalciteSqlParser.compileToPinotQuery("SELECT col1+col2*5 FROM foo GROUP BY col1, col2");
-    QUERY_REWRITER.rewrite(pinotQuery);
-    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinct");
-    Assert.assertEquals(
-        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(0)
-            .getIdentifier().getName(), "col1");
-    Assert.assertEquals(
-        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1)
-            .getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col2");
-    Assert.assertEquals(
-        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1)
-            .getFunctionCall().getOperands().get(1).getLiteral().getLongValue(), 5L);
+  public void testQueryRewrite() {
+    testQueryRewrite("SELECT A FROM myTable GROUP BY A", "SELECT DISTINCT A FROM myTable");
+    testQueryRewrite("SELECT col1, col2 FROM foo GROUP BY col1, col2", "SELECT DISTINCT col1, col2 FROM foo");

Review Comment:
   It will be `SELECT DISTINCT col1, col2 FROM FOO`. Let me add a test for it



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org