You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@phoenix.apache.org by comnetwork <gi...@git.apache.org> on 2018/07/31 04:45:54 UTC
[GitHub] phoenix pull request #320: PHOENIX-4820
GitHub user comnetwork opened a pull request:
https://github.com/apache/phoenix/pull/320
PHOENIX-4820
This patch is mainly for:
1.Isolate the changes made for QueryCompiler to OrderByCompiler.
2.Check more Expression besides ComparisonExpression for IsColumnConstantExpressionVisitor, add tests for InListExpression , CorceExpression and RVC.
3. I think ExpressionUtil.isConstant(Expression) is not suitable for OrderPreservingTracker.IsConstantVisitor, isStateless() is to check if the expression is depend on the server state, even for RowKeyColumnExpression , isStateless() is false. What we want to check is if a expression is constant when all children of it are constants, just consider following sql:
select a.ak3 from
(select rand() ak1,length(pk2) ak2,length(pk3) ak3,length(v1) av1,length(v2) av2 from test order by pk2,pk3 limit 10) a
where a.ak1 = 0.0 and a.av2 = length(substr('abc',1,1))
group by a.ak3,CASE WHEN coalesce(a.ak1,1) > coalesce(a.av2,2) THEN coalesce(a.ak1,1) ELSE coalesce(a.av2,2) END,a.av1
order by a.ak3,a.av1
Obviously , because of rand(), the Determinism of expression a.ak1 is Determinism.PER_INVOCATION, so for expression "CASE WHEN coalesce(a.ak1,1) > coalesce(a.av2,2) THEN coalesce(a.ak1,1) ELSE coalesce(a.av2,2) END", the determinism is Determinism.PER_INVOCATION and isStateless is false , but because the a.ak1 and a.av2 are both constants in where clause of outer query, we can regard "CASE WHEN coalesce(a.ak1,1) > coalesce(a.av2,2) THEN coalesce(a.ak1,1) ELSE coalesce(a.av2,2) END" as constant in IsConstantVisitor.
You can merge this pull request into a Git repository by running:
$ git pull https://github.com/comnetwork/phoenix 4.x-HBase-1.3
Alternatively you can review and apply these changes as the patch at:
https://github.com/apache/phoenix/pull/320.patch
To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:
This closes #320
----
commit ae0e6ef4dded2c3a8c3d6fffa200acbe971688f8
Author: chenglei <ch...@...>
Date: 2018-07-31T04:34:25Z
PHOENIX-4820 v2
----
---
[GitHub] phoenix issue #320: PHOENIX-4820
Posted by JamesRTaylor <gi...@git.apache.org>.
Github user JamesRTaylor commented on the issue:
https://github.com/apache/phoenix/pull/320
Thanks for the earlier comment, @maryannxue. Do you think this approach is a good first step? Can you think of any cases where this approach will be problematic?
---
[GitHub] phoenix pull request #320: PHOENIX-4820
Posted by JamesRTaylor <gi...@git.apache.org>.
Github user JamesRTaylor commented on a diff in the pull request:
https://github.com/apache/phoenix/pull/320#discussion_r206717157
--- Diff: phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java ---
@@ -2951,7 +2951,129 @@ public void testOrderPreservingGroupBy() throws Exception {
}
}
}
-
+
+ @Test
+ public void testOrderPreservingGroupByForNotPkColumns() throws Exception {
+
+ try (Connection conn= DriverManager.getConnection(getUrl())) {
+
+ conn.createStatement().execute("CREATE TABLE test (\n" +
+ " pk1 varchar, \n" +
+ " pk2 varchar, \n" +
+ " pk3 varchar, \n" +
+ " pk4 varchar, \n" +
+ " v1 varchar, \n" +
+ " v2 varchar,\n" +
+ " CONSTRAINT pk PRIMARY KEY (\n" +
+ " pk1,\n" +
+ " pk2,\n" +
+ " pk3,\n" +
+ " pk4\n" +
+ " )\n" +
+ " )");
+ String[] queries = new String[] {
+ "SELECT pk3 FROM test WHERE v2 = 'a' GROUP BY substr(v2,0,1),pk3 ORDER BY pk3",
+ "SELECT pk3 FROM test WHERE pk1 = 'c' and v2 = substr('abc',1,1) GROUP BY v2,pk3 ORDER BY pk3",
+ "SELECT pk3 FROM test WHERE v1 = 'a' and v2 = 'b' GROUP BY length(v1)+length(v2),pk3 ORDER BY pk3",
+ "SELECT pk3 FROM test WHERE pk1 = 'a' and v2 = 'b' GROUP BY length(pk1)+length(v2),pk3 ORDER BY pk3",
+ "SELECT pk3 FROM test WHERE v1 = 'a' and v2 = substr('abc',2,1) GROUP BY pk4,CASE WHEN v1 > v2 THEN v1 ELSE v2 END,pk3 ORDER BY pk4,pk3",
+ "SELECT pk3 FROM test WHERE pk1 = 'a' and v2 = substr('abc',2,1) GROUP BY pk4,CASE WHEN pk1 > v2 THEN pk1 ELSE v2 END,pk3 ORDER BY pk4,pk3",
+ "SELECT pk3 FROM test WHERE pk1 = 'a' and pk2 = 'b' and v1 = 'c' GROUP BY CASE WHEN pk1 > pk2 THEN v1 WHEN pk1 = pk2 THEN pk1 ELSE pk2 END,pk3 ORDER BY pk3"
+ };
+ int index = 0;
+ for (String query : queries) {
+ QueryPlan plan = getQueryPlan(conn, query);
+ assertTrue((index + 1) + ") " + queries[index], plan.getOrderBy().getOrderByExpressions().isEmpty());
+ index++;
+ }
+ }
+ }
+
+ @Test
+ public void testOrderPreservingGroupByForClientAggregatePlan() throws Exception {
+ Connection conn = null;
+ try {
+ conn = DriverManager.getConnection(getUrl());
+ String tableName1 = "test_table";
+ String sql = "create table " + tableName1 + "( "+
+ " pk1 varchar not null , " +
+ " pk2 varchar not null, " +
+ " pk3 varchar not null," +
+ " v1 varchar, " +
+ " v2 varchar, " +
+ " CONSTRAINT TEST_PK PRIMARY KEY ( "+
+ "pk1,"+
+ "pk2,"+
+ "pk3 ))";
+ conn.createStatement().execute(sql);
+
+ String[] queries = new String[] {
+ "select a.ak3 "+
+ "from (select substr(pk1,1,1) ak1,substr(pk2,1,1) ak2,substr(pk3,1,1) ak3,substr(v1,1,1) av1,substr(v2,1,1) av2 from "+tableName1+" order by pk2,pk3 limit 10) a "+
+ "group by a.ak3,a.av1 order by a.ak3,a.av1",
+
+ "select a.ak3 "+
+ "from (select substr(pk1,1,1) ak1,substr(pk2,1,1) ak2,substr(pk3,1,1) ak3,substr(v1,1,1) av1,substr(v2,1,1) av2 from "+tableName1+" order by pk2,pk3 limit 10) a "+
+ "where a.av2 = 'a' GROUP BY substr(a.av2,0,1),ak3 ORDER BY ak3",
+
+ //for InListExpression
+ "select a.ak3 "+
+ "from (select substr(pk1,1,1) ak1,substr(pk2,1,1) ak2,substr(pk3,1,1) ak3,substr(v1,1,1) av1,substr(v2,1,1) av2 from "+tableName1+" order by pk2,pk3 limit 10) a "+
+ "where a.av2 in('a') GROUP BY substr(a.av2,0,1),ak3 ORDER BY ak3",
--- End diff --
Never mind - just saw your comment in the new visitor and you only want to optimize this single constant case. Would it make sense to have a negative test for this below to make sure it doesn't get optimized when there are more than a single constant in an IN clause?
---
[GitHub] phoenix pull request #320: PHOENIX-4820
Posted by JamesRTaylor <gi...@git.apache.org>.
Github user JamesRTaylor commented on a diff in the pull request:
https://github.com/apache/phoenix/pull/320#discussion_r206715618
--- Diff: phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java ---
@@ -2951,7 +2951,129 @@ public void testOrderPreservingGroupBy() throws Exception {
}
}
}
-
+
+ @Test
+ public void testOrderPreservingGroupByForNotPkColumns() throws Exception {
+
+ try (Connection conn= DriverManager.getConnection(getUrl())) {
+
+ conn.createStatement().execute("CREATE TABLE test (\n" +
+ " pk1 varchar, \n" +
+ " pk2 varchar, \n" +
+ " pk3 varchar, \n" +
+ " pk4 varchar, \n" +
+ " v1 varchar, \n" +
+ " v2 varchar,\n" +
+ " CONSTRAINT pk PRIMARY KEY (\n" +
+ " pk1,\n" +
+ " pk2,\n" +
+ " pk3,\n" +
+ " pk4\n" +
+ " )\n" +
+ " )");
+ String[] queries = new String[] {
+ "SELECT pk3 FROM test WHERE v2 = 'a' GROUP BY substr(v2,0,1),pk3 ORDER BY pk3",
+ "SELECT pk3 FROM test WHERE pk1 = 'c' and v2 = substr('abc',1,1) GROUP BY v2,pk3 ORDER BY pk3",
+ "SELECT pk3 FROM test WHERE v1 = 'a' and v2 = 'b' GROUP BY length(v1)+length(v2),pk3 ORDER BY pk3",
+ "SELECT pk3 FROM test WHERE pk1 = 'a' and v2 = 'b' GROUP BY length(pk1)+length(v2),pk3 ORDER BY pk3",
+ "SELECT pk3 FROM test WHERE v1 = 'a' and v2 = substr('abc',2,1) GROUP BY pk4,CASE WHEN v1 > v2 THEN v1 ELSE v2 END,pk3 ORDER BY pk4,pk3",
+ "SELECT pk3 FROM test WHERE pk1 = 'a' and v2 = substr('abc',2,1) GROUP BY pk4,CASE WHEN pk1 > v2 THEN pk1 ELSE v2 END,pk3 ORDER BY pk4,pk3",
+ "SELECT pk3 FROM test WHERE pk1 = 'a' and pk2 = 'b' and v1 = 'c' GROUP BY CASE WHEN pk1 > pk2 THEN v1 WHEN pk1 = pk2 THEN pk1 ELSE pk2 END,pk3 ORDER BY pk3"
+ };
+ int index = 0;
+ for (String query : queries) {
+ QueryPlan plan = getQueryPlan(conn, query);
+ assertTrue((index + 1) + ") " + queries[index], plan.getOrderBy().getOrderByExpressions().isEmpty());
+ index++;
+ }
+ }
+ }
+
+ @Test
+ public void testOrderPreservingGroupByForClientAggregatePlan() throws Exception {
+ Connection conn = null;
+ try {
+ conn = DriverManager.getConnection(getUrl());
+ String tableName1 = "test_table";
+ String sql = "create table " + tableName1 + "( "+
+ " pk1 varchar not null , " +
+ " pk2 varchar not null, " +
+ " pk3 varchar not null," +
+ " v1 varchar, " +
+ " v2 varchar, " +
+ " CONSTRAINT TEST_PK PRIMARY KEY ( "+
+ "pk1,"+
+ "pk2,"+
+ "pk3 ))";
+ conn.createStatement().execute(sql);
+
+ String[] queries = new String[] {
+ "select a.ak3 "+
+ "from (select substr(pk1,1,1) ak1,substr(pk2,1,1) ak2,substr(pk3,1,1) ak3,substr(v1,1,1) av1,substr(v2,1,1) av2 from "+tableName1+" order by pk2,pk3 limit 10) a "+
+ "group by a.ak3,a.av1 order by a.ak3,a.av1",
+
+ "select a.ak3 "+
+ "from (select substr(pk1,1,1) ak1,substr(pk2,1,1) ak2,substr(pk3,1,1) ak3,substr(v1,1,1) av1,substr(v2,1,1) av2 from "+tableName1+" order by pk2,pk3 limit 10) a "+
+ "where a.av2 = 'a' GROUP BY substr(a.av2,0,1),ak3 ORDER BY ak3",
+
+ //for InListExpression
+ "select a.ak3 "+
+ "from (select substr(pk1,1,1) ak1,substr(pk2,1,1) ak2,substr(pk3,1,1) ak3,substr(v1,1,1) av1,substr(v2,1,1) av2 from "+tableName1+" order by pk2,pk3 limit 10) a "+
+ "where a.av2 in('a') GROUP BY substr(a.av2,0,1),ak3 ORDER BY ak3",
--- End diff --
For the IN test, please add more than one constant (or this gets optimized to a.av2='a'
---
[GitHub] phoenix pull request #320: PHOENIX-4820
Posted by comnetwork <gi...@git.apache.org>.
Github user comnetwork commented on a diff in the pull request:
https://github.com/apache/phoenix/pull/320#discussion_r206739045
--- Diff: phoenix-core/src/test/java/org/apache/phoenix/compile/QueryCompilerTest.java ---
@@ -2951,7 +2951,129 @@ public void testOrderPreservingGroupBy() throws Exception {
}
}
}
-
+
+ @Test
+ public void testOrderPreservingGroupByForNotPkColumns() throws Exception {
+
+ try (Connection conn= DriverManager.getConnection(getUrl())) {
+
+ conn.createStatement().execute("CREATE TABLE test (\n" +
+ " pk1 varchar, \n" +
+ " pk2 varchar, \n" +
+ " pk3 varchar, \n" +
+ " pk4 varchar, \n" +
+ " v1 varchar, \n" +
+ " v2 varchar,\n" +
+ " CONSTRAINT pk PRIMARY KEY (\n" +
+ " pk1,\n" +
+ " pk2,\n" +
+ " pk3,\n" +
+ " pk4\n" +
+ " )\n" +
+ " )");
+ String[] queries = new String[] {
+ "SELECT pk3 FROM test WHERE v2 = 'a' GROUP BY substr(v2,0,1),pk3 ORDER BY pk3",
+ "SELECT pk3 FROM test WHERE pk1 = 'c' and v2 = substr('abc',1,1) GROUP BY v2,pk3 ORDER BY pk3",
+ "SELECT pk3 FROM test WHERE v1 = 'a' and v2 = 'b' GROUP BY length(v1)+length(v2),pk3 ORDER BY pk3",
+ "SELECT pk3 FROM test WHERE pk1 = 'a' and v2 = 'b' GROUP BY length(pk1)+length(v2),pk3 ORDER BY pk3",
+ "SELECT pk3 FROM test WHERE v1 = 'a' and v2 = substr('abc',2,1) GROUP BY pk4,CASE WHEN v1 > v2 THEN v1 ELSE v2 END,pk3 ORDER BY pk4,pk3",
+ "SELECT pk3 FROM test WHERE pk1 = 'a' and v2 = substr('abc',2,1) GROUP BY pk4,CASE WHEN pk1 > v2 THEN pk1 ELSE v2 END,pk3 ORDER BY pk4,pk3",
+ "SELECT pk3 FROM test WHERE pk1 = 'a' and pk2 = 'b' and v1 = 'c' GROUP BY CASE WHEN pk1 > pk2 THEN v1 WHEN pk1 = pk2 THEN pk1 ELSE pk2 END,pk3 ORDER BY pk3"
+ };
+ int index = 0;
+ for (String query : queries) {
+ QueryPlan plan = getQueryPlan(conn, query);
+ assertTrue((index + 1) + ") " + queries[index], plan.getOrderBy().getOrderByExpressions().isEmpty());
+ index++;
+ }
+ }
+ }
+
+ @Test
+ public void testOrderPreservingGroupByForClientAggregatePlan() throws Exception {
+ Connection conn = null;
+ try {
+ conn = DriverManager.getConnection(getUrl());
+ String tableName1 = "test_table";
+ String sql = "create table " + tableName1 + "( "+
+ " pk1 varchar not null , " +
+ " pk2 varchar not null, " +
+ " pk3 varchar not null," +
+ " v1 varchar, " +
+ " v2 varchar, " +
+ " CONSTRAINT TEST_PK PRIMARY KEY ( "+
+ "pk1,"+
+ "pk2,"+
+ "pk3 ))";
+ conn.createStatement().execute(sql);
+
+ String[] queries = new String[] {
+ "select a.ak3 "+
+ "from (select substr(pk1,1,1) ak1,substr(pk2,1,1) ak2,substr(pk3,1,1) ak3,substr(v1,1,1) av1,substr(v2,1,1) av2 from "+tableName1+" order by pk2,pk3 limit 10) a "+
+ "group by a.ak3,a.av1 order by a.ak3,a.av1",
+
+ "select a.ak3 "+
+ "from (select substr(pk1,1,1) ak1,substr(pk2,1,1) ak2,substr(pk3,1,1) ak3,substr(v1,1,1) av1,substr(v2,1,1) av2 from "+tableName1+" order by pk2,pk3 limit 10) a "+
+ "where a.av2 = 'a' GROUP BY substr(a.av2,0,1),ak3 ORDER BY ak3",
+
+ //for InListExpression
+ "select a.ak3 "+
+ "from (select substr(pk1,1,1) ak1,substr(pk2,1,1) ak2,substr(pk3,1,1) ak3,substr(v1,1,1) av1,substr(v2,1,1) av2 from "+tableName1+" order by pk2,pk3 limit 10) a "+
+ "where a.av2 in('a') GROUP BY substr(a.av2,0,1),ak3 ORDER BY ak3",
--- End diff --
yes, there is already a test for more than a single constant in an IN clause in testNotOrderPreservingGroupByForClientAggregatePlan
---