You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lens.apache.org by am...@apache.org on 2015/10/25 06:57:40 UTC
lens git commit: LENS-835 : Optimizing fact subquery by removing non
aggregated expression columns from group by clause
Repository: lens
Updated Branches:
refs/heads/master 3cd03a37a -> 1f885cd3d
LENS-835 : Optimizing fact subquery by removing non aggregated expression columns from group by clause
Project: http://git-wip-us.apache.org/repos/asf/lens/repo
Commit: http://git-wip-us.apache.org/repos/asf/lens/commit/1f885cd3
Tree: http://git-wip-us.apache.org/repos/asf/lens/tree/1f885cd3
Diff: http://git-wip-us.apache.org/repos/asf/lens/diff/1f885cd3
Branch: refs/heads/master
Commit: 1f885cd3d74310ea1cb4e2049692d53d49e5e7cc
Parents: 3cd03a3
Author: Sushil Mohanty <su...@apache.org>
Authored: Sun Oct 25 11:27:20 2015 +0530
Committer: Amareshwari Sriramadasu <am...@apache.org>
Committed: Sun Oct 25 11:27:20 2015 +0530
----------------------------------------------------------------------
.../lens/driver/jdbc/ColumnarSQLRewriter.java | 18 ++-
.../driver/jdbc/TestColumnarSQLRewriter.java | 149 +++++++++----------
2 files changed, 85 insertions(+), 82 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lens/blob/1f885cd3/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
----------------------------------------------------------------------
diff --git a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
index 2d81ff2..3fdb40e 100644
--- a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
+++ b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
@@ -533,15 +533,19 @@ public class ColumnarSQLRewriter implements QueryRewriter {
log.debug("AST is null ");
return;
}
- if (node.getToken().getType() == HiveParser.DOT
- && node.getParent().getChild(0).getType() != HiveParser.Identifier) {
- String table = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier).toString();
- String column = node.getChild(1).toString().toLowerCase();
+ if (HQLParser.isAggregateAST(node)) {
+ return;
+ } else {
+ if (node.getToken().getType() == HiveParser.DOT
+ && node.getParent().getChild(0).getType() != HiveParser.Identifier) {
+ String table = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier).toString();
+ String column = node.getChild(1).toString().toLowerCase();
- String factAlias = getFactAlias();
+ String factAlias = getFactAlias();
- if (table.equals(factAlias)) {
- factKeys.add(factAlias + "." + column);
+ if (table.equals(factAlias)) {
+ factKeys.add(factAlias + "." + column);
+ }
}
}
http://git-wip-us.apache.org/repos/asf/lens/blob/1f885cd3/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
----------------------------------------------------------------------
diff --git a/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java b/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
index 77ab302..41c3c85 100644
--- a/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
+++ b/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
@@ -578,32 +578,31 @@ public class TestColumnarSQLRewriter {
+ "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ), sum(alias1) dollars_sold , "
+ "round(sum(alias2), 2 ), avg(alias3), min(alias4), max(alias5), location_name "
+ "from (select sales_fact___fact.time_key, sales_fact___fact.location_key, "
- + "sales_fact___fact.item_key, sales_fact___fact.dollars_sold,sum( case "
- + "when (( sales_fact___fact . dollars_sold ) = 0 ) then 0.0 else "
- + "( sales_fact___fact . dollars_sold ) end ) as alias1, "
+ + "sales_fact___fact.item_key,sum( case when (( sales_fact___fact . dollars_sold ) = 0 ) "
+ + "then 0.0 else ( sales_fact___fact . dollars_sold ) end ) as alias1, "
+ "sum(( sales_fact___fact . units_sold )) as alias2, "
+ "avg(( sales_fact___fact . dollars_sold )) as alias3, "
- + "min(( sales_fact___fact . dollars_sold )) as alias4, "
- + "max(( sales_fact___fact . dollars_sold )) as alias5 "
- + "from sales_fact sales_fact___fact where sales_fact___fact.time_key "
- + "in ( select time_dim .time_key from time_dim where "
- + "( time_dim. time_key ) between '2013-01-01' and '2013-01-31' ) "
- + "and sales_fact___fact.location_key in ( select location_dim .location_key "
+ + "min(( sales_fact___fact . dollars_sold )) as alias4, max(( sales_fact___fact . dollars_sold )) "
+ + "as alias5 from sales_fact sales_fact___fact where sales_fact___fact.time_key in "
+ + "( select time_dim .time_key from time_dim where ( time_dim. time_key ) "
+ + "between '2013-01-01' and '2013-01-31' ) and "
+ + "sales_fact___fact.location_key in ( select location_dim .location_key "
+ "from location_dim where (( location_dim. location_name ) = 'test123' ) ) "
- + "and sales_fact___fact.item_key in ( select item_dim .item_key from item_dim "
- + "where (( item_dim. item_name ) = 'item_1' ) ) group by sales_fact___fact.time_key, "
- + "sales_fact___fact.location_key, sales_fact___fact.item_key, sales_fact___fact.dollars_sold) "
- + "sales_fact___fact inner join (select time_key,day_of_week,day from time_dim) "
- + "time_dim___time_dim on (( sales_fact___fact . time_key ) = "
- + "( time_dim___time_dim . time_key )) inner join (select location_key,"
- + "location_name from location_dim) location_dim___location_dim on "
- + "(( sales_fact___fact . location_key ) = ( location_dim___location_dim . location_key )) "
- + "inner join (select item_key,item_name from item_dim) item_dim___item_dim on "
- + "((( sales_fact___fact . item_key ) = ( item_dim___item_dim . item_key )) and "
- + "(( location_dim___location_dim . location_name ) = 'test123' )) where "
- + "(( time_dim___time_dim . time_key ) between '2013-01-01' and '2013-01-31' "
- + "and (( item_dim___item_dim . item_name ) = 'item_1' )) group by "
- + "( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), "
+ + "and sales_fact___fact.item_key in ( select item_dim .item_key from "
+ + "item_dim where (( item_dim. item_name ) = 'item_1' ) ) "
+ + "group by sales_fact___fact.time_key, sales_fact___fact.location_key, "
+ + "sales_fact___fact.item_key) sales_fact___fact inner join "
+ + "(select time_key,day_of_week,day from time_dim) time_dim___time_dim on "
+ + "(( sales_fact___fact . time_key ) = ( time_dim___time_dim . time_key )) "
+ + "inner join (select location_key,location_name from location_dim) "
+ + "location_dim___location_dim on (( sales_fact___fact . location_key ) = "
+ + "( location_dim___location_dim . location_key )) inner join "
+ + "(select item_key,item_name from item_dim) item_dim___item_dim on "
+ + "((( sales_fact___fact . item_key ) = ( item_dim___item_dim . item_key )) "
+ + "and (( location_dim___location_dim . location_name ) = 'test123' )) "
+ + "where (( time_dim___time_dim . time_key ) between '2013-01-01' and '2013-01-31' "
+ + "and (( item_dim___item_dim . item_name ) = 'item_1' )) "
+ + "group by ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), "
+ "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) "
+ "order by dollars_sold desc";
compareQueries(expected, actual);
@@ -630,34 +629,35 @@ public class TestColumnarSQLRewriter {
String actual = qtest.rewrite(query, conf, hconf);
String expected = "select ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), "
- + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ), sum(alias1) dollars_sold , "
- + "round(sum(alias2), 2 ), avg(alias6) avg_dollars_sold, min(alias4), max(alias5) max_dollars_sold, "
- + "location_name , (avg(alias6) / 1.0 ) "
- + "from (select sales_fact___fact.time_key, sales_fact___fact.location_key, sales_fact___fact.item_key,"
- + "sales_fact___fact.dollars_sold, sum( case when (( sales_fact___fact . dollars_sold ) = 0 ) then 0.0 end )"
- + "as alias1, sum(( sales_fact___fact . units_sold )) as alias2, avg(( sales_fact___fact . dollars_sold )) "
- + "as alias3, min(( sales_fact___fact . dollars_sold )) as alias4, "
- + "max(( sales_fact___fact . dollars_sold )) as alias5, "
- + "avg(( sales_fact___fact . dollars_sold )) as alias6 from sales_fact sales_fact___fact "
- + "where sales_fact___fact.time_key in ( select time_dim .time_key from time_dim "
- + "where ( time_dim. time_key ) between '2013-01-01' and '2013-01-31' ) "
- + "and sales_fact___fact.location_key in ( select location_dim .location_key from "
- + "location_dim where (( location_dim. location_name ) = 'test123' ) ) and sales_fact___fact.item_key in "
- + "( select item_dim .item_key from item_dim where (( item_dim. item_name ) = 'item_1' ) ) "
- + "group by sales_fact___fact.time_key, sales_fact___fact.location_key, "
- + "sales_fact___fact.item_key, sales_fact___fact.dollars_sold) sales_fact___fact "
- + "inner join (select time_key, day_of_week, day from time_dim) "
- + "time_dim___time_dim on (( sales_fact___fact . time_key ) = "
- + "( time_dim___time_dim . time_key )) inner join (select location_key, "
- + "location_name from location_dim) location_dim___location_dim "
- + "on (( sales_fact___fact . location_key ) = ( location_dim___location_dim . location_key )) "
- + "inner join (select item_key, item_name from item_dim) item_dim___item_dim "
- + "on ((( sales_fact___fact . item_key ) = "
- + "( item_dim___item_dim . item_key )) and (( location_dim___location_dim . location_name ) = 'test123' )) "
- + "where (( time_dim___time_dim . time_key ) between '2013-01-01' and '2013-01-31' "
- + "and (( item_dim___item_dim . item_name ) = 'item_1' )) group by ( sales_fact___fact . time_key ), "
- + "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day ), "
- + "( item_dim___item_dim . item_key ) order by dollars_sold desc";
+ + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ), "
+ + "sum(alias1) dollars_sold , round(sum(alias2), 2 ), avg(alias6) avg_dollars_sold , min(alias4), "
+ + "max(alias5) max_dollars_sold , location_name , (avg(alias6) / 1.0 ) from "
+ + "(select sales_fact___fact.time_key, sales_fact___fact.location_key, "
+ + "sales_fact___fact.item_key,sum( case when (( sales_fact___fact . dollars_sold ) = 0 ) "
+ + "then 0.0 end ) as alias1, sum(( sales_fact___fact . units_sold )) as alias2, "
+ + "avg(( sales_fact___fact . dollars_sold )) as alias3, min(( sales_fact___fact . dollars_sold )) "
+ + "as alias4, max(( sales_fact___fact . dollars_sold )) as alias5, "
+ + "avg(( sales_fact___fact . dollars_sold )) as alias6 "
+ + "from sales_fact sales_fact___fact where sales_fact___fact.time_key in "
+ + "( select time_dim .time_key from time_dim where ( time_dim. time_key ) "
+ + "between '2013-01-01' and '2013-01-31' ) and sales_fact___fact.location_key in "
+ + "( select location_dim .location_key from location_dim where "
+ + "(( location_dim. location_name ) = 'test123' ) ) and sales_fact___fact.item_key in "
+ + "( select item_dim .item_key from item_dim where (( item_dim. item_name ) = 'item_1' ) ) "
+ + "group by sales_fact___fact.time_key, sales_fact___fact.location_key, sales_fact___fact.item_key) "
+ + "sales_fact___fact inner join (select time_key,day_of_week,day from time_dim) "
+ + "time_dim___time_dim on (( sales_fact___fact . time_key ) = "
+ + "( time_dim___time_dim . time_key )) inner join (select location_key,location_name "
+ + "from location_dim) location_dim___location_dim on (( sales_fact___fact . location_key ) "
+ + "= ( location_dim___location_dim . location_key )) inner join "
+ + "(select item_key,item_name from item_dim) item_dim___item_dim on "
+ + "((( sales_fact___fact . item_key ) = ( item_dim___item_dim . item_key )) "
+ + "and (( location_dim___location_dim . location_name ) = 'test123' )) "
+ + "where (( time_dim___time_dim . time_key ) between '2013-01-01' and '2013-01-31' "
+ + "and (( item_dim___item_dim . item_name ) = 'item_1' )) group by "
+ + "( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ),"
+ + " ( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) "
+ + "order by dollars_sold desc";
compareQueries(expected, actual);
}
@@ -951,33 +951,32 @@ public class TestColumnarSQLRewriter {
String actual = qtest.rewrite(query, conf, hconf);
String expected = "select ( sales_fact__db_sales_fact_fact . time_key ), ( time_dim___time_dim . day_of_week ), "
+ "( time_dim___time_dim . day ), ((sum(alias1) + 0 ) + (sum(alias2) + 0 )) expr1 , "
- + "((sum(alias3) * 1000 ) / sum(alias4)) expr2 from (select sales_fact__db_sales_fact_fact.time_key, "
- + "sales_fact__db_sales_fact_fact.location_key, sales_fact__db_sales_fact_fact.status_id, "
- + "sales_fact__db_sales_fact_fact.item_sold, sales_fact__db_sales_fact_fact.dollars_sold,"
- + "sum(( sales_fact__db_sales_fact_fact . item_count )) as alias1, sum(( sales_fact__db_sales_fact_fact "
- + ". product_count )) as alias2, sum( case when ( sales_fact__db_sales_fact_fact . status_id ) "
- + "is null then 0 when (( sales_fact__db_sales_fact_fact . status_id ) = 1 ) then "
- + "( sales_fact__db_sales_fact_fact . item_sold ) else 0 end ) as alias3, sum( case "
- + "when ( sales_fact__db_sales_fact_fact . status_id ) is null then 0 when "
- + "(( sales_fact__db_sales_fact_fact . status_id ) = 2 ) then "
- + "( sales_fact__db_sales_fact_fact . dollars_sold ) else 0 end ) as alias4 "
- + "from db.sales_fact sales_fact__db_sales_fact_fact where sales_fact__db_sales_fact_fact.time_key in "
- + " ( select time_dim .time_key from time_dim where ( time_dim. time_key ) "
- + "between '2013-01-01' and '2013-01-31' ) and sales_fact__db_sales_fact_fact.location_key in "
- + " ( select location_dim .location_key from location_dim where (( location_dim. location_name ) "
- + "= 'test123' ) ) group by sales_fact__db_sales_fact_fact.time_key, "
- + "sales_fact__db_sales_fact_fact.location_key, sales_fact__db_sales_fact_fact.status_id, "
- + "sales_fact__db_sales_fact_fact.item_sold, sales_fact__db_sales_fact_fact.dollars_sold) "
+ + "((sum(alias3) * 1000 ) / sum(alias4)) expr2 from "
+ + "(select sales_fact__db_sales_fact_fact.time_key, sales_fact__db_sales_fact_fact.location_key,"
+ + "sum(( sales_fact__db_sales_fact_fact . item_count )) as alias1, "
+ + "sum(( sales_fact__db_sales_fact_fact . product_count )) as alias2, "
+ + "sum( case when ( sales_fact__db_sales_fact_fact . status_id ) is null then 0 "
+ + "when (( sales_fact__db_sales_fact_fact . status_id ) = 1 ) then "
+ + "( sales_fact__db_sales_fact_fact . item_sold ) else 0 end ) as alias3, "
+ + "sum( case when ( sales_fact__db_sales_fact_fact . status_id ) is null "
+ + "then 0 when (( sales_fact__db_sales_fact_fact . status_id ) = 2 ) "
+ + "then ( sales_fact__db_sales_fact_fact . dollars_sold ) else 0 end ) as "
+ + "alias4 from db.sales_fact sales_fact__db_sales_fact_fact where "
+ + "sales_fact__db_sales_fact_fact.time_key in ( select time_dim .time_key from time_dim "
+ + "where ( time_dim. time_key ) between '2013-01-01' and '2013-01-31' ) "
+ + "and sales_fact__db_sales_fact_fact.location_key in ( select location_dim .location_key "
+ + "from location_dim where (( location_dim. location_name ) = 'test123' ) ) "
+ + "group by sales_fact__db_sales_fact_fact.time_key, sales_fact__db_sales_fact_fact.location_key) "
+ "sales_fact__db_sales_fact_fact inner join (select time_key,day_of_week,day from time_dim) "
+ "time_dim___time_dim on (( sales_fact__db_sales_fact_fact . time_key ) = "
- + "( time_dim___time_dim . time_key )) inner join (select location_key,location_name "
- + "from db.location_dim) location_dim__db_location_dim_ld on ((( sales_fact__db_sales_fact_fact . "
- + "location_key ) = ( location_dim__db_location_dim_ld . location_key )) and "
+ + "( time_dim___time_dim . time_key )) inner join (select location_key,"
+ + "location_name from db.location_dim) location_dim__db_location_dim_ld on "
+ + "((( sales_fact__db_sales_fact_fact . location_key ) = "
+ + "( location_dim__db_location_dim_ld . location_key )) and "
+ "(( location_dim__db_location_dim_ld . location_name ) = 'test123' )) "
- + "where ( time_dim___time_dim . time_key ) between '2013-01-01' and "
- + "'2013-01-31' group by ( sales_fact__db_sales_fact_fact . time_key ), "
- + "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day ) "
- + "order by dollars_sold desc";
+ + "where ( time_dim___time_dim . time_key ) between '2013-01-01' and '2013-01-31' "
+ + "group by ( sales_fact__db_sales_fact_fact . time_key ), ( time_dim___time_dim . day_of_week ), "
+ + "( time_dim___time_dim . day ) order by dollars_sold desc";
compareQueries(expected, actual);
}