You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lens.apache.org by am...@apache.org on 2015/10/25 06:57:40 UTC

lens git commit: LENS-835 : Optimizing fact subquery by removing non aggregated expression columns from group by clause

Repository: lens
Updated Branches:
  refs/heads/master 3cd03a37a -> 1f885cd3d


LENS-835 : Optimizing fact subquery by removing non aggregated expression columns from group by clause


Project: http://git-wip-us.apache.org/repos/asf/lens/repo
Commit: http://git-wip-us.apache.org/repos/asf/lens/commit/1f885cd3
Tree: http://git-wip-us.apache.org/repos/asf/lens/tree/1f885cd3
Diff: http://git-wip-us.apache.org/repos/asf/lens/diff/1f885cd3

Branch: refs/heads/master
Commit: 1f885cd3d74310ea1cb4e2049692d53d49e5e7cc
Parents: 3cd03a3
Author: Sushil Mohanty <su...@apache.org>
Authored: Sun Oct 25 11:27:20 2015 +0530
Committer: Amareshwari Sriramadasu <am...@apache.org>
Committed: Sun Oct 25 11:27:20 2015 +0530

----------------------------------------------------------------------
 .../lens/driver/jdbc/ColumnarSQLRewriter.java   |  18 ++-
 .../driver/jdbc/TestColumnarSQLRewriter.java    | 149 +++++++++----------
 2 files changed, 85 insertions(+), 82 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lens/blob/1f885cd3/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
----------------------------------------------------------------------
diff --git a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
index 2d81ff2..3fdb40e 100644
--- a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
+++ b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
@@ -533,15 +533,19 @@ public class ColumnarSQLRewriter implements QueryRewriter {
       log.debug("AST is null ");
       return;
     }
-    if (node.getToken().getType() == HiveParser.DOT
-      && node.getParent().getChild(0).getType() != HiveParser.Identifier) {
-      String table = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier).toString();
-      String column = node.getChild(1).toString().toLowerCase();
+    if (HQLParser.isAggregateAST(node)) {
+      return;
+    } else {
+      if (node.getToken().getType() == HiveParser.DOT
+              && node.getParent().getChild(0).getType() != HiveParser.Identifier) {
+        String table = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier).toString();
+        String column = node.getChild(1).toString().toLowerCase();
 
-      String factAlias = getFactAlias();
+        String factAlias = getFactAlias();
 
-      if (table.equals(factAlias)) {
-        factKeys.add(factAlias + "." + column);
+        if (table.equals(factAlias)) {
+          factKeys.add(factAlias + "." + column);
+        }
       }
     }
 

http://git-wip-us.apache.org/repos/asf/lens/blob/1f885cd3/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
----------------------------------------------------------------------
diff --git a/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java b/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
index 77ab302..41c3c85 100644
--- a/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
+++ b/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
@@ -578,32 +578,31 @@ public class TestColumnarSQLRewriter {
             + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ), sum(alias1) dollars_sold , "
             + "round(sum(alias2),  2 ), avg(alias3), min(alias4), max(alias5),  location_name  "
             + "from  (select sales_fact___fact.time_key, sales_fact___fact.location_key, "
-            + "sales_fact___fact.item_key, sales_fact___fact.dollars_sold,sum( case  "
-            + "when (( sales_fact___fact . dollars_sold ) =  0 ) then  0.0  else "
-            + "( sales_fact___fact . dollars_sold ) end ) as alias1, "
+            + "sales_fact___fact.item_key,sum( case  when (( sales_fact___fact . dollars_sold ) =  0 ) "
+            + "then  0.0  else ( sales_fact___fact . dollars_sold ) end ) as alias1, "
             + "sum(( sales_fact___fact . units_sold )) as alias2, "
             + "avg(( sales_fact___fact . dollars_sold )) as alias3, "
-            + "min(( sales_fact___fact . dollars_sold )) as alias4, "
-            + "max(( sales_fact___fact . dollars_sold )) as alias5 "
-            + "from sales_fact sales_fact___fact where sales_fact___fact.time_key "
-            + "in  (  select time_dim .time_key from time_dim where "
-            + "( time_dim. time_key ) between  '2013-01-01'  and  '2013-01-31'  ) "
-            + "and sales_fact___fact.location_key in  (  select location_dim .location_key "
+            + "min(( sales_fact___fact . dollars_sold )) as alias4, max(( sales_fact___fact . dollars_sold )) "
+            + "as alias5 from sales_fact sales_fact___fact where sales_fact___fact.time_key in  "
+            + "(  select time_dim .time_key from time_dim where ( time_dim. time_key ) "
+            + "between  '2013-01-01'  and  '2013-01-31'  ) and "
+            + "sales_fact___fact.location_key in  (  select location_dim .location_key "
             + "from location_dim where (( location_dim. location_name ) =  'test123' ) ) "
-            + "and sales_fact___fact.item_key in  (  select item_dim .item_key from item_dim "
-            + "where (( item_dim. item_name ) =  'item_1' ) )  group by sales_fact___fact.time_key, "
-            + "sales_fact___fact.location_key, sales_fact___fact.item_key, sales_fact___fact.dollars_sold) "
-            + "sales_fact___fact  inner join (select time_key,day_of_week,day from time_dim) "
-            + "time_dim___time_dim on (( sales_fact___fact . time_key ) = "
-            + "( time_dim___time_dim . time_key ))  inner join (select location_key,"
-            + "location_name from location_dim) location_dim___location_dim on "
-            + "(( sales_fact___fact . location_key ) = ( location_dim___location_dim . location_key ))  "
-            + "inner join (select item_key,item_name from item_dim) item_dim___item_dim on "
-            + "((( sales_fact___fact . item_key ) = ( item_dim___item_dim . item_key )) and "
-            + "(( location_dim___location_dim . location_name ) =  'test123' ))  where "
-            + "(( time_dim___time_dim . time_key ) between  '2013-01-01'  and  '2013-01-31'  "
-            + "and (( item_dim___item_dim . item_name ) =  'item_1' )) group by "
-            + "( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), "
+            + "and sales_fact___fact.item_key in  (  select item_dim .item_key from "
+            + "item_dim where (( item_dim. item_name ) =  'item_1' ) )  "
+            + "group by sales_fact___fact.time_key, sales_fact___fact.location_key, "
+            + "sales_fact___fact.item_key) sales_fact___fact  inner join "
+            + "(select time_key,day_of_week,day from time_dim) time_dim___time_dim on "
+            + "(( sales_fact___fact . time_key ) = ( time_dim___time_dim . time_key ))  "
+            + "inner join (select location_key,location_name from location_dim) "
+            + "location_dim___location_dim on (( sales_fact___fact . location_key ) = "
+            + "( location_dim___location_dim . location_key ))  inner join "
+            + "(select item_key,item_name from item_dim) item_dim___item_dim on "
+            + "((( sales_fact___fact . item_key ) = ( item_dim___item_dim . item_key )) "
+            + "and (( location_dim___location_dim . location_name ) =  'test123' ))  "
+            + "where (( time_dim___time_dim . time_key ) between  '2013-01-01'  and  '2013-01-31'  "
+            + "and (( item_dim___item_dim . item_name ) =  'item_1' )) "
+            + "group by ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), "
             + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) "
             + "order by dollars_sold  desc";
     compareQueries(expected, actual);
@@ -630,34 +629,35 @@ public class TestColumnarSQLRewriter {
 
     String actual = qtest.rewrite(query, conf, hconf);
     String expected = "select ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), "
-        + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ), sum(alias1) dollars_sold , "
-        + "round(sum(alias2),  2 ), avg(alias6) avg_dollars_sold, min(alias4), max(alias5) max_dollars_sold,  "
-        + "location_name , (avg(alias6) /  1.0 ) "
-        + "from  (select sales_fact___fact.time_key, sales_fact___fact.location_key, sales_fact___fact.item_key,"
-        + "sales_fact___fact.dollars_sold, sum( case  when (( sales_fact___fact . dollars_sold ) =  0 ) then 0.0  end )"
-        + "as alias1, sum(( sales_fact___fact . units_sold )) as alias2, avg(( sales_fact___fact . dollars_sold )) "
-        + "as alias3, min(( sales_fact___fact . dollars_sold )) as alias4, "
-        + "max(( sales_fact___fact . dollars_sold )) as alias5, "
-        + "avg(( sales_fact___fact . dollars_sold )) as alias6 from sales_fact sales_fact___fact "
-        + "where sales_fact___fact.time_key in  (  select time_dim .time_key from time_dim "
-        + "where ( time_dim. time_key ) between  '2013-01-01'  and  '2013-01-31'  ) "
-        + "and sales_fact___fact.location_key in  (  select location_dim .location_key from "
-        + "location_dim where (( location_dim. location_name ) =  'test123' ) ) and sales_fact___fact.item_key in  "
-        + "(  select item_dim .item_key from item_dim where (( item_dim. item_name ) =  'item_1' ) )  "
-        + "group by sales_fact___fact.time_key, sales_fact___fact.location_key, "
-        + "sales_fact___fact.item_key, sales_fact___fact.dollars_sold) sales_fact___fact  "
-        + "inner join (select time_key, day_of_week, day from time_dim) "
-        + "time_dim___time_dim on (( sales_fact___fact . time_key ) = "
-        + "( time_dim___time_dim . time_key ))  inner join (select location_key, "
-        + "location_name from location_dim) location_dim___location_dim "
-        + "on (( sales_fact___fact . location_key ) = ( location_dim___location_dim . location_key ))  "
-        + "inner join (select item_key, item_name from item_dim) item_dim___item_dim "
-        + "on ((( sales_fact___fact . item_key ) = "
-        + "( item_dim___item_dim . item_key )) and (( location_dim___location_dim . location_name ) =  'test123' ))  "
-        + "where (( time_dim___time_dim . time_key ) between  '2013-01-01'  and  '2013-01-31'  "
-        + "and (( item_dim___item_dim . item_name ) =  'item_1' )) group by ( sales_fact___fact . time_key ), "
-        + "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day ), "
-        + "( item_dim___item_dim . item_key ) order by dollars_sold  desc";
+            + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ), "
+            + "sum(alias1) dollars_sold , round(sum(alias2),  2 ), avg(alias6) avg_dollars_sold , min(alias4), "
+            + "max(alias5) max_dollars_sold ,  location_name , (avg(alias6) /  1.0 ) from  "
+            + "(select sales_fact___fact.time_key, sales_fact___fact.location_key, "
+            + "sales_fact___fact.item_key,sum( case  when (( sales_fact___fact . dollars_sold ) =  0 ) "
+            + "then  0.0  end ) as alias1, sum(( sales_fact___fact . units_sold )) as alias2, "
+            + "avg(( sales_fact___fact . dollars_sold )) as alias3, min(( sales_fact___fact . dollars_sold )) "
+            + "as alias4, max(( sales_fact___fact . dollars_sold )) as alias5, "
+            + "avg(( sales_fact___fact . dollars_sold )) as alias6 "
+            + "from sales_fact sales_fact___fact where sales_fact___fact.time_key in  "
+            + "(  select time_dim .time_key from time_dim where ( time_dim. time_key ) "
+            + "between  '2013-01-01'  and  '2013-01-31'  ) and sales_fact___fact.location_key in  "
+            + "(  select location_dim .location_key from location_dim where "
+            + "(( location_dim. location_name ) =  'test123' ) ) and sales_fact___fact.item_key in  "
+            + "(  select item_dim .item_key from item_dim where (( item_dim. item_name ) =  'item_1' ) )  "
+            + "group by sales_fact___fact.time_key, sales_fact___fact.location_key, sales_fact___fact.item_key) "
+            + "sales_fact___fact  inner join (select time_key,day_of_week,day from time_dim) "
+            + "time_dim___time_dim on (( sales_fact___fact . time_key ) = "
+            + "( time_dim___time_dim . time_key ))  inner join (select location_key,location_name "
+            + "from location_dim) location_dim___location_dim on (( sales_fact___fact . location_key ) "
+            + "= ( location_dim___location_dim . location_key ))  inner join "
+            + "(select item_key,item_name from item_dim) item_dim___item_dim on "
+            + "((( sales_fact___fact . item_key ) = ( item_dim___item_dim . item_key )) "
+            + "and (( location_dim___location_dim . location_name ) =  'test123' ))  "
+            + "where (( time_dim___time_dim . time_key ) between  '2013-01-01'  and  '2013-01-31'  "
+            + "and (( item_dim___item_dim . item_name ) =  'item_1' )) group by "
+            + "( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ),"
+            + " ( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) "
+            + "order by dollars_sold  desc";
     compareQueries(expected, actual);
 
   }
@@ -951,33 +951,32 @@ public class TestColumnarSQLRewriter {
     String actual = qtest.rewrite(query, conf, hconf);
     String expected = "select ( sales_fact__db_sales_fact_fact . time_key ), ( time_dim___time_dim . day_of_week ), "
             + "( time_dim___time_dim . day ), ((sum(alias1) +  0 ) + (sum(alias2) +  0 )) expr1 , "
-            + "((sum(alias3) *  1000 ) / sum(alias4)) expr2  from  (select sales_fact__db_sales_fact_fact.time_key, "
-            + "sales_fact__db_sales_fact_fact.location_key, sales_fact__db_sales_fact_fact.status_id, "
-            + "sales_fact__db_sales_fact_fact.item_sold, sales_fact__db_sales_fact_fact.dollars_sold,"
-            + "sum(( sales_fact__db_sales_fact_fact . item_count )) as alias1, sum(( sales_fact__db_sales_fact_fact "
-            + ". product_count )) as alias2, sum( case  when ( sales_fact__db_sales_fact_fact . status_id ) "
-            + "is null  then  0  when (( sales_fact__db_sales_fact_fact . status_id ) =  1 ) then "
-            + "( sales_fact__db_sales_fact_fact . item_sold ) else  0  end ) as alias3, sum( case  "
-            + "when ( sales_fact__db_sales_fact_fact . status_id ) is null  then  0  when "
-            + "(( sales_fact__db_sales_fact_fact . status_id ) =  2 ) then "
-            + "( sales_fact__db_sales_fact_fact . dollars_sold ) else  0  end ) as alias4 "
-            + "from db.sales_fact sales_fact__db_sales_fact_fact where sales_fact__db_sales_fact_fact.time_key in "
-            + " (  select time_dim .time_key from time_dim where ( time_dim. time_key ) "
-            + "between  '2013-01-01'  and  '2013-01-31'  ) and sales_fact__db_sales_fact_fact.location_key in "
-            + " (  select location_dim .location_key from location_dim where (( location_dim. location_name ) "
-            + "=  'test123' ) )  group by sales_fact__db_sales_fact_fact.time_key, "
-            + "sales_fact__db_sales_fact_fact.location_key, sales_fact__db_sales_fact_fact.status_id, "
-            + "sales_fact__db_sales_fact_fact.item_sold, sales_fact__db_sales_fact_fact.dollars_sold) "
+            + "((sum(alias3) *  1000 ) / sum(alias4)) expr2  from  "
+            + "(select sales_fact__db_sales_fact_fact.time_key, sales_fact__db_sales_fact_fact.location_key,"
+            + "sum(( sales_fact__db_sales_fact_fact . item_count )) as alias1, "
+            + "sum(( sales_fact__db_sales_fact_fact . product_count )) as alias2, "
+            + "sum( case  when ( sales_fact__db_sales_fact_fact . status_id ) is null  then  0  "
+            + "when (( sales_fact__db_sales_fact_fact . status_id ) =  1 ) then "
+            + "( sales_fact__db_sales_fact_fact . item_sold ) else  0  end ) as alias3, "
+            + "sum( case  when ( sales_fact__db_sales_fact_fact . status_id ) is null  "
+            + "then  0  when (( sales_fact__db_sales_fact_fact . status_id ) =  2 ) "
+            + "then ( sales_fact__db_sales_fact_fact . dollars_sold ) else  0  end ) as "
+            + "alias4 from db.sales_fact sales_fact__db_sales_fact_fact where "
+            + "sales_fact__db_sales_fact_fact.time_key in  (  select time_dim .time_key from time_dim "
+            + "where ( time_dim. time_key ) between  '2013-01-01'  and  '2013-01-31'  ) "
+            + "and sales_fact__db_sales_fact_fact.location_key in  (  select location_dim .location_key "
+            + "from location_dim where (( location_dim. location_name ) =  'test123' ) )  "
+            + "group by sales_fact__db_sales_fact_fact.time_key, sales_fact__db_sales_fact_fact.location_key) "
             + "sales_fact__db_sales_fact_fact  inner join (select time_key,day_of_week,day from time_dim) "
             + "time_dim___time_dim on (( sales_fact__db_sales_fact_fact . time_key ) = "
-            + "( time_dim___time_dim . time_key ))  inner join (select location_key,location_name "
-            + "from db.location_dim) location_dim__db_location_dim_ld on ((( sales_fact__db_sales_fact_fact . "
-            + "location_key ) = ( location_dim__db_location_dim_ld . location_key )) and "
+            + "( time_dim___time_dim . time_key ))  inner join (select location_key,"
+            + "location_name from db.location_dim) location_dim__db_location_dim_ld on "
+            + "((( sales_fact__db_sales_fact_fact . location_key ) = "
+            + "( location_dim__db_location_dim_ld . location_key )) and "
             + "(( location_dim__db_location_dim_ld . location_name ) =  'test123' ))  "
-            + "where ( time_dim___time_dim . time_key ) between  '2013-01-01'  and  "
-            + "'2013-01-31'  group by ( sales_fact__db_sales_fact_fact . time_key ), "
-            + "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day ) "
-            + "order by dollars_sold  desc";
+            + "where ( time_dim___time_dim . time_key ) between  '2013-01-01'  and  '2013-01-31'  "
+            + "group by ( sales_fact__db_sales_fact_fact . time_key ), ( time_dim___time_dim . day_of_week ), "
+            + "( time_dim___time_dim . day ) order by dollars_sold  desc";
 
     compareQueries(expected, actual);
   }