You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lens.apache.org by su...@apache.org on 2017/06/06 11:12:27 UTC

lens git commit: LENS-1430 : Expressions are not getting resolved correctly in union query

Repository: lens
Updated Branches:
  refs/heads/master cb2529672 -> 89b17e640


LENS-1430 : Expressions are not getting resolved correctly in union query


Project: http://git-wip-us.apache.org/repos/asf/lens/repo
Commit: http://git-wip-us.apache.org/repos/asf/lens/commit/89b17e64
Tree: http://git-wip-us.apache.org/repos/asf/lens/tree/89b17e64
Diff: http://git-wip-us.apache.org/repos/asf/lens/diff/89b17e64

Branch: refs/heads/master
Commit: 89b17e640b47ff9e071f8df53c05ae0feaf25181
Parents: cb25296
Author: Sushil Mohanty <su...@gmail.com>
Authored: Tue Jun 6 16:42:10 2017 +0530
Committer: sushilmohanty <su...@apache.org>
Committed: Tue Jun 6 16:42:10 2017 +0530

----------------------------------------------------------------------
 .../lens/cube/parse/ExpressionResolver.java     | 36 +++++++++++-
 .../cube/parse/TestUnionAndJoinCandidates.java  | 60 +++++++++++++++++++-
 .../resources/schema/cubes/base/basecube.xml    | 12 ++++
 .../resources/schema/cubes/base/testcube.xml    |  9 +++
 .../cubes/derived/union_join_ctx_der1.xml       |  2 +
 .../schema/facts/union_join_ctx_fact1.xml       |  2 +-
 .../schema/facts/union_join_ctx_fact3.xml       |  1 +
 .../schema/facts/union_join_ctx_fact4.xml       | 59 +++++++++++++++++++
 8 files changed, 177 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java
index f38aa54..ea6d5c7 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java
@@ -285,6 +285,7 @@ class ExpressionResolver implements ContextRewriter {
     @Getter
     private Map<String, Set<ExpressionContext>> allExprsQueried = new HashMap<String, Set<ExpressionContext>>();
     private Map<String, Set<PickedExpression>> pickedExpressions = new HashMap<String, Set<PickedExpression>>();
+    private Map<String, ASTNode> nonPickedExpressionsForCandidate = new HashMap<String, ASTNode>();
     private final CubeQueryContext cubeql;
 
     ExpressionResolverContext(CubeQueryContext cubeql) {
@@ -407,6 +408,7 @@ class ExpressionResolver implements ContextRewriter {
       }
 
       pickedExpressions.clear();
+      nonPickedExpressionsForCandidate.clear();
 
       return exprDims;
     }
@@ -426,7 +428,7 @@ class ExpressionResolver implements ContextRewriter {
       // Having ast is not copied now, it's maintained in cubeQueryContext, each fact processes that serially.
       if (queryAST.getHavingAST() != null) {
         replaceAST(cubeql, queryAST.getHavingAST());
-      } else if (cubeql.getHavingAST() != null) {
+      } else if (cubeql.getHavingAST() != null && nonPickedExpressionsForCandidate.isEmpty()) {
         replaceAST(cubeql, cubeql.getHavingAST());
         queryAST.setHavingAST(MetastoreUtil.copyAST(cubeql.getHavingAST()));
       }
@@ -459,6 +461,9 @@ class ExpressionResolver implements ContextRewriter {
               if (expr != null) {
                 node1.setChild(i, replaceAlias(expr.getRewrittenAST(), cubeql));
               }
+            } else if (nonPickedExpressionsForCandidate.containsKey(column)) {
+              node1.setChild(i, nonPickedExpressionsForCandidate.get(column));
+
             }
           }
         }
@@ -477,7 +482,7 @@ class ExpressionResolver implements ContextRewriter {
       return null;
     }
 
-    private void pickExpressionsForTable(CandidateTable cTable) {
+    private void pickExpressionsForTable(CandidateTable cTable) throws LensException {
       for (Map.Entry<String, Set<ExpressionContext>> ecEntry : allExprsQueried.entrySet()) {
         Set<ExpressionContext> ecSet = ecEntry.getValue();
         for (ExpressionContext ec : ecSet) {
@@ -488,6 +493,8 @@ class ExpressionResolver implements ContextRewriter {
                 // pick first evaluable expression
                 pickedExpressions.computeIfAbsent(ecEntry.getKey(), k -> new HashSet<>())
                   .add(new PickedExpression(ec.srcAlias, ec.evaluableExpressions.get(cTable).iterator().next()));
+              } else {
+                nonPickedExpressionsForCandidate.put(ecEntry.getKey(), getDefaultExpr(getExprAst(ec)));
               }
             }
           }
@@ -495,6 +502,31 @@ class ExpressionResolver implements ContextRewriter {
       }
     }
 
+    private ASTNode getExprAst(ExpressionContext ec) {
+      Set<StorageCandidate> scSet = CandidateUtil.getStorageCandidates(cubeql.getCandidates());
+      Set<String> storageTableNames = new HashSet<String>();
+      for (StorageCandidate sc : scSet) {
+        storageTableNames.add(sc.getStorageTable());
+      }
+      for (CandidateTable table : ec.evaluableExpressions.keySet()) {
+        if (storageTableNames.contains(table.getStorageTable())) {
+          return  MetastoreUtil.copyAST(ec.evaluableExpressions.get(table).iterator().next().finalAST);
+        }
+      }
+      return null;
+    }
+
+    private  ASTNode getDefaultExpr(ASTNode node) {
+      if (HQLParser.isAggregateAST(node)) {
+        node.setChild(1, new ASTNode(new CommonToken(HiveParser.Identifier, "0.0")));
+      }
+      for (int i = 0; i < node.getChildCount(); i++) {
+        ASTNode child = (ASTNode) node.getChild(i);
+        getDefaultExpr(child);
+      }
+      return node;
+    }
+
     void pruneExpressions() {
       for (Set<ExpressionContext> ecSet : allExprsQueried.values()) {
         for (ExpressionContext ec : ecSet) {

http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/java/org/apache/lens/cube/parse/TestUnionAndJoinCandidates.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestUnionAndJoinCandidates.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestUnionAndJoinCandidates.java
index 9422a5c..429e1c6 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestUnionAndJoinCandidates.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestUnionAndJoinCandidates.java
@@ -49,10 +49,12 @@ public class TestUnionAndJoinCandidates extends TestQueryRewrite {
         getValidStorageTablesKey("union_join_ctx_fact1"), "C1_union_join_ctx_fact1",
         getValidStorageTablesKey("union_join_ctx_fact2"), "C1_union_join_ctx_fact2",
         getValidStorageTablesKey("union_join_ctx_fact3"), "C1_union_join_ctx_fact3",
+        getValidStorageTablesKey("union_join_ctx_fact4"), "C1_union_join_ctx_fact4",
         // Update periods
         getValidUpdatePeriodsKey("union_join_ctx_fact1", "C1"), "DAILY",
         getValidUpdatePeriodsKey("union_join_ctx_fact2", "C1"), "DAILY",
-        getValidUpdatePeriodsKey("union_join_ctx_fact3", "C1"), "DAILY");
+        getValidUpdatePeriodsKey("union_join_ctx_fact3", "C1"), "DAILY",
+        getValidUpdatePeriodsKey("union_join_ctx_fact4", "C1"), "DAILY");
     conf.setBoolean(DISABLE_AUTO_JOINS, false);
     conf.setBoolean(ENABLE_SELECT_TO_GROUPBY, true);
     conf.setBoolean(ENABLE_GROUP_BY_TO_SELECT, true);
@@ -90,6 +92,62 @@ public class TestUnionAndJoinCandidates extends TestQueryRewrite {
   }
 
   @Test
+  public void testCustomExpressionForJoinCandidate() throws ParseException, LensException {
+    // Expr : (case when union_join_ctx_msr2_expr = 0 then 0 else
+    // union_join_ctx_msr4_expr * 100 / union_join_ctx_msr2_expr end) is completely answered by
+    // c1_union_join_ctx_fact4 and partly answered by c1_union_join_ctx_fact3
+    String colsSelected = " union_join_ctx_notnullcityid, union_join_ctx_msr22 , "
+        + "case when union_join_ctx_msr2_expr = 0 then 0 else "
+        + " union_join_ctx_msr4_expr * 100 / union_join_ctx_msr2_expr end";
+    String whereCond =  "(" + TWO_MONTHS_RANGE_UPTO_DAYS + ")";
+    String rewrittenQuery = rewrite("select " + colsSelected + " from basecube where " + whereCond, conf);
+    assertTrue(rewrittenQuery.contains("UNION ALL"));
+    String expectedInnerSelect1 = "SELECT case  when (basecube.union_join_ctx_cityid) is null then 0 "
+        + "else (basecube.union_join_ctx_cityid) end as `alias0`, 0.0 as `alias1`, "
+        + "sum((basecube.union_join_ctx_msr2)) as `alias2`, sum((basecube.union_join_ctx_msr4)) as `alias3` "
+        + "FROM TestQueryRewrite.c1_union_join_ctx_fact4 basecube";
+    String expectedInnerSelect2 = "SELECT case  when (basecube.union_join_ctx_cityid) is null then 0 "
+        + "else (basecube.union_join_ctx_cityid) end as `alias0`, (basecube.union_join_ctx_msr22) as `alias1`, "
+        + "0.0 as `alias2`, 0.0 as `alias3` FROM TestQueryRewrite.c1_union_join_ctx_fact3 basecube";
+    String outerSelect = "SELECT (basecube.alias0) as `union_join_ctx_notnullcityid`, (basecube.alias1) "
+        + "as `union_join_ctx_msr22`, case  when ((sum((basecube.alias2)) + 0) = 0) then 0 else "
+        + "(((sum((basecube.alias3)) + 0) * 100) / (sum((basecube.alias2)) + 0)) end as "
+        + "`case  when (union_join_ctx_msr2_expr = 0) then 0 "
+        + "else ((union_join_ctx_msr4_expr * 100) / union_join_ctx_msr2_expr) end`";
+    String outerGroupBy = "GROUP BY (basecube.alias0)";
+    compareContains(expectedInnerSelect1, rewrittenQuery);
+    compareContains(expectedInnerSelect2, rewrittenQuery);
+    compareContains(outerSelect, rewrittenQuery);
+    compareContains(outerGroupBy, rewrittenQuery);
+  }
+
+  @Test
+  public void testDuplicateMeasureProjectionInJoinCandidate() throws ParseException, LensException {
+    // union_join_ctx_msr2 is common between two storage candidates and it should be answered from one
+    // and the other fact will have it replaced with 0.0
+    String colsSelected = " union_join_ctx_notnullcityid, sum(union_join_ctx_msr22) , "
+        + "sum(union_join_ctx_msr2), sum(union_join_ctx_msr4) ";
+    String whereCond =  "(" + TWO_MONTHS_RANGE_UPTO_DAYS + ")";
+    String rewrittenQuery = rewrite("select " + colsSelected + " from basecube where " + whereCond, conf);
+    assertTrue(rewrittenQuery.contains("UNION ALL"));
+    String expectedInnerSelect1 = "SELECT case  when (basecube.union_join_ctx_cityid) is null then 0 "
+        + "else (basecube.union_join_ctx_cityid) end as `alias0`, 0.0 as `alias1`, "
+        + "sum((basecube.union_join_ctx_msr2)) as `alias2`, sum((basecube.union_join_ctx_msr4)) "
+        + "as `alias3` FROM TestQueryRewrite.c1_union_join_ctx_fact4 basecube";
+    String expectedInnerSelect2 = "SELECT case  when (basecube.union_join_ctx_cityid) is null then 0 else "
+        + "(basecube.union_join_ctx_cityid) end as `alias0`, sum((basecube.union_join_ctx_msr22)) as `alias1`, "
+        + "0.0 as `alias2`, 0.0 as `alias3` FROM TestQueryRewrite.c1_union_join_ctx_fact3 basecube";
+    String outerSelect = "SELECT (basecube.alias0) as `union_join_ctx_notnullcityid`, sum((basecube.alias1)) "
+        + "as `sum(union_join_ctx_msr22)`, sum((basecube.alias2)) as `sum(union_join_ctx_msr2)`, "
+        + "sum((basecube.alias3)) as `sum(union_join_ctx_msr4)` FROM";
+    String outerGroupBy = "GROUP BY (basecube.alias0)";
+    compareContains(expectedInnerSelect1, rewrittenQuery);
+    compareContains(expectedInnerSelect2, rewrittenQuery);
+    compareContains(outerSelect, rewrittenQuery);
+    compareContains(outerGroupBy, rewrittenQuery);
+  }
+
+  @Test(invocationCount = 100)
   public void testFinalCandidateRewrittenQuery() throws ParseException, LensException {
     try {
       // Query with non projected measure in having clause.

http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/resources/schema/cubes/base/basecube.xml
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/resources/schema/cubes/base/basecube.xml b/lens-cube/src/test/resources/schema/cubes/base/basecube.xml
index 55099c8..708b510 100644
--- a/lens-cube/src/test/resources/schema/cubes/base/basecube.xml
+++ b/lens-cube/src/test/resources/schema/cubes/base/basecube.xml
@@ -46,6 +46,10 @@
     </measure>
     <measure _type="INT" name="union_join_ctx_msr2" description="union_join_ctx_second measure">
     </measure>
+    <measure _type="INT" name="union_join_ctx_msr22" description="union_join_ctx_second measure">
+    </measure>
+    <measure _type="INT" name="union_join_ctx_msr4" description="union_join_ctx_fourth measure">
+    </measure>
     <measure _type="FLOAT" default_aggr="SUM" unit="RS" name="msr2" display_string="Measure2"
              description="second measure">
     </measure>
@@ -249,6 +253,14 @@
                 description="union_join_ctx_non zero msr2 sum">
       <expr_spec expr="sum(case when union_join_ctx_msr2 &gt; 0 then union_join_ctx_msr2 else 0 end)"/>
     </expression>
+    <expression _type="int" name="union_join_ctx_msr2_expr" display_string="union_join_ctx_msr2_expr"
+                description="union_join_ctx_msr2_expr">
+      <expr_spec expr="sum(union_join_ctx_msr2) + 0"/>
+    </expression>
+    <expression _type="int" name="union_join_ctx_msr4_expr" display_string="union_join_ctx_msr4_expr"
+                description="union_join_ctx_msr4_expr">
+      <expr_spec expr="sum(union_join_ctx_msr4) + 0"/>
+    </expression>
     <expression _type="double" name="flooredmsr12" display_string="Floored msr12" description="floored measure12">
       <expr_spec expr="floor(msr12)"/>
     </expression>

http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/resources/schema/cubes/base/testcube.xml
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/resources/schema/cubes/base/testcube.xml b/lens-cube/src/test/resources/schema/cubes/base/testcube.xml
index ef13700..088c8de 100644
--- a/lens-cube/src/test/resources/schema/cubes/base/testcube.xml
+++ b/lens-cube/src/test/resources/schema/cubes/base/testcube.xml
@@ -39,6 +39,7 @@
              description="fifteenth measure"/>
     <measure _type="INT" name="union_join_ctx_msr3" description="union_join_ctx_third measure"/>
     <measure _type="INT" name="union_join_ctx_msr2" description="union_join_ctx_second measure"/>
+    <measure _type="INT" name="union_join_ctx_msr4" description="union_join_ctx_fourth measure"/>
     <measure _type="FLOAT" default_aggr="SUM" unit="RS" name="msr2" display_string="Measure2"
              description="second measure"/>
     <measure _type="DOUBLE" default_aggr="MAX" name="msr3" display_string="Measure3" description="third measure"/>
@@ -267,6 +268,14 @@
                 description="union_join_ctx_Not null cityid">
       <expr_spec expr="case when union_join_ctx_cityid is null then 0 else union_join_ctx_cityid end"/>
     </expression>
+    <expression _type="int" name="union_join_ctx_msr2_expr" display_string="union_join_ctx_msr2_expr"
+                description="union_join_ctx_msr2_expr">
+      <expr_spec expr="sum(union_join_ctx_msr2) + 0"/>
+    </expression>
+    <expression _type="int" name="union_join_ctx_msr4_expr" display_string="union_join_ctx_msr4_expr"
+                description="union_join_ctx_msr4_expr">
+      <expr_spec expr="sum(union_join_ctx_msr4) + 0"/>
+    </expression>
     <expression _type="String" name="isindia" display_string="Is Indian City/state" description="is indian city/state">
       <expr_spec expr="cubecity.name == 'DELHI' OR cubestate.name == 'KARNATAKA' OR cubestate.name == 'MAHARASHTRA'"/>
     </expression>

http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/resources/schema/cubes/derived/union_join_ctx_der1.xml
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/resources/schema/cubes/derived/union_join_ctx_der1.xml b/lens-cube/src/test/resources/schema/cubes/derived/union_join_ctx_der1.xml
index 69eda6d..c23a029 100644
--- a/lens-cube/src/test/resources/schema/cubes/derived/union_join_ctx_der1.xml
+++ b/lens-cube/src/test/resources/schema/cubes/derived/union_join_ctx_der1.xml
@@ -35,6 +35,8 @@
     <measure_name>union_join_ctx_msr2</measure_name>
     <measure_name>union_join_ctx_msr1</measure_name>
     <measure_name>union_join_ctx_msr3</measure_name>
+    <measure_name>union_join_ctx_msr4</measure_name>
+    <measure_name>union_join_ctx_msr22</measure_name>
     <measure_name>segmsr1</measure_name>
   </measure_names>
   <dim_attr_names>

http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact1.xml
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact1.xml b/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact1.xml
index d5ac70a..b1f868b 100644
--- a/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact1.xml
+++ b/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact1.xml
@@ -21,7 +21,7 @@
 -->
 <x_fact_table name="union_join_ctx_fact1" cube_name="baseCube" weight="5.0" xmlns="uri:lens:cube:0.1">
   <columns>
-    <column name="union_join_ctx_msr1" _type="int" comment="first measure"/>
+    <column name="union_join_ctx_msr1" _type="int" comment="second measure"/>
     <column name="d_time" _type="timestamp" comment="event time"/>
     <column name="union_join_ctx_zipcode" _type="int" comment="zip"/>
     <column name="union_join_ctx_cityid" _type="int" comment="city id"/>

http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact3.xml
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact3.xml b/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact3.xml
index 27e859e..b9557a7 100644
--- a/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact3.xml
+++ b/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact3.xml
@@ -22,6 +22,7 @@
 <x_fact_table name="union_join_ctx_fact3" cube_name="baseCube" weight="5.0" xmlns="uri:lens:cube:0.1">
   <columns>
     <column name="union_join_ctx_msr2" _type="int" comment="second measure"/>
+    <column name="union_join_ctx_msr22" _type="int" comment="second measure"/>
     <column name="d_time" _type="timestamp" comment="event time"/>
     <column name="union_join_ctx_zipcode" _type="int" comment="zip"/>
     <column name="union_join_ctx_cityid" _type="int" comment="city id"/>

http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact4.xml
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact4.xml b/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact4.xml
new file mode 100644
index 0000000..2150304
--- /dev/null
+++ b/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact4.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!--
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied. See the License for the
+  specific language governing permissions and limitations
+  under the License.
+
+-->
+<x_fact_table name="union_join_ctx_fact4" cube_name="baseCube" weight="6.0" xmlns="uri:lens:cube:0.1">
+    <columns>
+        <column name="union_join_ctx_msr4" _type="int" comment="fourth measure"/>
+        <column name="union_join_ctx_msr2" _type="int" comment="second measure"/>
+        <column name="d_time" _type="timestamp" comment="event time"/>
+        <column name="union_join_ctx_zipcode" _type="int" comment="zip"/>
+        <column name="union_join_ctx_cityid" _type="int" comment="city id"/>
+    </columns>
+    <properties>
+        <property name="cube.fact.union_join_ctx_fact4.cubename" value="baseCube"/>
+        <property name="cube.fact.absolute.start.time" value="$absolute{now.day - 90 days}"/>
+        <property name="cube.fact.union_join_ctx_fact4.c1.updateperiods" value="DAILY"/>
+        <property name="cube.fact.absolute.end.time" value="$absolute{now.day + 7 days}"/>
+        <property name="cube.fact.is.aggregated" value="false"/>
+        <property name="cube.fact.union_join_ctx_fact4.storages" value="C1"/>
+        <property name="cube.table.union_join_ctx_fact4.weight" value="5.0"/>
+    </properties>
+    <storage_tables>
+        <storage_table>
+            <update_periods>
+                <update_period>DAILY</update_period>
+            </update_periods>
+            <storage_name>C1</storage_name>
+            <table_desc external="false">
+                <part_cols>
+                    <column name="dt" _type="string" comment="date partition"/>
+                </part_cols>
+                <table_parameters>
+                    <property name="cube.storagetable.time.partcols" value="dt"/>
+                </table_parameters>
+                <serde_parameters>
+                    <property name="serialization.format" value="1"/>
+                </serde_parameters>
+                <time_part_cols>dt</time_part_cols>
+            </table_desc>
+        </storage_table>
+    </storage_tables>
+</x_fact_table>
\ No newline at end of file