You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lens.apache.org by su...@apache.org on 2017/06/06 11:12:27 UTC
lens git commit: LENS-1430 : Expressions are not getting resolved
correctly in union query
Repository: lens
Updated Branches:
refs/heads/master cb2529672 -> 89b17e640
LENS-1430 : Expressions are not getting resolved correctly in union query
Project: http://git-wip-us.apache.org/repos/asf/lens/repo
Commit: http://git-wip-us.apache.org/repos/asf/lens/commit/89b17e64
Tree: http://git-wip-us.apache.org/repos/asf/lens/tree/89b17e64
Diff: http://git-wip-us.apache.org/repos/asf/lens/diff/89b17e64
Branch: refs/heads/master
Commit: 89b17e640b47ff9e071f8df53c05ae0feaf25181
Parents: cb25296
Author: Sushil Mohanty <su...@gmail.com>
Authored: Tue Jun 6 16:42:10 2017 +0530
Committer: sushilmohanty <su...@apache.org>
Committed: Tue Jun 6 16:42:10 2017 +0530
----------------------------------------------------------------------
.../lens/cube/parse/ExpressionResolver.java | 36 +++++++++++-
.../cube/parse/TestUnionAndJoinCandidates.java | 60 +++++++++++++++++++-
.../resources/schema/cubes/base/basecube.xml | 12 ++++
.../resources/schema/cubes/base/testcube.xml | 9 +++
.../cubes/derived/union_join_ctx_der1.xml | 2 +
.../schema/facts/union_join_ctx_fact1.xml | 2 +-
.../schema/facts/union_join_ctx_fact3.xml | 1 +
.../schema/facts/union_join_ctx_fact4.xml | 59 +++++++++++++++++++
8 files changed, 177 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java
index f38aa54..ea6d5c7 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/ExpressionResolver.java
@@ -285,6 +285,7 @@ class ExpressionResolver implements ContextRewriter {
@Getter
private Map<String, Set<ExpressionContext>> allExprsQueried = new HashMap<String, Set<ExpressionContext>>();
private Map<String, Set<PickedExpression>> pickedExpressions = new HashMap<String, Set<PickedExpression>>();
+ private Map<String, ASTNode> nonPickedExpressionsForCandidate = new HashMap<String, ASTNode>();
private final CubeQueryContext cubeql;
ExpressionResolverContext(CubeQueryContext cubeql) {
@@ -407,6 +408,7 @@ class ExpressionResolver implements ContextRewriter {
}
pickedExpressions.clear();
+ nonPickedExpressionsForCandidate.clear();
return exprDims;
}
@@ -426,7 +428,7 @@ class ExpressionResolver implements ContextRewriter {
// Having ast is not copied now, it's maintained in cubeQueryContext, each fact processes that serially.
if (queryAST.getHavingAST() != null) {
replaceAST(cubeql, queryAST.getHavingAST());
- } else if (cubeql.getHavingAST() != null) {
+ } else if (cubeql.getHavingAST() != null && nonPickedExpressionsForCandidate.isEmpty()) {
replaceAST(cubeql, cubeql.getHavingAST());
queryAST.setHavingAST(MetastoreUtil.copyAST(cubeql.getHavingAST()));
}
@@ -459,6 +461,9 @@ class ExpressionResolver implements ContextRewriter {
if (expr != null) {
node1.setChild(i, replaceAlias(expr.getRewrittenAST(), cubeql));
}
+ } else if (nonPickedExpressionsForCandidate.containsKey(column)) {
+ node1.setChild(i, nonPickedExpressionsForCandidate.get(column));
+
}
}
}
@@ -477,7 +482,7 @@ class ExpressionResolver implements ContextRewriter {
return null;
}
- private void pickExpressionsForTable(CandidateTable cTable) {
+ private void pickExpressionsForTable(CandidateTable cTable) throws LensException {
for (Map.Entry<String, Set<ExpressionContext>> ecEntry : allExprsQueried.entrySet()) {
Set<ExpressionContext> ecSet = ecEntry.getValue();
for (ExpressionContext ec : ecSet) {
@@ -488,6 +493,8 @@ class ExpressionResolver implements ContextRewriter {
// pick first evaluable expression
pickedExpressions.computeIfAbsent(ecEntry.getKey(), k -> new HashSet<>())
.add(new PickedExpression(ec.srcAlias, ec.evaluableExpressions.get(cTable).iterator().next()));
+ } else {
+ nonPickedExpressionsForCandidate.put(ecEntry.getKey(), getDefaultExpr(getExprAst(ec)));
}
}
}
@@ -495,6 +502,31 @@ class ExpressionResolver implements ContextRewriter {
}
}
+ private ASTNode getExprAst(ExpressionContext ec) {
+ Set<StorageCandidate> scSet = CandidateUtil.getStorageCandidates(cubeql.getCandidates());
+ Set<String> storageTableNames = new HashSet<String>();
+ for (StorageCandidate sc : scSet) {
+ storageTableNames.add(sc.getStorageTable());
+ }
+ for (CandidateTable table : ec.evaluableExpressions.keySet()) {
+ if (storageTableNames.contains(table.getStorageTable())) {
+ return MetastoreUtil.copyAST(ec.evaluableExpressions.get(table).iterator().next().finalAST);
+ }
+ }
+ return null;
+ }
+
+ private ASTNode getDefaultExpr(ASTNode node) {
+ if (HQLParser.isAggregateAST(node)) {
+ node.setChild(1, new ASTNode(new CommonToken(HiveParser.Identifier, "0.0")));
+ }
+ for (int i = 0; i < node.getChildCount(); i++) {
+ ASTNode child = (ASTNode) node.getChild(i);
+ getDefaultExpr(child);
+ }
+ return node;
+ }
+
void pruneExpressions() {
for (Set<ExpressionContext> ecSet : allExprsQueried.values()) {
for (ExpressionContext ec : ecSet) {
http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/java/org/apache/lens/cube/parse/TestUnionAndJoinCandidates.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestUnionAndJoinCandidates.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestUnionAndJoinCandidates.java
index 9422a5c..429e1c6 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestUnionAndJoinCandidates.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestUnionAndJoinCandidates.java
@@ -49,10 +49,12 @@ public class TestUnionAndJoinCandidates extends TestQueryRewrite {
getValidStorageTablesKey("union_join_ctx_fact1"), "C1_union_join_ctx_fact1",
getValidStorageTablesKey("union_join_ctx_fact2"), "C1_union_join_ctx_fact2",
getValidStorageTablesKey("union_join_ctx_fact3"), "C1_union_join_ctx_fact3",
+ getValidStorageTablesKey("union_join_ctx_fact4"), "C1_union_join_ctx_fact4",
// Update periods
getValidUpdatePeriodsKey("union_join_ctx_fact1", "C1"), "DAILY",
getValidUpdatePeriodsKey("union_join_ctx_fact2", "C1"), "DAILY",
- getValidUpdatePeriodsKey("union_join_ctx_fact3", "C1"), "DAILY");
+ getValidUpdatePeriodsKey("union_join_ctx_fact3", "C1"), "DAILY",
+ getValidUpdatePeriodsKey("union_join_ctx_fact4", "C1"), "DAILY");
conf.setBoolean(DISABLE_AUTO_JOINS, false);
conf.setBoolean(ENABLE_SELECT_TO_GROUPBY, true);
conf.setBoolean(ENABLE_GROUP_BY_TO_SELECT, true);
@@ -90,6 +92,62 @@ public class TestUnionAndJoinCandidates extends TestQueryRewrite {
}
@Test
+ public void testCustomExpressionForJoinCandidate() throws ParseException, LensException {
+ // Expr : (case when union_join_ctx_msr2_expr = 0 then 0 else
+ // union_join_ctx_msr4_expr * 100 / union_join_ctx_msr2_expr end) is completely answered by
+ // c1_union_join_ctx_fact4 and partly answered by c1_union_join_ctx_fact3
+ String colsSelected = " union_join_ctx_notnullcityid, union_join_ctx_msr22 , "
+ + "case when union_join_ctx_msr2_expr = 0 then 0 else "
+ + " union_join_ctx_msr4_expr * 100 / union_join_ctx_msr2_expr end";
+ String whereCond = "(" + TWO_MONTHS_RANGE_UPTO_DAYS + ")";
+ String rewrittenQuery = rewrite("select " + colsSelected + " from basecube where " + whereCond, conf);
+ assertTrue(rewrittenQuery.contains("UNION ALL"));
+ String expectedInnerSelect1 = "SELECT case when (basecube.union_join_ctx_cityid) is null then 0 "
+ + "else (basecube.union_join_ctx_cityid) end as `alias0`, 0.0 as `alias1`, "
+ + "sum((basecube.union_join_ctx_msr2)) as `alias2`, sum((basecube.union_join_ctx_msr4)) as `alias3` "
+ + "FROM TestQueryRewrite.c1_union_join_ctx_fact4 basecube";
+ String expectedInnerSelect2 = "SELECT case when (basecube.union_join_ctx_cityid) is null then 0 "
+ + "else (basecube.union_join_ctx_cityid) end as `alias0`, (basecube.union_join_ctx_msr22) as `alias1`, "
+ + "0.0 as `alias2`, 0.0 as `alias3` FROM TestQueryRewrite.c1_union_join_ctx_fact3 basecube";
+ String outerSelect = "SELECT (basecube.alias0) as `union_join_ctx_notnullcityid`, (basecube.alias1) "
+ + "as `union_join_ctx_msr22`, case when ((sum((basecube.alias2)) + 0) = 0) then 0 else "
+ + "(((sum((basecube.alias3)) + 0) * 100) / (sum((basecube.alias2)) + 0)) end as "
+ + "`case when (union_join_ctx_msr2_expr = 0) then 0 "
+ + "else ((union_join_ctx_msr4_expr * 100) / union_join_ctx_msr2_expr) end`";
+ String outerGroupBy = "GROUP BY (basecube.alias0)";
+ compareContains(expectedInnerSelect1, rewrittenQuery);
+ compareContains(expectedInnerSelect2, rewrittenQuery);
+ compareContains(outerSelect, rewrittenQuery);
+ compareContains(outerGroupBy, rewrittenQuery);
+ }
+
+ @Test
+ public void testDuplicateMeasureProjectionInJoinCandidate() throws ParseException, LensException {
+ // union_join_ctx_msr2 is common between two storage candidates and it should be answered from one
+ // and the other fact will have it replaced with 0.0
+ String colsSelected = " union_join_ctx_notnullcityid, sum(union_join_ctx_msr22) , "
+ + "sum(union_join_ctx_msr2), sum(union_join_ctx_msr4) ";
+ String whereCond = "(" + TWO_MONTHS_RANGE_UPTO_DAYS + ")";
+ String rewrittenQuery = rewrite("select " + colsSelected + " from basecube where " + whereCond, conf);
+ assertTrue(rewrittenQuery.contains("UNION ALL"));
+ String expectedInnerSelect1 = "SELECT case when (basecube.union_join_ctx_cityid) is null then 0 "
+ + "else (basecube.union_join_ctx_cityid) end as `alias0`, 0.0 as `alias1`, "
+ + "sum((basecube.union_join_ctx_msr2)) as `alias2`, sum((basecube.union_join_ctx_msr4)) "
+ + "as `alias3` FROM TestQueryRewrite.c1_union_join_ctx_fact4 basecube";
+ String expectedInnerSelect2 = "SELECT case when (basecube.union_join_ctx_cityid) is null then 0 else "
+ + "(basecube.union_join_ctx_cityid) end as `alias0`, sum((basecube.union_join_ctx_msr22)) as `alias1`, "
+ + "0.0 as `alias2`, 0.0 as `alias3` FROM TestQueryRewrite.c1_union_join_ctx_fact3 basecube";
+ String outerSelect = "SELECT (basecube.alias0) as `union_join_ctx_notnullcityid`, sum((basecube.alias1)) "
+ + "as `sum(union_join_ctx_msr22)`, sum((basecube.alias2)) as `sum(union_join_ctx_msr2)`, "
+ + "sum((basecube.alias3)) as `sum(union_join_ctx_msr4)` FROM";
+ String outerGroupBy = "GROUP BY (basecube.alias0)";
+ compareContains(expectedInnerSelect1, rewrittenQuery);
+ compareContains(expectedInnerSelect2, rewrittenQuery);
+ compareContains(outerSelect, rewrittenQuery);
+ compareContains(outerGroupBy, rewrittenQuery);
+ }
+
+ @Test(invocationCount = 100)
public void testFinalCandidateRewrittenQuery() throws ParseException, LensException {
try {
// Query with non projected measure in having clause.
http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/resources/schema/cubes/base/basecube.xml
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/resources/schema/cubes/base/basecube.xml b/lens-cube/src/test/resources/schema/cubes/base/basecube.xml
index 55099c8..708b510 100644
--- a/lens-cube/src/test/resources/schema/cubes/base/basecube.xml
+++ b/lens-cube/src/test/resources/schema/cubes/base/basecube.xml
@@ -46,6 +46,10 @@
</measure>
<measure _type="INT" name="union_join_ctx_msr2" description="union_join_ctx_second measure">
</measure>
+ <measure _type="INT" name="union_join_ctx_msr22" description="union_join_ctx_second measure">
+ </measure>
+ <measure _type="INT" name="union_join_ctx_msr4" description="union_join_ctx_fourth measure">
+ </measure>
<measure _type="FLOAT" default_aggr="SUM" unit="RS" name="msr2" display_string="Measure2"
description="second measure">
</measure>
@@ -249,6 +253,14 @@
description="union_join_ctx_non zero msr2 sum">
<expr_spec expr="sum(case when union_join_ctx_msr2 > 0 then union_join_ctx_msr2 else 0 end)"/>
</expression>
+ <expression _type="int" name="union_join_ctx_msr2_expr" display_string="union_join_ctx_msr2_expr"
+ description="union_join_ctx_msr2_expr">
+ <expr_spec expr="sum(union_join_ctx_msr2) + 0"/>
+ </expression>
+ <expression _type="int" name="union_join_ctx_msr4_expr" display_string="union_join_ctx_msr4_expr"
+ description="union_join_ctx_msr4_expr">
+ <expr_spec expr="sum(union_join_ctx_msr4) + 0"/>
+ </expression>
<expression _type="double" name="flooredmsr12" display_string="Floored msr12" description="floored measure12">
<expr_spec expr="floor(msr12)"/>
</expression>
http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/resources/schema/cubes/base/testcube.xml
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/resources/schema/cubes/base/testcube.xml b/lens-cube/src/test/resources/schema/cubes/base/testcube.xml
index ef13700..088c8de 100644
--- a/lens-cube/src/test/resources/schema/cubes/base/testcube.xml
+++ b/lens-cube/src/test/resources/schema/cubes/base/testcube.xml
@@ -39,6 +39,7 @@
description="fifteenth measure"/>
<measure _type="INT" name="union_join_ctx_msr3" description="union_join_ctx_third measure"/>
<measure _type="INT" name="union_join_ctx_msr2" description="union_join_ctx_second measure"/>
+ <measure _type="INT" name="union_join_ctx_msr4" description="union_join_ctx_fourth measure"/>
<measure _type="FLOAT" default_aggr="SUM" unit="RS" name="msr2" display_string="Measure2"
description="second measure"/>
<measure _type="DOUBLE" default_aggr="MAX" name="msr3" display_string="Measure3" description="third measure"/>
@@ -267,6 +268,14 @@
description="union_join_ctx_Not null cityid">
<expr_spec expr="case when union_join_ctx_cityid is null then 0 else union_join_ctx_cityid end"/>
</expression>
+ <expression _type="int" name="union_join_ctx_msr2_expr" display_string="union_join_ctx_msr2_expr"
+ description="union_join_ctx_msr2_expr">
+ <expr_spec expr="sum(union_join_ctx_msr2) + 0"/>
+ </expression>
+ <expression _type="int" name="union_join_ctx_msr4_expr" display_string="union_join_ctx_msr4_expr"
+ description="union_join_ctx_msr4_expr">
+ <expr_spec expr="sum(union_join_ctx_msr4) + 0"/>
+ </expression>
<expression _type="String" name="isindia" display_string="Is Indian City/state" description="is indian city/state">
<expr_spec expr="cubecity.name == 'DELHI' OR cubestate.name == 'KARNATAKA' OR cubestate.name == 'MAHARASHTRA'"/>
</expression>
http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/resources/schema/cubes/derived/union_join_ctx_der1.xml
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/resources/schema/cubes/derived/union_join_ctx_der1.xml b/lens-cube/src/test/resources/schema/cubes/derived/union_join_ctx_der1.xml
index 69eda6d..c23a029 100644
--- a/lens-cube/src/test/resources/schema/cubes/derived/union_join_ctx_der1.xml
+++ b/lens-cube/src/test/resources/schema/cubes/derived/union_join_ctx_der1.xml
@@ -35,6 +35,8 @@
<measure_name>union_join_ctx_msr2</measure_name>
<measure_name>union_join_ctx_msr1</measure_name>
<measure_name>union_join_ctx_msr3</measure_name>
+ <measure_name>union_join_ctx_msr4</measure_name>
+ <measure_name>union_join_ctx_msr22</measure_name>
<measure_name>segmsr1</measure_name>
</measure_names>
<dim_attr_names>
http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact1.xml
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact1.xml b/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact1.xml
index d5ac70a..b1f868b 100644
--- a/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact1.xml
+++ b/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact1.xml
@@ -21,7 +21,7 @@
-->
<x_fact_table name="union_join_ctx_fact1" cube_name="baseCube" weight="5.0" xmlns="uri:lens:cube:0.1">
<columns>
- <column name="union_join_ctx_msr1" _type="int" comment="first measure"/>
+ <column name="union_join_ctx_msr1" _type="int" comment="second measure"/>
<column name="d_time" _type="timestamp" comment="event time"/>
<column name="union_join_ctx_zipcode" _type="int" comment="zip"/>
<column name="union_join_ctx_cityid" _type="int" comment="city id"/>
http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact3.xml
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact3.xml b/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact3.xml
index 27e859e..b9557a7 100644
--- a/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact3.xml
+++ b/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact3.xml
@@ -22,6 +22,7 @@
<x_fact_table name="union_join_ctx_fact3" cube_name="baseCube" weight="5.0" xmlns="uri:lens:cube:0.1">
<columns>
<column name="union_join_ctx_msr2" _type="int" comment="second measure"/>
+ <column name="union_join_ctx_msr22" _type="int" comment="second measure"/>
<column name="d_time" _type="timestamp" comment="event time"/>
<column name="union_join_ctx_zipcode" _type="int" comment="zip"/>
<column name="union_join_ctx_cityid" _type="int" comment="city id"/>
http://git-wip-us.apache.org/repos/asf/lens/blob/89b17e64/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact4.xml
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact4.xml b/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact4.xml
new file mode 100644
index 0000000..2150304
--- /dev/null
+++ b/lens-cube/src/test/resources/schema/facts/union_join_ctx_fact4.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+<x_fact_table name="union_join_ctx_fact4" cube_name="baseCube" weight="6.0" xmlns="uri:lens:cube:0.1">
+ <columns>
+ <column name="union_join_ctx_msr4" _type="int" comment="fourth measure"/>
+ <column name="union_join_ctx_msr2" _type="int" comment="second measure"/>
+ <column name="d_time" _type="timestamp" comment="event time"/>
+ <column name="union_join_ctx_zipcode" _type="int" comment="zip"/>
+ <column name="union_join_ctx_cityid" _type="int" comment="city id"/>
+ </columns>
+ <properties>
+ <property name="cube.fact.union_join_ctx_fact4.cubename" value="baseCube"/>
+ <property name="cube.fact.absolute.start.time" value="$absolute{now.day - 90 days}"/>
+ <property name="cube.fact.union_join_ctx_fact4.c1.updateperiods" value="DAILY"/>
+ <property name="cube.fact.absolute.end.time" value="$absolute{now.day + 7 days}"/>
+ <property name="cube.fact.is.aggregated" value="false"/>
+ <property name="cube.fact.union_join_ctx_fact4.storages" value="C1"/>
+ <property name="cube.table.union_join_ctx_fact4.weight" value="5.0"/>
+ </properties>
+ <storage_tables>
+ <storage_table>
+ <update_periods>
+ <update_period>DAILY</update_period>
+ </update_periods>
+ <storage_name>C1</storage_name>
+ <table_desc external="false">
+ <part_cols>
+ <column name="dt" _type="string" comment="date partition"/>
+ </part_cols>
+ <table_parameters>
+ <property name="cube.storagetable.time.partcols" value="dt"/>
+ </table_parameters>
+ <serde_parameters>
+ <property name="serialization.format" value="1"/>
+ </serde_parameters>
+ <time_part_cols>dt</time_part_cols>
+ </table_desc>
+ </storage_table>
+ </storage_tables>
+</x_fact_table>
\ No newline at end of file