You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lens.apache.org by am...@apache.org on 2015/01/11 07:15:32 UTC
[1/2] incubator-lens git commit: LENS-88 : Adds support for multiple
chains to same destination table in same query (Rajat Khandelwal via
amareshwari)
Repository: incubator-lens
Updated Branches:
refs/heads/master 8c722a523 -> e2c0547d7
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/e2c0547d/lens-cube/src/test/java/org/apache/lens/cube/parse/TestJoinResolver.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestJoinResolver.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestJoinResolver.java
index 7e51a82..8787b80 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestJoinResolver.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestJoinResolver.java
@@ -184,7 +184,7 @@ public class TestJoinResolver extends TestQueryRewrite {
}
System.out.println("testAutoJoinResolverExpected1" + expectedClauses);
System.out.println("testAutoJoinResolverActual1" + actualClauses);
- Assert.assertEquals(expectedClauses, actualClauses);
+ Assert.assertEqualsNoOrder(expectedClauses.toArray(), actualClauses.toArray());
// Test 2 Dim only query
expectedClauses.clear();
@@ -228,16 +228,16 @@ public class TestJoinResolver extends TestQueryRewrite {
String hql = rewrittenQuery.toHQL();
System.out.println("testPartialJoinResolver Partial join hql: " + hql);
String partSQL =
- " left outer join " + getDbName() + "c1_citytable citydim on testcube.cityid "
- + "= citydim.id and (( citydim . name ) = 'FOOBAR' ) " + "and (citydim.dt = 'latest')";
+ " left outer join " + getDbName() + "c1_citytable citydim on testcube.cityid "
+ + "= citydim.id and (( citydim . name ) = 'FOOBAR' ) " + "and (citydim.dt = 'latest')";
Assert.assertTrue(hql.contains(partSQL));
partSQL =
- " right outer join " + getDbName() + "c1_testdim2tbl testdim2 on "
- + "testcube.dim2 = testdim2.id right outer join " + getDbName()
- + "c1_testdim3tbl testdim3 on testdim2.testdim3id = testdim3.id and "
- + "(testdim2.dt = 'latest') right outer join " + getDbName()
- + "c1_testdim4tbl testdim4 on testdim3.testdim4id = testdim4.id and "
- + "(( testdim4 . name ) = 'TESTDIM4NAME' ) and (testdim3.dt = 'latest')";
+ " right outer join " + getDbName() + "c1_testdim2tbl testdim2 on "
+ + "testcube.dim2 = testdim2.id right outer join " + getDbName()
+ + "c1_testdim3tbl testdim3 on testdim2.testdim3id = testdim3.id and "
+ + "(testdim2.dt = 'latest') right outer join " + getDbName()
+ + "c1_testdim4tbl testdim4 on testdim3.testdim4id = testdim4.id and "
+ + "(( testdim4 . name ) = 'TESTDIM4NAME' ) and (testdim3.dt = 'latest')";
Assert.assertTrue(hql.contains(partSQL));
}
@@ -372,15 +372,9 @@ public class TestJoinResolver extends TestQueryRewrite {
@Test
public void testJoinChains() throws SemanticException, ParseException {
- String query = "select citystate.name, sum(msr2) from basecube where " + twoDaysRange + " group by citystate.name";
- String hqlQuery = rewrite(query, hconf);
- String expected = getExpectedQuery("basecube", "select citystate.name, sum(basecube.msr2) FROM ",
- " join " + getDbName() + "c1_citytable citydim ON baseCube.cityid = citydim.id and citydim.dt = 'latest'" +
- " join " + getDbName() + "c1_statetable cityState ON citydim.stateid=cityState.id and cityState.dt= 'latest'",
- null, "group by citystate.name",
- null, getWhereForDailyAndHourly2days("basecube", "c1_testfact1_base"));
- TestCubeRewriter.compareQueries(expected, hqlQuery);
+ String query, hqlQuery, expected;
+ // Single joinchain with direct link
query = "select cubestate.name, sum(msr2) from basecube where " + twoDaysRange + " group by cubestate.name";
hqlQuery = rewrite(query, hconf);
expected = getExpectedQuery("basecube", "select cubestate.name, sum(basecube.msr2) FROM ",
@@ -389,6 +383,17 @@ public class TestJoinResolver extends TestQueryRewrite {
null, getWhereForDailyAndHourly2days("basecube", "c1_testfact1_base"));
TestCubeRewriter.compareQueries(expected, hqlQuery);
+ // Single joinchain with two chains
+ query = "select citystate.name, sum(msr2) from basecube where " + twoDaysRange + " group by citystate.name";
+ hqlQuery = rewrite(query, hconf);
+ expected = getExpectedQuery("basecube", "select citystate.name, sum(basecube.msr2) FROM ",
+ " join " + getDbName() + "c1_citytable citydim ON baseCube.cityid = citydim.id and citydim.dt = 'latest'" +
+ " join " + getDbName() + "c1_statetable cityState ON citydim.stateid=cityState.id and cityState.dt= 'latest'",
+ null, "group by citystate.name",
+ null, getWhereForDailyAndHourly2days("basecube", "c1_testfact1_base"));
+ TestCubeRewriter.compareQueries(expected, hqlQuery);
+
+ // Single joinchain with two chains, accessed as refcolumn
query = "select cityStateCapital, sum(msr2) from basecube where " + twoDaysRange;
hqlQuery = rewrite(query, hconf);
expected = getExpectedQuery("basecube", "select citystate.capital, sum(basecube.msr2) FROM ",
@@ -398,10 +403,12 @@ public class TestJoinResolver extends TestQueryRewrite {
null, getWhereForDailyAndHourly2days("basecube", "c1_testfact1_base"));
TestCubeRewriter.compareQueries(expected, hqlQuery);
+ // Same test, Accessing refcol as a column of cube
query = "select basecube.cityStateCapital, sum(msr2) from basecube where " + twoDaysRange;
hqlQuery = rewrite(query, hconf);
TestCubeRewriter.compareQueries(expected, hqlQuery);
+ // Adding Order by
query = "select cityStateCapital, sum(msr2) from basecube where " + twoDaysRange + " order by cityStateCapital";
hqlQuery = rewrite(query, hconf);
expected = getExpectedQuery("basecube", "select citystate.capital, sum(basecube.msr2) FROM ",
@@ -411,6 +418,7 @@ public class TestJoinResolver extends TestQueryRewrite {
null, getWhereForDailyAndHourly2days("basecube", "c1_testfact1_base"));
TestCubeRewriter.compareQueries(expected, hqlQuery);
+ // Single joinchain, but one column accessed as refcol and another as chain.column
query = "select citystate.name, cityStateCapital, sum(msr2) from basecube where " + twoDaysRange;
hqlQuery = rewrite(query, hconf);
expected = getExpectedQuery("basecube", "select citystate.name, citystate.capital, sum(basecube.msr2) FROM ",
@@ -420,6 +428,145 @@ public class TestJoinResolver extends TestQueryRewrite {
null, getWhereForDailyAndHourly2days("basecube", "c1_testfact1_base"));
TestCubeRewriter.compareQueries(expected, hqlQuery);
+ // Single join chain and an unrelated dimension
+ query = "select cubeState.name, citydim.name, sum(msr2) from basecube where " + twoDaysRange;
+ hqlQuery = rewrite(query, hconf);
+ expected = getExpectedQuery("basecube",
+ "select cubestate.name, citydim.name, sum(basecube.msr2) FROM ",
+ " join " + getDbName() + "c1_statetable cubestate on basecube.stateid = cubestate.id and cubestate.dt = 'latest'"
+ + " join " + getDbName() + "c1_citytable citydim on basecube.cityid = citydim.id and citydim.dt = 'latest'",
+ null, "group by cubestate.name,citydim.name", null,
+ getWhereForDailyAndHourly2days("basecube", "c1_testfact1_base")
+ );
+ TestCubeRewriter.compareQueries(expected, hqlQuery);
+
+ // Multiple join chains with same destination table
+ query = "select cityState.name, cubeState.name, sum(msr2) from basecube where " + twoDaysRange;
+ hqlQuery = rewrite(query, hconf);
+ expected = getExpectedQuery("basecube", "select citystate.name, cubestate.name, sum(basecube.msr2) FROM ",
+ " join " + getDbName() + "c1_citytable citydim on basecube.cityid = citydim.id and "
+ + "citydim.dt = 'latest'"
+ + " join " + getDbName() + "c1_statetable citystate on citydim.stateid = citystate.id and " +
+ "citystate.dt = 'latest'" +
+ " join " + getDbName() + "c1_statetable cubestate on basecube.stateid = cubestate.id and cubestate.dt = 'latest'"
+ , null, "group by citystate.name,cubestate.name", null,
+ getWhereForDailyAndHourly2days("basecube", "c1_testfact1_base")
+ );
+ TestCubeRewriter.compareQueries(expected, hqlQuery);
+
+ // Single joinchain with two paths, intermediate dimension accessed separately by name.
+ query = "select cityState.name, citydim.name, sum(msr2) from basecube where " + twoDaysRange;
+ hqlQuery = rewrite(query, hconf);
+ expected = getExpectedQuery("basecube",
+ "select citystate.name, citydim.name, sum(basecube.msr2) FROM ",
+ " join " + getDbName() + "c1_citytable citydim on basecube.cityid = citydim.id and " +
+ "citydim.dt = 'latest'" +
+ " join " + getDbName() + "c1_statetable citystate on citydim.stateid = citystate.id and " +
+ "citystate.dt = 'latest'", null, "group by citystate.name,citydim.name", null,
+ getWhereForDailyAndHourly2days("basecube", "c1_testfact1_base")
+ );
+ TestCubeRewriter.compareQueries(expected, hqlQuery);
+
+ // Multi joinchains + a dimension part of one of the chains.
+ query = "select cityState.name, cubeState.name, citydim.name, sum(msr2) from basecube where " + twoDaysRange;
+ hqlQuery = rewrite(query, hconf);
+ expected = getExpectedQuery("basecube",
+ "select citystate.name, cubestate.name, citydim.name, sum(basecube.msr2) FROM ",
+ " join " + getDbName() + "c1_citytable citydim on basecube.cityid = citydim.id and " +
+ "citydim.dt = 'latest'" +
+ " join " + getDbName() + "c1_statetable citystate on citydim.stateid = citystate.id and " +
+ "citystate.dt = 'latest'" +
+ " join " + getDbName() + "c1_statetable cubestate on basecube.stateid = cubestate.id and cubestate.dt = 'latest'"
+ , null, "group by citystate.name,cubestate.name,citydim.name", null,
+ getWhereForDailyAndHourly2days("basecube", "c1_testfact1_base")
+ );
+ TestCubeRewriter.compareQueries(expected, hqlQuery);
+
+ // Two joinchains, one accessed as refcol.
+ query = "select cubestate.name, cityStateCapital, sum(msr2) from basecube where " + twoDaysRange;
+ hqlQuery = rewrite(query, hconf);
+ expected = getExpectedQuery("basecube",
+ "select cubestate.name, citystate.capital, sum(basecube.msr2) FROM ",
+ "" +
+ " join " + getDbName() + "c1_statetable cubestate on basecube.stateid = cubestate.id and cubestate.dt = 'latest'" +
+ " join " + getDbName() + "c1_citytable citydim on basecube.cityid = citydim.id and citydim.dt = 'latest'" +
+ " join " + getDbName() + "c1_statetable citystate on citydim.stateid = citystate.id and citystate.dt = 'latest'" +
+ ""
+ , null, "group by cubestate.name, citystate.capital", null,
+ getWhereForDailyAndHourly2days("basecube", "c1_testfact1_base")
+ );
+ TestCubeRewriter.compareQueries(expected, hqlQuery);
+
+ // Two joinchains with initial path common. Testing merging of chains
+ query = "select cityState.name, cityZip.f1, sum(msr2) from basecube where " + twoDaysRange;
+ hqlQuery = rewrite(query, hconf);
+ expected = getExpectedQuery("basecube",
+ "select citystate.name, cityzip.f1, sum(basecube.msr2) FROM ",
+ " join " + getDbName() + "c1_citytable citydim on basecube.cityid = citydim.id and " +
+ "citydim.dt = 'latest'" +
+ " join " + getDbName() + "c1_statetable citystate on citydim.stateid = citystate.id and " +
+ "citystate.dt = 'latest'" +
+ " join " + getDbName() + "c1_ziptable cityzip on citydim.zipcode = cityzip.code and " +
+ "cityzip.dt = 'latest'"
+ , null, "group by citystate.name,cityzip.f1", null,
+ getWhereForDailyAndHourly2days("basecube", "c1_testfact1_base")
+ );
+ TestCubeRewriter.compareQueries(expected, hqlQuery);
+
+ // Two joinchains with common intermediate dimension, but different paths to that common dimension
+ // checking aliasing
+ query = "select cubeStateCountry.name, cubeCityStateCountry.name, sum(msr2) from basecube where " + twoDaysRange;
+ hqlQuery = rewrite(query, hconf);
+ expected = getExpectedQuery("basecube",
+ "select cubestatecountry.name, cubecitystatecountry.name, sum(basecube.msr2) FROM ",
+ ""
+ + " join TestQueryRewrite.c1_statetable statedim_0 on basecube.stateid = statedim_0.id and statedim_0.dt = 'latest' "
+ + " join TestQueryRewrite.c1_countrytable cubestatecountry on statedim_0.countryid = cubestatecountry.id "
+ + " join TestQueryRewrite.c1_citytable citydim on basecube.cityid = citydim.id and citydim.dt = 'latest' "
+ + " join TestQueryRewrite.c1_statetable statedim on citydim.stateid = statedim.id and statedim.dt = 'latest' "
+ + " join TestQueryRewrite.c1_countrytable cubecitystatecountry on statedim.countryid = cubecitystatecountry.id "
+ + "", null, "group by cubestatecountry.name, cubecitystatecountry.name", null,
+ getWhereForDailyAndHourly2days("basecube", "c1_testfact1_base")
+ );
+ TestCubeRewriter.compareQueries(expected, hqlQuery);
+
+ // this test case should pass when default qualifiers for dimensions' chains are added
+ // Two joinchains with same destination, and the destination table accessed separately
+ query = "select cityState.name, cubeState.name, statedim.name, sum(msr2) from basecube where " + twoDaysRange;
+ try{
+ rewrite(query, hconf);
+ Assert.fail("Should have failed. " +
+ "It's not possible to resolve which statedim is being asked for when cityState and cubeState both end at" +
+ " statedim table.");
+ } catch(SemanticException e) {
+ Assert.assertNotNull(e.getCause());
+ Assert.assertEquals(
+ e.getCause().getMessage().indexOf("Table statedim has 2 different paths through joinchains"), 0);
+ }
+
+ // this test case should pass when default qualifiers for dimensions' chains are added
+ // Two Single joinchain, And dest table accessed separately.
+ query = "select cubeState.name, statedim.name, sum(msr2) from basecube where " + twoDaysRange;
+ try{
+ rewrite(query, hconf);
+ Assert.fail("Should have failed. " +
+ "The table statedim is getting accessed as both cubeState and statedim ");
+ } catch(SemanticException e) {
+ Assert.assertNotNull(e.getCause());
+ Assert.assertEquals(e.getCause().getMessage().toLowerCase(),
+ "Table statedim is getting accessed via two different names: [cubestate, statedim]".toLowerCase());
+ }
+ // this should pass when default qualifiers are added
+ query = "select cityStateCapital, statedim.name, sum(msr2) from basecube where " + twoDaysRange;
+ try{
+ rewrite(query, hconf);
+ Assert.fail("Should have failed. " +
+ "The table statedim is getting accessed as both cubeState and statedim ");
+ } catch(SemanticException e) {
+ Assert.assertNotNull(e.getCause());
+ Assert.assertEquals(e.getCause().getMessage().toLowerCase(),
+ "Table statedim is getting accessed via two different names: [citystate, statedim]".toLowerCase());
+ }
// Test 4 Dim only query with join chains
@@ -485,17 +632,15 @@ public class TestJoinResolver extends TestQueryRewrite {
System.out.println("testDimOnlyJoinChainExpected1 : " + expectedClauses);
System.out.println("testDimOnlyJoinChainActual1 : " + actualClauses);
Assert.assertEquals(expectedClauses, actualClauses);
-
- //TODO add below tests once support for multichains with same destination is available
- query = "select citystate.name, cubestate.name, msr2 from basecube where " + twoDaysRange;
- query = "select cubestate.name, cityStateCapital msr2 from basecube where " + twoDaysRange;
}
@Test
public void testMultiPaths() throws SemanticException, ParseException {
- String query = "select testdim3.name, sum(msr2) from testcube where " + twoDaysRange;
- String hqlQuery = rewrite(query, hconf);
- String expected = getExpectedQuery("testcube", "select testdim3.name, sum(testcube.msr2) FROM ",
+ String query, hqlQuery, expected;
+
+ query = "select testdim3.name, sum(msr2) from testcube where " + twoDaysRange;
+ hqlQuery = rewrite(query, hconf);
+ expected = getExpectedQuery("testcube", "select testdim3.name, sum(testcube.msr2) FROM ",
" join " + getDbName() + "c1_testdim3tbl testdim3 ON testcube.testdim3id = testdim3.id and testdim3.dt = 'latest'",
null, "group by testdim3.name",
null, getWhereForDailyAndHourly2days("testcube", "c1_summary1"));
@@ -516,7 +661,7 @@ public class TestJoinResolver extends TestQueryRewrite {
hqlQuery = rewrite(query, hconf);
expected = getExpectedQuery("testcube", "select testdim3.id, avg(testcube.msr2) FROM ",
" join " + getDbName() + "c1_testdim2tbl testdim2 ON testcube.dim2 = testdim2.id and testdim2.dt = 'latest'" +
- " join " + getDbName() + "c1_testdim3tbl testdim3 ON testdim2.testdim3id = testdim3.id and testdim3.dt = 'latest'",
+ " join " + getDbName() + "c1_testdim3tbl testdim3 ON testdim2.testdim3id = testdim3.id and testdim3.dt = 'latest'",
null, "group by testdim3.id",
null, getWhereForHourly2days("testcube", "c1_testfact2_raw"));
TestCubeRewriter.compareQueries(expected, hqlQuery);
@@ -535,11 +680,12 @@ public class TestJoinResolver extends TestQueryRewrite {
query = "select citydim.name, testdim4.name, testdim3id, avg(msr2) from testcube where " + twoDaysRange;
hqlQuery = rewrite(query, hconf);
expected = getExpectedQuery("testcube", "select citydim.name, testdim4.name, testdim3.id, avg(testcube.msr2) FROM ",
- " join " + getDbName() + "c1_citytable citydim ON testcube.cityid = citydim.id and citydim.dt = 'latest'" +
" join " + getDbName() + "c1_testdim2tbl testdim2 ON testcube.dim2 = testdim2.id and testdim2.dt = 'latest'" +
" join " + getDbName() + "c1_testdim3tbl testdim3 ON testdim2.testdim3id = testdim3.id and testdim3.dt = 'latest'"
+ " join " + getDbName() + "c1_testdim4tbl testdim4 ON testdim3.testDim4id = testdim4.id and" +
- " testdim4.dt = 'latest'", null, "group by citydim.name, testdim4.name, testdim3.id", null,
+ " testdim4.dt = 'latest'" +
+ " join " + getDbName() + "c1_citytable citydim ON testcube.cityid = citydim.id and citydim.dt = 'latest'"
+ , null, "group by citydim.name, testdim4.name, testdim3.id", null,
getWhereForHourly2days("testcube", "c1_testfact2_raw"));
TestCubeRewriter.compareQueries(expected, hqlQuery);
[2/2] incubator-lens git commit: LENS-88 : Adds support for multiple
chains to same destination table in same query (Rajat Khandelwal via
amareshwari)
Posted by am...@apache.org.
LENS-88 : Adds support for multiple chains to same destination table in same query (Rajat Khandelwal via amareshwari)
Project: http://git-wip-us.apache.org/repos/asf/incubator-lens/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-lens/commit/e2c0547d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-lens/tree/e2c0547d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-lens/diff/e2c0547d
Branch: refs/heads/master
Commit: e2c0547d737eacae4858f89c180b3f6318757f7f
Parents: 8c722a5
Author: Amareshwari Sriramdasu <am...@inmobi.com>
Authored: Sun Jan 11 11:45:16 2015 +0530
Committer: Amareshwari Sriramdasu <am...@inmobi.com>
Committed: Sun Jan 11 11:45:16 2015 +0530
----------------------------------------------------------------------
.../apache/lens/cube/metadata/JoinChain.java | 6 +-
.../apache/lens/cube/metadata/SchemaGraph.java | 27 +-
.../apache/lens/cube/parse/AliasReplacer.java | 2 +-
.../org/apache/lens/cube/parse/Aliased.java | 41 ++
.../lens/cube/parse/CandidateTableResolver.java | 2 +-
.../apache/lens/cube/parse/ColumnResolver.java | 2 +-
.../lens/cube/parse/CubeQueryContext.java | 41 +-
.../cube/parse/DenormalizationResolver.java | 2 +-
.../apache/lens/cube/parse/DimHQLContext.java | 4 +-
.../lens/cube/parse/DimOnlyHQLContext.java | 9 -
.../apache/lens/cube/parse/JoinResolver.java | 574 +++++++++++++------
.../lens/cube/parse/StorageTableResolver.java | 2 +-
.../org/apache/lens/cube/parse/StorageUtil.java | 8 +-
.../lens/cube/parse/TimerangeResolver.java | 4 +-
.../apache/lens/cube/parse/CubeTestSetup.java | 94 ++-
.../lens/cube/parse/TestCubeRewriter.java | 9 +-
.../lens/cube/parse/TestExpressionResolver.java | 45 +-
.../lens/cube/parse/TestJoinResolver.java | 200 ++++++-
18 files changed, 755 insertions(+), 317 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/e2c0547d/lens-cube/src/main/java/org/apache/lens/cube/metadata/JoinChain.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/metadata/JoinChain.java b/lens-cube/src/main/java/org/apache/lens/cube/metadata/JoinChain.java
index 12909a7..59c3727 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/metadata/JoinChain.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/metadata/JoinChain.java
@@ -22,7 +22,6 @@ import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.Properties;
import java.util.Set;
import lombok.EqualsAndHashCode;
@@ -98,10 +97,7 @@ public class JoinChain implements Named {
* @param name
*/
public JoinChain(AbstractCubeTable table, String name) {
- boolean isCube = false;
- if(table instanceof Cube) {
- isCube = true;
- }
+ boolean isCube = (table instanceof Cube);
this.name = name;
this.paths = new ArrayList<Path>();
int numChains = 0;
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/e2c0547d/lens-cube/src/main/java/org/apache/lens/cube/metadata/SchemaGraph.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/metadata/SchemaGraph.java b/lens-cube/src/main/java/org/apache/lens/cube/metadata/SchemaGraph.java
index b5a1631..f0e14b2 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/metadata/SchemaGraph.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/metadata/SchemaGraph.java
@@ -18,7 +18,10 @@
*/
package org.apache.lens.cube.metadata;
+import lombok.Getter;
+import lombok.Setter;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.JoinType;
import java.util.*;
@@ -27,10 +30,10 @@ public class SchemaGraph {
* An edge in the schema graph
*/
public static class TableRelationship {
- final String fromColumn;
- final AbstractCubeTable fromTable;
- final String toColumn;
- final AbstractCubeTable toTable;
+ @Getter final String fromColumn;
+ @Getter final AbstractCubeTable fromTable;
+ @Getter final String toColumn;
+ @Getter final AbstractCubeTable toTable;
public TableRelationship(String fromCol, AbstractCubeTable fromTab, String toCol, AbstractCubeTable toTab) {
fromColumn = fromCol;
@@ -39,22 +42,6 @@ public class SchemaGraph {
toTable = toTab;
}
- public String getFromColumn() {
- return fromColumn;
- }
-
- public String getToColumn() {
- return toColumn;
- }
-
- public AbstractCubeTable getFromTable() {
- return fromTable;
- }
-
- public AbstractCubeTable getToTable() {
- return toTable;
- }
-
@Override
public String toString() {
return fromTable.getName() + "." + fromColumn + "->" + toTable.getName() + "." + toColumn;
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/e2c0547d/lens-cube/src/main/java/org/apache/lens/cube/parse/AliasReplacer.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/AliasReplacer.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/AliasReplacer.java
index e4987f3..85b3eee 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/AliasReplacer.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/AliasReplacer.java
@@ -172,7 +172,7 @@ class AliasReplacer implements ContextRewriter {
}
private void extractTabAliasForCol(CubeQueryContext cubeql) throws SemanticException {
- Set<String> columns = cubeql.getTblAlaisToColumns().get(CubeQueryContext.DEFAULT_TABLE);
+ Set<String> columns = cubeql.getTblAliasToColumns().get(CubeQueryContext.DEFAULT_TABLE);
if (columns == null) {
return;
}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/e2c0547d/lens-cube/src/main/java/org/apache/lens/cube/parse/Aliased.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/Aliased.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/Aliased.java
new file mode 100644
index 0000000..47b4ba8
--- /dev/null
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/Aliased.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.lens.cube.parse;
+
+import lombok.AllArgsConstructor;
+import lombok.EqualsAndHashCode;
+import lombok.Getter;
+import lombok.Setter;
+import org.apache.lens.cube.metadata.Named;
+
+@EqualsAndHashCode
+@AllArgsConstructor
+public class Aliased<T extends Named> {
+ @Getter @Setter T object;
+ @Getter @Setter String alias;
+ public String getFinalAlias() {
+ return alias == null ? object.getName() : alias;
+ }
+ public static <K extends Named> Aliased<K> create(K obj) {
+ return create(obj, null);
+ }
+ public static <K extends Named> Aliased<K> create(K obj, String alias) {
+ return new Aliased<K>(obj, alias);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/e2c0547d/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java
index dfb07aa..4d472ac 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTableResolver.java
@@ -72,7 +72,7 @@ class CandidateTableResolver implements ContextRewriter {
cubeql.getHiveConf().getBoolean(CubeQueryConfUtil.ENABLE_MULTI_TABLE_SELECT,
CubeQueryConfUtil.DEFAULT_MULTI_TABLE_SELECT);
if (checkForQueriedColumns) {
- LOG.debug("Dump queried columns:" + cubeql.getTblAlaisToColumns());
+ LOG.debug("Dump queried columns:" + cubeql.getTblAliasToColumns());
populateCandidateTables(cubeql);
resolveCandidateFactTables(cubeql);
resolveCandidateDimTables(cubeql);
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/e2c0547d/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java
index 306dd2b..f8a71bc 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/ColumnResolver.java
@@ -71,7 +71,7 @@ class ColumnResolver implements ContextRewriter {
getColsForTree(cubeql, cubeql.getOrderByAST());
// Update join dimension tables
- for (String table : cubeql.getTblAlaisToColumns().keySet()) {
+ for (String table : cubeql.getTblAliasToColumns().keySet()) {
try {
if (!CubeQueryContext.DEFAULT_TABLE.equalsIgnoreCase(table)) {
cubeql.addQueriedTable(table);
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/e2c0547d/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java
index 6767058..4a03701 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryContext.java
@@ -70,8 +70,11 @@ public class CubeQueryContext {
// metadata
private CubeInterface cube;
- // Dimensions accessed in the query
- protected Set<Dimension> dimensions = new HashSet<Dimension>();
+ // Dimensions accessed in the query, contains dimensions that are joinchain destinations
+ // of the joinchains used.
+ @Getter protected Set<Dimension> dimensions = new HashSet<Dimension>();
+ // The dimensions accessed by name in the query directly, via tablename.columname
+ @Getter protected Set<Dimension> nonChainedDimensions = new HashSet<Dimension>();
// Joinchains accessed in the query
@Getter protected Map<String, JoinChain> joinchains = new HashMap<String, JoinChain>();
private final Set<String> queriedDimAttrs = new HashSet<String>();
@@ -201,7 +204,7 @@ public class CubeQueryContext {
if (retVal) {
joinchains.put(aliasLowerCaseStr, new JoinChain(joinchain));
String destTable = joinchain.getDestTable();
- boolean added = addQueriedTable(alias, destTable, isOptional);
+ boolean added = addQueriedTable(alias, destTable, isOptional, true);
if (!added) {
LOG.info("Queried tables do not exist. Missing tables:" + destTable);
throw new SemanticException(ErrorMsg.NEITHER_CUBE_NOR_DIMENSION);
@@ -222,7 +225,7 @@ public class CubeQueryContext {
if (tblName == null) {
tblName = alias;
}
- boolean added = addQueriedTable(alias, tblName, isOptional);
+ boolean added = addQueriedTable(alias, tblName, isOptional, false);
if (!added) {
// try adding as joinchain
added = addJoinChain(alias, isOptional);
@@ -230,7 +233,22 @@ public class CubeQueryContext {
return added;
}
- private boolean addQueriedTable(String alias, String tblName, boolean isOptional) throws SemanticException {
+ /**
+ * destination table : a table whose columns are getting queried
+ * intermediate table : a table which is only used as a link between cube and destination table
+ *
+ * @param alias
+ * @param tblName
+ * @param isOptional pass false when it's a destination table
+ * pass true when it's an intermediate table
+ * when join chain destination is being added, this will be false.
+ * @param isChainedDimension pass true when you're adding the dimension as a joinchain destination,
+ * pass false when this table is mentioned by name in the user query
+ * @return true if added
+ * @throws SemanticException
+ */
+ private boolean addQueriedTable(String alias, String tblName, boolean isOptional, boolean isChainedDimension)
+ throws SemanticException {
alias = alias.toLowerCase();
if (cubeTbls.containsKey(alias)) {
return true;
@@ -249,6 +267,9 @@ public class CubeQueryContext {
if (!isOptional) {
dimensions.add(dim);
}
+ if(!isChainedDimension) {
+ nonChainedDimensions.add(dim);
+ }
cubeTbls.put(alias, dim);
} else {
return false;
@@ -358,10 +379,6 @@ public class CubeQueryContext {
pruneMsgs.add(msg);
}
- public Set<Dimension> getDimensions() {
- return dimensions;
- }
-
public String getAliasForTabName(String tabName) {
for (String alias : qb.getTabAliases()) {
String table = qb.getTabNameForAlias(alias);
@@ -557,8 +574,7 @@ public class CubeQueryContext {
}
private String getWhereClauseWithAlias(Map<Dimension, CandidateDim> dimsToQuery, String alias) {
- Dimension dim = (Dimension) cubeTbls.get(alias);
- return dimsToQuery.get(dim).whereClause.replace(getAliasForTabName(dim.getName()), alias);
+ return StorageUtil.getWhereClause(dimsToQuery.get(cubeTbls.get(alias)), alias);
}
String getQBFromString(CandidateFact fact, Map<Dimension, CandidateDim> dimsToQuery) throws SemanticException {
@@ -723,7 +739,6 @@ public class CubeQueryContext {
throw new SemanticException(ErrorMsg.NO_CANDIDATE_FACT_AVAILABLE, reason);
}
}
-
return facts;
}
@@ -833,7 +848,7 @@ public class CubeQueryContext {
return ParseUtils.findRootNonNullToken(tree);
}
- public Map<String, Set<String>> getTblAlaisToColumns() {
+ public Map<String, Set<String>> getTblAliasToColumns() {
return tblAliasToColumns;
}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/e2c0547d/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java
index dab3099..5643b36 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/DenormalizationResolver.java
@@ -338,7 +338,7 @@ public class DenormalizationResolver implements ContextRewriter {
// Adds all the reference dimensions as eligible for denorm fields
denormCtx = new DenormalizationContext(cubeql);
cubeql.setDenormCtx(denormCtx);
- for (Map.Entry<String, Set<String>> entry : cubeql.getTblAlaisToColumns().entrySet()) {
+ for (Map.Entry<String, Set<String>> entry : cubeql.getTblAliasToColumns().entrySet()) {
// skip default alias
if (entry.getKey() == CubeQueryContext.DEFAULT_TABLE) {
continue;
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/e2c0547d/lens-cube/src/main/java/org/apache/lens/cube/parse/DimHQLContext.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/DimHQLContext.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/DimHQLContext.java
index 956ff84..15891c8 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/DimHQLContext.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/DimHQLContext.java
@@ -103,8 +103,8 @@ abstract class DimHQLContext extends SimpleHQLContext {
boolean added = (originalWhere != null);
for (Dimension dim : queriedDims) {
CandidateDim cdim = dimsToQuery.get(dim);
- if (!cdim.isWhereClauseAdded()) {
- appendWhereClause(whereBuf, cdim.whereClause, added);
+ if (!cdim.isWhereClauseAdded() && !StringUtils.isBlank(cdim.whereClause)) {
+ appendWhereClause(whereBuf, StorageUtil.getWhereClause(cdim, query.getAliasForTabName(dim.getName())), added);
added = true;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/e2c0547d/lens-cube/src/main/java/org/apache/lens/cube/parse/DimOnlyHQLContext.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/DimOnlyHQLContext.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/DimOnlyHQLContext.java
index ceba462..8d6274c 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/DimOnlyHQLContext.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/DimOnlyHQLContext.java
@@ -61,15 +61,6 @@ class DimOnlyHQLContext extends DimHQLContext {
return null;
}
- protected String getFromString() throws SemanticException {
- String fromString = getFromTable();
- if (query.isAutoJoinResolved()) {
- fromString =
- query.getAutoJoinCtx().getFromString(fromString, null, getDimsToQuery().keySet(), getDimsToQuery(), query);
- }
- return fromString;
- }
-
@Override
protected Set<Dimension> getQueriedDimSet() {
return getDimsToQuery().keySet();
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/e2c0547d/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinResolver.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinResolver.java
index 9215d38..0bcc558 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinResolver.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinResolver.java
@@ -31,9 +31,7 @@ import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_UNIQUEJOIN;
import java.util.*;
-import lombok.Getter;
-import lombok.Setter;
-import lombok.ToString;
+import lombok.*;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
@@ -49,21 +47,15 @@ import org.apache.hadoop.hive.ql.parse.QB;
import org.apache.hadoop.hive.ql.parse.QBJoinTree;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.lens.cube.metadata.AbstractCubeTable;
-import org.apache.lens.cube.metadata.CubeDimensionTable;
-import org.apache.lens.cube.metadata.CubeInterface;
-import org.apache.lens.cube.metadata.CubeMetastoreClient;
-import org.apache.lens.cube.metadata.Dimension;
-import org.apache.lens.cube.metadata.JoinChain;
-import org.apache.lens.cube.metadata.SchemaGraph;
+import org.apache.lens.cube.metadata.*;
import org.apache.lens.cube.metadata.SchemaGraph.TableRelationship;
import org.apache.lens.cube.parse.CandidateTablePruneCause.CubeTableCause;
import org.apache.lens.cube.parse.CubeQueryContext.OptionalDimCtx;
/**
- *
+ *
* JoinResolver.
- *
+ *
*/
class JoinResolver implements ContextRewriter {
@@ -74,12 +66,16 @@ class JoinResolver implements ContextRewriter {
private final int cost;
// all dimensions in path except target
private final Set<Dimension> dimsInPath;
- private final Map<Dimension, List<TableRelationship>> chain;
+ private CubeQueryContext cubeql;
+ private final Map<Aliased<Dimension>, List<TableRelationship>> chain;
+ private final JoinTree joinTree;
transient Map<AbstractCubeTable, Set<String>> chainColumns = new HashMap<AbstractCubeTable, Set<String>>();
- public JoinClause(Map<Dimension, List<TableRelationship>> chain, Set<Dimension> dimsInPath) {
+ public JoinClause(CubeQueryContext cubeql, Map<Aliased<Dimension>, List<TableRelationship>> chain, Set<Dimension> dimsInPath) {
+ this.cubeql = cubeql;
this.chain = chain;
- this.cost = dimsInPath.size();
+ this.joinTree = mergeJoinChains(chain);
+ this.cost = joinTree.getNumEdges();
this.dimsInPath = dimsInPath;
}
@@ -111,6 +107,180 @@ class JoinResolver implements ContextRewriter {
public int compareTo(JoinClause joinClause) {
return cost - joinClause.getCost();
}
+
+ /**
+ * Takes chains and merges them in the form of a tree. If two chains have some common path
+ * till some table and bifurcate from there, then in the chain, both paths will have the common
+ * path but the resultant tree will have single path from root(cube) to that table and paths
+ * will bifurcate from there.
+ *
+ * For example, citystate = [basecube.cityid=citydim.id], [citydim.stateid=statedim.id]
+ * cityzip = [basecube.cityid=citydim.id], [citydim.zipcode=zipdim.code]
+ *
+ * Without merging, the behaviour is like this:
+ *
+ *
+ * (basecube.cityid=citydim.id) (citydim.stateid=statedim.id)
+ * ____________________________citydim-------------------------------------statedim
+ * |
+ * basecube-----
+ * |____________________________citydim-------------------------------------zipdim
+ * (basecube.cityid=citydim.id) (citydim.zipcode=zipdim.code)
+ *
+ *
+ *
+ * Merging will result in a tree like following
+ *
+ *
+ * (citydim.stateid=statedim.id)
+ * ________________________________ statedim
+ * (basecube.cityid=citydim.id) |
+ * basecube-------------------------------citydim----
+ * |________________________________ zipdim
+ * (citydim.zipcode=zipdim.code)
+ *
+ * Doing this will reduce the number of joins wherever possible.
+ *
+ * @param chain Joins in Linear format.
+ * @return Joins in Tree format
+ */
+ public JoinTree mergeJoinChains(Map<Aliased<Dimension>, List<TableRelationship>> chain) {
+ Map<String, Integer> aliasUsage = new HashMap<String, Integer>();
+ JoinTree root = JoinTree.createRoot();
+ for(Map.Entry<Aliased<Dimension>, List<TableRelationship>> entry: chain.entrySet()) {
+ JoinTree current = root;
+ // Last element in this list is link from cube to first dimension
+ for(int i = entry.getValue().size() - 1; i >= 0; i--) {
+ // Adds a child if needed, or returns a child already existing corresponding to the given link.
+ current = current.addChild(entry.getValue().get(i), cubeql, aliasUsage);
+ if (cubeql.getAutoJoinCtx().partialJoinChains) {
+ JoinType joinType = cubeql.getAutoJoinCtx().tableJoinTypeMap.get(entry.getKey().getObject());
+ //This ensures that if (sub)paths are the same but join types differ, merging will not happen.
+ current.setJoinType(joinType);
+ }
+ }
+ // This is a destination table. Decide alias separately. e.g. chainname
+ // nullcheck is necessary because dimensions can be destinations too. In that case getAlias() == null
+ if(entry.getKey().getAlias() != null) {
+ current.setAlias(entry.getKey().getAlias());
+ }
+ }
+ if(root.subtrees.size() > 0) {
+ root.setAlias(cubeql.getAliasForTabName(root.subtrees.keySet().iterator().next().getFromTable().getName()));
+ }
+ return root;
+ }
+ }
+
+ public static class JoinTree {
+ //parent of the node
+ JoinTree parent;
+ // current table is parentRelationship.destTable;
+ TableRelationship parentRelationship;
+ // Alias for the join clause
+ @Getter @Setter String alias;
+ public Map<TableRelationship, JoinTree> subtrees = new LinkedHashMap<TableRelationship, JoinTree>();
+ // Number of nodes from root to this node. depth of root is 0. Unused for now.
+ @Getter private int depthFromRoot;
+ // join type of the current table.
+ @Getter @Setter JoinType joinType;
+
+ public static JoinTree createRoot() {
+ return new JoinTree(null, null, 0);
+ }
+ public JoinTree(JoinTree parent, TableRelationship tableRelationship,
+ int depthFromRoot) {
+ this.parent = parent;
+ this.parentRelationship = tableRelationship;
+ this.depthFromRoot = depthFromRoot;
+ }
+
+ public JoinTree addChild
+ (TableRelationship tableRelationship, CubeQueryContext cubeql, Map<String, Integer> aliasUsage) {
+ if(subtrees.get(tableRelationship) == null) {
+ JoinTree current = new JoinTree(this, tableRelationship,
+ this.depthFromRoot + 1);
+ // Set alias. Need to compute only when new node is being created.
+ // The following code ensures that for intermediate tables, aliases are given
+ // in the order citydim, citydim_0, citydim_1, ...
+ // And for destination tables, an alias will be decided from here but might be
+ // overridden outside this function.
+ AbstractCubeTable destTable = tableRelationship.getToTable();
+ current.setAlias(cubeql.getAliasForTabName(destTable.getName()));
+ if(aliasUsage.get(current.getAlias()) == null) {
+ aliasUsage.put(current.getAlias(), 0);
+ } else {
+ aliasUsage.put(current.getAlias(), aliasUsage.get(current.getAlias()) + 1);
+ current.setAlias(current.getAlias() + "_" + (aliasUsage.get(current.getAlias()) - 1));
+ }
+ subtrees.put(tableRelationship, current);
+ }
+ return subtrees.get(tableRelationship);
+ }
+
+ // Recursive computation of number of edges.
+ public int getNumEdges() {
+ int ret = 0;
+ for(JoinTree tree: subtrees.values()) {
+ ret += 1;
+ ret += tree.getNumEdges();
+ }
+ return ret;
+ }
+
+ // Breadth First Traversal. Unused currently.
+ public Iterator<JoinTree> bft() {
+ return new Iterator<JoinTree>() {
+ List<JoinTree> remaining = new ArrayList<JoinTree>(){
+ {
+ addAll(subtrees.values());
+ }
+ };
+ @Override
+ public boolean hasNext() {
+ return !remaining.isEmpty();
+ }
+
+ @Override
+ public JoinTree next() {
+ JoinTree retval = remaining.remove(0);
+ remaining.addAll(retval.subtrees.values());
+ return retval;
+ }
+
+ @Override
+ public void remove() {
+ throw new RuntimeException("Not implemented");
+ }
+ };
+ }
+
+ // Depth first traversal of the tree. Used in forming join string.
+ public Iterator<JoinTree> dft() {
+ return new Iterator<JoinTree>() {
+ Stack<JoinTree> joinTreeStack = new Stack<JoinTree>() {
+ {
+ addAll(subtrees.values());
+ }
+ };
+ @Override
+ public boolean hasNext() {
+ return !joinTreeStack.isEmpty();
+ }
+
+ @Override
+ public JoinTree next() {
+ JoinTree retval = joinTreeStack.pop();
+ joinTreeStack.addAll(retval.subtrees.values());
+ return retval;
+ }
+
+ @Override
+ public void remove() {
+ throw new RuntimeException("Not implemented");
+ }
+ };
+ }
}
/**
@@ -119,7 +289,7 @@ class JoinResolver implements ContextRewriter {
public static class AutoJoinContext {
// Map of a joined table to list of all possible paths from that table to
// the target
- private final Map<Dimension, List<SchemaGraph.JoinPath>> allPaths;
+ private final Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> allPaths;
// User supplied partial join conditions
private final Map<AbstractCubeTable, String> partialJoinConditions;
// True if the query contains user supplied partial join conditions
@@ -145,10 +315,10 @@ class JoinResolver implements ContextRewriter {
// there can be separate join clause for each fact incase of multi fact queries
@Getter Map<CandidateFact, JoinClause> factClauses = new HashMap<CandidateFact, JoinClause>();
@Getter @Setter JoinClause minCostClause;
- public AutoJoinContext(Map<Dimension, List<SchemaGraph.JoinPath>> allPaths,
- Map<Dimension, OptionalDimCtx> optionalDimensions, Map<AbstractCubeTable, String> partialJoinConditions,
- boolean partialJoinChains, Map<AbstractCubeTable, JoinType> tableJoinTypeMap, AbstractCubeTable autoJoinTarget,
- String joinTypeCfg, boolean joinsResolved) {
+ public AutoJoinContext(Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> allPaths,
+ Map<Dimension, OptionalDimCtx> optionalDimensions, Map<AbstractCubeTable, String> partialJoinConditions,
+ boolean partialJoinChains, Map<AbstractCubeTable, JoinType> tableJoinTypeMap, AbstractCubeTable autoJoinTarget,
+ String joinTypeCfg, boolean joinsResolved) {
this.allPaths = allPaths;
initJoinPathColumns();
this.partialJoinConditions = partialJoinConditions;
@@ -187,18 +357,18 @@ class JoinResolver implements ContextRewriter {
public void refreshJoinPathColumns() {
joinPathFromColumns.clear();
joinPathToColumns.clear();
- for (Map.Entry<Dimension, List<SchemaGraph.JoinPath>> joinPathEntry : allPaths.entrySet()) {
+ for (Map.Entry<Aliased<Dimension>, List<SchemaGraph.JoinPath>> joinPathEntry : allPaths.entrySet()) {
List<SchemaGraph.JoinPath> joinPaths = joinPathEntry.getValue();
- Map<AbstractCubeTable, List<String>> fromColPaths = joinPathFromColumns.get(joinPathEntry.getKey());
- Map<AbstractCubeTable, List<String>> toColPaths = joinPathToColumns.get(joinPathEntry.getKey());
+ Map<AbstractCubeTable, List<String>> fromColPaths = joinPathFromColumns.get(joinPathEntry.getKey().getObject());
+ Map<AbstractCubeTable, List<String>> toColPaths = joinPathToColumns.get(joinPathEntry.getKey().getObject());
if (fromColPaths == null) {
fromColPaths = new HashMap<AbstractCubeTable, List<String>>();
- joinPathFromColumns.put(joinPathEntry.getKey(), fromColPaths);
+ joinPathFromColumns.put(joinPathEntry.getKey().getObject(), fromColPaths);
}
if (toColPaths == null) {
toColPaths = new HashMap<AbstractCubeTable, List<String>>();
- joinPathToColumns.put(joinPathEntry.getKey(), toColPaths);
+ joinPathToColumns.put(joinPathEntry.getKey().getObject(), toColPaths);
}
populateJoinPathCols(joinPaths, fromColPaths, toColPaths);
}
@@ -234,7 +404,7 @@ class JoinResolver implements ContextRewriter {
LOG.info(src + " All paths" + allPaths);
}
public void removeJoinedTable(Dimension dim) {
- allPaths.remove(dim);
+ allPaths.remove(Aliased.create(dim));
joinPathFromColumns.remove(dim);
}
@@ -250,7 +420,7 @@ class JoinResolver implements ContextRewriter {
return fromString;
}
// Compute the merged join clause string for the min cost joinclause
- String clause = getMergedJoinClause(cubeql.getAutoJoinCtx().getJoinClause(fact).chain, dimsToQuery,
+ String clause = getMergedJoinClause(cubeql.getAutoJoinCtx().getJoinClause(fact), dimsToQuery,
qdims, cubeql);
fromString += clause;
@@ -258,9 +428,8 @@ class JoinResolver implements ContextRewriter {
}
// Some refactoring needed to account for multiple join paths
- public String getMergedJoinClause(Map<Dimension, List<TableRelationship>> joinChain,
- Map<Dimension, CandidateDim> dimsToQuery, Set<Dimension> qdims, CubeQueryContext cubeql) {
-
+ public String getMergedJoinClause(JoinClause joinClause,
+ Map<Dimension, CandidateDim> dimsToQuery, Set<Dimension> qdims, CubeQueryContext cubeql) {
Set<String> clauses = new LinkedHashSet<String>();
String joinTypeStr = "";
JoinType joinType = JoinType.INNER;
@@ -275,115 +444,111 @@ class JoinResolver implements ContextRewriter {
}
}
- for (Map.Entry<Dimension, List<TableRelationship>> entry : joinChain.entrySet()) {
- List<TableRelationship> chain = entry.getValue();
- Dimension table = entry.getKey();
-
- // check if join with this dimension is required
- if (!qdims.contains(table)) {
- continue;
- }
+ //TODO: prune from tails according to qdims.contains(dimensionAliased.getObject())
+ Iterator<JoinTree> iter = joinClause.joinTree.dft();
+ while (iter.hasNext()) {
+ JoinTree cur = iter.next();
if (partialJoinChains) {
- joinType = tableJoinTypeMap.get(table);
+ joinType = cur.getJoinType();
joinTypeStr = getJoinTypeStr(joinType);
}
-
- for (int i = chain.size() - 1; i >= 0; i--) {
- TableRelationship rel = chain.get(i);
- StringBuilder clause = new StringBuilder(joinTypeStr).append(" join ");
- // Add storage table name followed by alias
- clause.append(dimsToQuery.get(rel.getToTable()).getStorageString(
- cubeql.getAliasForTabName(rel.getToTable().getName())));
-
- clause.append(" on ").append(cubeql.getAliasForTabName(rel.getFromTable().getName())).append(".")
- .append(rel.getFromColumn()).append(" = ").append(cubeql.getAliasForTabName(rel.getToTable().getName()))
+ TableRelationship rel = cur.parentRelationship;
+ String toAlias, fromAlias;
+ fromAlias = cur.parent.getAlias();
+ toAlias = cur.getAlias();
+ StringBuilder clause = new StringBuilder(joinTypeStr).append(" join ");
+ // Add storage table name followed by alias
+ clause.append(dimsToQuery.get(rel.getToTable()).getStorageString(toAlias));
+
+ clause.append(" on ").append(fromAlias).append(".")
+ .append(rel.getFromColumn()).append(" = ").append(toAlias)
.append(".").append(rel.getToColumn());
- // We have to push user specified filters for the joined tables
- String userFilter = null;
- // Partition condition on the tables also needs to be pushed depending
- // on the join
- String storageFilter = null;
-
- if (JoinType.INNER == joinType || JoinType.LEFTOUTER == joinType || JoinType.LEFTSEMI == joinType) {
- // For inner and left joins push filter of right table
- userFilter = partialJoinConditions.get(rel.getToTable());
- if (partialJoinConditions.containsKey(rel.getFromTable())) {
- if (StringUtils.isNotBlank(userFilter)) {
- userFilter += (" AND " + partialJoinConditions.get(rel.getFromTable()));
- } else {
- userFilter = partialJoinConditions.get(rel.getFromTable());
- }
+ // We have to push user specified filters for the joined tables
+ String userFilter = null;
+ // Partition condition on the tables also needs to be pushed depending
+ // on the join
+ String storageFilter = null;
+
+ if (JoinType.INNER == joinType || JoinType.LEFTOUTER == joinType || JoinType.LEFTSEMI == joinType) {
+ // For inner and left joins push filter of right table
+ userFilter = partialJoinConditions.get(rel.getToTable());
+ if (partialJoinConditions.containsKey(rel.getFromTable())) {
+ if (StringUtils.isNotBlank(userFilter)) {
+ userFilter += (" AND " + partialJoinConditions.get(rel.getFromTable()));
+ } else {
+ userFilter = partialJoinConditions.get(rel.getFromTable());
}
- storageFilter = getStorageFilter(dimsToQuery, rel.getToTable());
- dimsToQuery.get(rel.getToTable()).setWhereClauseAdded();
- } else if (JoinType.RIGHTOUTER == joinType) {
- // For right outer joins, push filters of left table
- userFilter = partialJoinConditions.get(rel.getFromTable());
- if (partialJoinConditions.containsKey(rel.getToTable())) {
- if (StringUtils.isNotBlank(userFilter)) {
- userFilter += (" AND " + partialJoinConditions.get(rel.getToTable()));
- } else {
- userFilter = partialJoinConditions.get(rel.getToTable());
- }
+ }
+ storageFilter = getStorageFilter(dimsToQuery, rel.getToTable(), toAlias);
+ dimsToQuery.get(rel.getToTable()).setWhereClauseAdded();
+ } else if (JoinType.RIGHTOUTER == joinType) {
+ // For right outer joins, push filters of left table
+ userFilter = partialJoinConditions.get(rel.getFromTable());
+ if (partialJoinConditions.containsKey(rel.getToTable())) {
+ if (StringUtils.isNotBlank(userFilter)) {
+ userFilter += (" AND " + partialJoinConditions.get(rel.getToTable()));
+ } else {
+ userFilter = partialJoinConditions.get(rel.getToTable());
}
- if (rel.getFromTable() instanceof Dimension) {
- storageFilter = getStorageFilter(dimsToQuery, rel.getFromTable());
+ }
+ if (rel.getFromTable() instanceof Dimension) {
+ storageFilter = getStorageFilter(dimsToQuery, rel.getFromTable(), fromAlias);
+ dimsToQuery.get(rel.getFromTable()).setWhereClauseAdded();
+ }
+ } else if (JoinType.FULLOUTER == joinType) {
+ // For full outer we need to push filters of both left and right
+ // tables in the join clause
+ String leftFilter = null, rightFilter = null;
+ String leftStorageFilter = null, rightStorgeFilter = null;
+
+ if (StringUtils.isNotBlank(partialJoinConditions.get(rel.getFromTable()))) {
+ leftFilter = partialJoinConditions.get(rel.getFromTable()) + " and ";
+ }
+
+ if (rel.getFromTable() instanceof Dimension) {
+ leftStorageFilter = getStorageFilter(dimsToQuery, rel.getFromTable(), fromAlias);
+ if (StringUtils.isNotBlank((leftStorageFilter))) {
dimsToQuery.get(rel.getFromTable()).setWhereClauseAdded();
}
- } else if (JoinType.FULLOUTER == joinType) {
- // For full outer we need to push filters of both left and right
- // tables in the join clause
- String leftFilter = null, rightFilter = null;
- String leftStorageFilter = null, rightStorgeFilter = null;
-
- if (StringUtils.isNotBlank(partialJoinConditions.get(rel.getFromTable()))) {
- leftFilter = partialJoinConditions.get(rel.getFromTable()) + " and ";
- }
+ }
- if (rel.getFromTable() instanceof Dimension) {
- leftStorageFilter = getStorageFilter(dimsToQuery, rel.getFromTable());
- if (StringUtils.isNotBlank((leftStorageFilter))) {
- dimsToQuery.get(rel.getFromTable()).setWhereClauseAdded();
- }
- }
+ if (StringUtils.isNotBlank(partialJoinConditions.get(rel.getToTable()))) {
+ rightFilter = partialJoinConditions.get(rel.getToTable());
+ }
- if (StringUtils.isNotBlank(partialJoinConditions.get(rel.getToTable()))) {
- rightFilter = partialJoinConditions.get(rel.getToTable());
+ rightStorgeFilter = getStorageFilter(dimsToQuery, rel.getToTable(), toAlias);
+ if (StringUtils.isNotBlank(rightStorgeFilter)) {
+ if (StringUtils.isNotBlank((leftStorageFilter))) {
+ leftStorageFilter += " and ";
}
-
- rightStorgeFilter = getStorageFilter(dimsToQuery, rel.getToTable());
- if (StringUtils.isNotBlank(rightStorgeFilter)) {
- if (StringUtils.isNotBlank((leftStorageFilter))) {
- leftStorageFilter += " and ";
- }
- dimsToQuery.get(rel.getToTable()).setWhereClauseAdded();
- }
-
- userFilter = (leftFilter == null ? "" : leftFilter) + (rightFilter == null ? "" : rightFilter);
- storageFilter =
- (leftStorageFilter == null ? "" : leftStorageFilter)
- + (rightStorgeFilter == null ? "" : rightStorgeFilter);
+ dimsToQuery.get(rel.getToTable()).setWhereClauseAdded();
}
- if (StringUtils.isNotBlank(userFilter)) {
- clause.append(" and ").append(userFilter);
- }
- if (StringUtils.isNotBlank(storageFilter)) {
- clause.append(" and ").append(storageFilter);
- }
- clauses.add(clause.toString());
+ userFilter = (leftFilter == null ? "" : leftFilter) + (rightFilter == null ? "" : rightFilter);
+ storageFilter =
+ (leftStorageFilter == null ? "" : leftStorageFilter)
+ + (rightStorgeFilter == null ? "" : rightStorgeFilter);
+ }
+
+ if (StringUtils.isNotBlank(userFilter)) {
+ clause.append(" and ").append(userFilter);
+ }
+ if (StringUtils.isNotBlank(storageFilter)) {
+ clause.append(" and ").append(storageFilter);
}
+ clauses.add(clause.toString());
}
return StringUtils.join(clauses, "");
}
- public Set<Dimension> getDimsOnPath(Map<Dimension, List<TableRelationship>> joinChain, Set<Dimension> qdims) {
+ public Set<Dimension> getDimsOnPath
+ (Map<Aliased<Dimension>, List<TableRelationship>> joinChain, Set<Dimension> qdims) {
Set<Dimension> dimsOnPath = new HashSet<Dimension>();
- for (Map.Entry<Dimension, List<TableRelationship>> entry : joinChain.entrySet()) {
+ for (Map.Entry<Aliased<Dimension>, List<TableRelationship>> entry : joinChain.entrySet()) {
List<TableRelationship> chain = entry.getValue();
- Dimension table = entry.getKey();
+ Dimension table = entry.getKey().getObject();
// check if join with this dimension is required
if (!qdims.contains(table)) {
@@ -398,11 +563,14 @@ class JoinResolver implements ContextRewriter {
return dimsOnPath;
}
- private String getStorageFilter(Map<Dimension, CandidateDim> dimsToQuery, AbstractCubeTable table) {
+ private String getStorageFilter(Map<Dimension, CandidateDim> dimsToQuery, AbstractCubeTable table, String alias) {
String whereClause = "";
if (dimsToQuery != null && dimsToQuery.get(table) != null) {
if (StringUtils.isNotBlank(dimsToQuery.get(table).whereClause)) {
whereClause = dimsToQuery.get(table).whereClause;
+ if(alias != null) {
+ whereClause = StorageUtil.getWhereClause(whereClause, alias);
+ }
}
}
return whereClause;
@@ -500,20 +668,21 @@ class JoinResolver implements ContextRewriter {
pruneEmptyPaths(allPaths);
}
- private void pruneEmptyPaths(Map<Dimension, List<SchemaGraph.JoinPath>> allPaths) {
- Iterator<Map.Entry<Dimension, List<SchemaGraph.JoinPath>>> iter = allPaths.entrySet().iterator();
+ private void pruneEmptyPaths(Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> allPaths) {
+ Iterator<Map.Entry<Aliased<Dimension>, List<SchemaGraph.JoinPath>>> iter = allPaths.entrySet().iterator();
while (iter.hasNext()) {
- Map.Entry<Dimension, List<SchemaGraph.JoinPath>> entry = iter.next();
+ Map.Entry<Aliased<Dimension>, List<SchemaGraph.JoinPath>> entry = iter.next();
if (entry.getValue().isEmpty()) {
iter.remove();
}
}
}
- private Map<Dimension, List<SchemaGraph.JoinPath>> pruneFactPaths(CubeInterface cube, final CandidateFact cfact) {
- Map<Dimension, List<SchemaGraph.JoinPath>> prunedPaths = new HashMap<Dimension, List<SchemaGraph.JoinPath>>();
+ private Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> pruneFactPaths
+ (CubeInterface cube, final CandidateFact cfact) {
+ Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> prunedPaths = new HashMap<Aliased<Dimension>, List<SchemaGraph.JoinPath>>();
// Remove join paths which cannot be satisfied by the candidate fact
- for (Map.Entry<Dimension, List<SchemaGraph.JoinPath>> ppaths : allPaths.entrySet()) {
+ for (Map.Entry<Aliased<Dimension>, List<SchemaGraph.JoinPath>> ppaths : allPaths.entrySet()) {
prunedPaths.put(ppaths.getKey(), new ArrayList<SchemaGraph.JoinPath>(ppaths.getValue()));
List<SchemaGraph.JoinPath> paths = prunedPaths.get(ppaths.getKey());
for (int i = 0; i < paths.size(); i++) {
@@ -565,7 +734,7 @@ class JoinResolver implements ContextRewriter {
*/
private Iterator<JoinClause> getJoinClausesForAllPaths(final CandidateFact fact,
final Set<Dimension> qdims, final CubeQueryContext cubeql) {
- Map<Dimension, List<SchemaGraph.JoinPath>> allPaths = this.allPaths;
+ Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> allPaths = this.allPaths;
// if fact is passed only look at paths possible from fact to dims
if (fact != null) {
allPaths = pruneFactPaths(cubeql.getCube(), fact);
@@ -577,11 +746,11 @@ class JoinResolver implements ContextRewriter {
// All path sets
final List<List<SchemaGraph.JoinPath>> pathSets = new ArrayList<List<SchemaGraph.JoinPath>>();
// Dimension corresponding to the path sets
- final Dimension dimensions[] = new Dimension[groupSizes.length];
+ final List<Aliased<Dimension>> dimensions = new ArrayList<Aliased<Dimension>>(groupSizes.length);
int i = 0;
- for (Map.Entry<Dimension, List<SchemaGraph.JoinPath>> entry : allPaths.entrySet()) {
- dimensions[i] = entry.getKey();
+ for (Map.Entry<Aliased<Dimension>, List<SchemaGraph.JoinPath>> entry : allPaths.entrySet()) {
+ dimensions.add(entry.getKey());
List<SchemaGraph.JoinPath> group = entry.getValue();
pathSets.add(group);
groupSizes[i] = group.size();
@@ -603,7 +772,7 @@ class JoinResolver implements ContextRewriter {
@Override
public JoinClause next() {
- Map<Dimension, List<TableRelationship>> chain = new LinkedHashMap<Dimension, List<TableRelationship>>();
+ Map<Aliased<Dimension>, List<TableRelationship>> chain = new LinkedHashMap<Aliased<Dimension>, List<TableRelationship>>();
//generate next permutation.
for(int i = groupSizes.length - 1, base=sample; i >= 0; base /= groupSizes[i], i--) {
selection[i] = base % groupSizes[i];
@@ -611,14 +780,14 @@ class JoinResolver implements ContextRewriter {
for (int i = 0; i < selection.length; i++) {
int selectedPath = selection[i];
List<TableRelationship> path = pathSets.get(i).get(selectedPath).getEdges();
- chain.put(dimensions[i], path);
+ chain.put(dimensions.get(i), path);
}
Set<Dimension> dimsOnPath = getDimsOnPath(chain, qdims);
sample++;
// Cost of join = number of tables joined in the clause
- return new JoinClause(chain, dimsOnPath);
+ return new JoinClause(cubeql, chain, dimsOnPath);
}
@Override
@@ -630,7 +799,7 @@ class JoinResolver implements ContextRewriter {
public Set<Dimension> pickOptionalTables(final CandidateFact fact,
Set<Dimension> qdims, CubeQueryContext cubeql) throws SemanticException {
- // Find the min cost join clause and add dimensions in the clause as optional dimensions
+ // Find the min cost join clause and add dimensions in the clause as optional dimensions
Set<Dimension> joiningOptionalTables = new HashSet<Dimension>();
if (qdims == null) {
return joiningOptionalTables;
@@ -685,12 +854,13 @@ class JoinResolver implements ContextRewriter {
return joiningOptionalTables;
}
- public Map<Dimension, List<SchemaGraph.JoinPath>> getAllPaths() {
+ public Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> getAllPaths() {
return allPaths;
}
public boolean isReachableDim(Dimension dim) {
- return allPaths.containsKey(dim) && !allPaths.get(dim).isEmpty();
+ Aliased<Dimension> aliased = Aliased.create(dim);
+ return allPaths.containsKey(aliased) && !allPaths.get(aliased).isEmpty();
}
}
@@ -722,6 +892,8 @@ class JoinResolver implements ContextRewriter {
private boolean partialJoinChain;
private AbstractCubeTable target;
private HiveConf conf;
+ private HashMap<Dimension, List<JoinChain>> dimensionInJoinChain = new HashMap<Dimension, List<JoinChain>>();
+
public JoinResolver(Configuration conf) {
}
@@ -751,9 +923,24 @@ class JoinResolver implements ContextRewriter {
}
}
+ private void processJoinChains(CubeQueryContext cubeql) throws HiveException {
+ for (JoinChain chain : cubeql.getJoinchains().values()) {
+ Set<String> dims = chain.getIntermediateDimensions();
+
+ dims.add(chain.getDestTable());
+ for(String dim: dims) {
+ Dimension dimension = cubeql.getMetastoreClient().getDimension(dim);
+ if(dimensionInJoinChain.get(dimension) == null) {
+ dimensionInJoinChain.put(dimension, new ArrayList<JoinChain>());
+ }
+ dimensionInJoinChain.get(dimension).add(chain);
+ }
+ }
+ }
+
/**
* Resolve joins automatically for the given query.
- *
+ *
* @param cubeql
* @throws SemanticException
*/
@@ -762,7 +949,8 @@ class JoinResolver implements ContextRewriter {
// A join is needed if there is a cube and at least one dimension, or, 0
// cubes and more than one
// dimensions
- Set<Dimension> dimensions = cubeql.getDimensions();
+ processJoinChains(cubeql);
+ Set<Dimension> dimensions = cubeql.getNonChainedDimensions();
// Add dimensions specified in the partial join tree
ASTNode joinClause = cubeql.getQB().getParseInfo().getJoinExpr();
if (joinClause == null) {
@@ -788,87 +976,99 @@ class JoinResolver implements ContextRewriter {
for (AbstractCubeTable partiallyJoinedTable : partialJoinConditions.keySet()) {
dimTables.add((Dimension) partiallyJoinedTable);
}
- Set<String> joinChainDims = new HashSet<String>();
- // Add dimensions from joinchains
+
for (JoinChain chain : cubeql.getJoinchains().values()) {
for (String dimName : chain.getIntermediateDimensions()) {
- joinChainDims.add(dimName);
cubeql.addOptionalDimTable(dimName, null, null, true);
}
}
- // add optional dimensions
- dimTables.addAll(cubeql.getOptionalDimensions());
// Remove target
dimTables.remove(target);
- if (dimTables.isEmpty() && !joinChainDims.contains(target)) {
+ if (dimTables.isEmpty() && cubeql.getJoinchains().isEmpty()) {
// Joins not required
- LOG.info("No dimension tables to resolve!");
+ LOG.info("No dimension tables to resolve and no join chains present!");
return;
}
+
+
SchemaGraph graph = cubeql.getMetastoreClient().getSchemaGraph();
- Map<Dimension, List<SchemaGraph.JoinPath>> multipleJoinPaths =
- new LinkedHashMap<Dimension, List<SchemaGraph.JoinPath>>();
+ Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> multipleJoinPaths =
+ new LinkedHashMap<Aliased<Dimension>, List<SchemaGraph.JoinPath>>();
+ Map<Dimension, String> dimensionAliasMap = new HashMap<Dimension, String>();
// Resolve join path for each dimension accessed in the query
for (Dimension joinee : dimTables) {
- if (joinChainDims.contains(joinee.getName()) || isJoinchainDestination(cubeql, joinee.getName())) {
- // this dimension is part of a path of the join chain
- // we should skip adding all paths
- LOG.info("Not adding allpaths to target for " + joinee);
- continue;
- }
- // Find all possible join paths
- SchemaGraph.GraphSearch search = new SchemaGraph.GraphSearch(joinee, target, graph);
- List<SchemaGraph.JoinPath> joinPaths = search.findAllPathsToTarget();
- if (joinPaths != null && !joinPaths.isEmpty()) {
- multipleJoinPaths.put(joinee, new ArrayList<SchemaGraph.JoinPath>(search.findAllPathsToTarget()));
- addOptionalTables(cubeql, multipleJoinPaths.get(joinee), cubeql.getDimensions().contains(joinee));
- } else {
- // No link to cube from this dim, can't proceed with query
- if (LOG.isDebugEnabled()) {
- graph.print();
- }
- LOG.warn("No join path between " + joinee.getName() + " and " + target.getName());
- if (cubeql.getDimensions().contains(joinee)) {
- throw new SemanticException(ErrorMsg.NO_JOIN_PATH, joinee.getName(), target.getName());
+ if(dimensionInJoinChain.get(joinee) == null) {
+ // Find all possible join paths
+ SchemaGraph.GraphSearch search = new SchemaGraph.GraphSearch(joinee, target, graph);
+ List<SchemaGraph.JoinPath> joinPaths = search.findAllPathsToTarget();
+ if (joinPaths != null && !joinPaths.isEmpty()) {
+ Aliased<Dimension> aliasedJoinee = Aliased.create(joinee);
+ multipleJoinPaths.put(aliasedJoinee, new ArrayList<SchemaGraph.JoinPath>(search.findAllPathsToTarget()));
+ addOptionalTables(cubeql, multipleJoinPaths.get(aliasedJoinee), cubeql.getDimensions().contains(joinee));
} else {
- // if joinee is optional dim table, remove those candidate facts
- Set<CandidateTable> candidates = cubeql.getOptionalDimensionMap().get(joinee).requiredForCandidates;
- for (CandidateTable candidate : candidates) {
- if (candidate instanceof CandidateFact) {
- if (cubeql.getCandidateFactTables().contains(candidate)) {
- LOG.info("Not considering fact:" + candidate + " as there is no join path to " + joinee);
- cubeql.getCandidateFactTables().remove(candidate);
- cubeql.addFactPruningMsgs(((CandidateFact) candidate).fact, new CandidateTablePruneCause(
- ((CandidateFact) candidate).fact.getName(), CubeTableCause.COLUMN_NOT_FOUND));
+ // No link to cube from this dim, can't proceed with query
+ if (LOG.isDebugEnabled()) {
+ graph.print();
+ }
+ LOG.warn("No join path between " + joinee.getName() + " and " + target.getName());
+ if (cubeql.getDimensions().contains(joinee)) {
+ throw new SemanticException(ErrorMsg.NO_JOIN_PATH, joinee.getName(), target.getName());
+ } else {
+ // if joinee is optional dim table, remove those candidate facts
+ Set<CandidateTable> candidates = cubeql.getOptionalDimensionMap().get(joinee).requiredForCandidates;
+ for (CandidateTable candidate : candidates) {
+ if (candidate instanceof CandidateFact) {
+ if (cubeql.getCandidateFactTables().contains(candidate)) {
+ LOG.info("Not considering fact:" + candidate + " as there is no join path to " + joinee);
+ cubeql.getCandidateFactTables().remove(candidate);
+ cubeql.addFactPruningMsgs(((CandidateFact) candidate).fact, new CandidateTablePruneCause(
+ ((CandidateFact) candidate).fact.getName(), CubeTableCause.COLUMN_NOT_FOUND));
+ }
+ } else if (cubeql.getCandidateDimTables().containsKey(((CandidateDim) candidate).getBaseTable())) {
+ LOG.info("Not considering dimtable:" + candidate + " as there is no join path to " + joinee);
+ cubeql.getCandidateDimTables().get(((CandidateDim) candidate).getBaseTable()).remove(candidate);
+ cubeql.addDimPruningMsgs(
+ (Dimension) candidate.getBaseTable(), (CubeDimensionTable) candidate.getTable(),
+ new CandidateTablePruneCause(candidate.getName(), CubeTableCause.COLUMN_NOT_FOUND)
+ );
}
- } else if (cubeql.getCandidateDimTables().containsKey(((CandidateDim) candidate).getBaseTable())) {
- LOG.info("Not considering dimtable:" + candidate + " as there is no join path to " + joinee);
- cubeql.getCandidateDimTables().get(((CandidateDim) candidate).getBaseTable()).remove(candidate);
- cubeql.addDimPruningMsgs((Dimension) candidate.getBaseTable(), (CubeDimensionTable) candidate.getTable(),
- new CandidateTablePruneCause(candidate.getName(), CubeTableCause.COLUMN_NOT_FOUND));
}
}
}
+ } else if(dimensionInJoinChain.get(joinee).size() > 1) {
+ throw new SemanticException("Table " + joinee.getName() + " has " +
+ dimensionInJoinChain.get(joinee).size() + " different paths through joinchains " +
+ "(" + dimensionInJoinChain.get(joinee) + ")" +
+ " used in query. Couldn't determine which one to use");
+ } else {
+ // the case when dimension is used only once in all joinchains.
+ if(isJoinchainDestination(cubeql, joinee)) {
+ throw new SemanticException("Table " + joinee.getName() + " is getting accessed via two different names: "
+ + "[" + dimensionInJoinChain.get(joinee).get(0).getName() + ", " + joinee.getName() + "]");
+ }
}
}
-
// populate paths from joinchains
for (JoinChain chain : cubeql.getJoinchains().values()) {
- multipleJoinPaths.put(cubeql.getMetastoreClient().getDimension(chain.getDestTable()),
- chain.getRelationEdges(cubeql.getMetastoreClient()));
+ Dimension dimension = cubeql.getMetastoreClient().getDimension(chain.getDestTable());
+ Aliased<Dimension> aliasedDimension = Aliased.create(dimension, chain.getName());
+ if(multipleJoinPaths.get(aliasedDimension) == null) {
+ multipleJoinPaths.put(aliasedDimension, new ArrayList<SchemaGraph.JoinPath>());
+ }
+ multipleJoinPaths.get(aliasedDimension).addAll(
+ chain.getRelationEdges(cubeql.getMetastoreClient()));
}
AutoJoinContext joinCtx =
- new AutoJoinContext(multipleJoinPaths, cubeql.optionalDimensions, partialJoinConditions, partialJoinChain,
- tableJoinTypeMap, target, conf.get(CubeQueryConfUtil.JOIN_TYPE_KEY), true);
+ new AutoJoinContext(multipleJoinPaths, cubeql.optionalDimensions, partialJoinConditions, partialJoinChain,
+ tableJoinTypeMap, target, conf.get(CubeQueryConfUtil.JOIN_TYPE_KEY), true);
cubeql.setAutoJoinCtx(joinCtx);
}
-
- private boolean isJoinchainDestination(CubeQueryContext cubeql, String dimName) {
+ private boolean isJoinchainDestination(CubeQueryContext cubeql, Dimension dimension) {
for (JoinChain chain : cubeql.getJoinchains().values()) {
- if (chain.getDestTable().equalsIgnoreCase(dimName)) {
+ if (chain.getDestTable().equalsIgnoreCase(dimension.getName())) {
return true;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/e2c0547d/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
index 36c46fb..06550b8 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
@@ -229,7 +229,7 @@ class StorageTableResolver implements ContextRewriter {
if (!failOnPartialData || numParts > 0) {
storageTables.add(tableName);
String whereClause =
- StorageUtil.getWherePartClause(dim.getTimedDimension(), cubeql.getAliasForTabName(dim.getName()),
+ StorageUtil.getWherePartClause(dim.getTimedDimension(), null,
StorageConstants.getPartitionsForLatest());
whereClauses.put(tableName, whereClause);
} else {
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/e2c0547d/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageUtil.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageUtil.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageUtil.java
index fcfe207..6d724ea 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageUtil.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageUtil.java
@@ -36,7 +36,7 @@ class StorageUtil {
for (int i = 0; i < parts.size(); i++) {
partStr.append(sep);
partStr.append("(");
- partStr.append(tableName);
+ partStr.append(tableName != null ? tableName : "%s");
partStr.append(".");
partStr.append(timeDimName);
partStr.append(" = '");
@@ -142,4 +142,10 @@ class StorageUtil {
}
return Collections.singletonMap(maxCoveringStorage, maxCoveringSet);
}
+ public static String getWhereClause(String clause, String alias) {
+ return String.format(clause, alias);
+ }
+ public static String getWhereClause(CandidateDim dim, String alias) {
+ return getWhereClause(dim.whereClause, alias);
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/e2c0547d/lens-cube/src/main/java/org/apache/lens/cube/parse/TimerangeResolver.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/TimerangeResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/TimerangeResolver.java
index f8a4696..43dece3 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/TimerangeResolver.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/TimerangeResolver.java
@@ -205,9 +205,9 @@ class TimerangeResolver implements ContextRewriter {
LOG.info("Timerange queried is not in column life for " + column
+ ", Removing join paths containing the column");
// Remove join paths containing this column
- Map<Dimension, List<SchemaGraph.JoinPath>> allPaths = joinContext.getAllPaths();
+ Map<Aliased<Dimension>, List<SchemaGraph.JoinPath>> allPaths = joinContext.getAllPaths();
- for (Dimension dimension : allPaths.keySet()) {
+ for (Aliased<Dimension> dimension : allPaths.keySet()) {
List<SchemaGraph.JoinPath> joinPaths = allPaths.get(dimension);
Iterator<SchemaGraph.JoinPath> joinPathIterator = joinPaths.iterator();
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/e2c0547d/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
index 0e414df..18154cf 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
@@ -655,28 +655,78 @@ public class CubeTestSetup {
cubeProperties.put(MetastoreConstants.TIMEDIM_TO_PART_MAPPING_PFX + "pt", "pt");
cubeProperties.put(MetastoreConstants.CUBE_ALL_FIELDS_QUERIABLE, "false");
- Set<JoinChain> joinchains = new HashSet<JoinChain>();
- JoinChain cityState = new JoinChain("cityState", "city-state", "state thru city");
- List<TableReference> statePaths1 = new ArrayList<TableReference>();
- statePaths1.add(new TableReference("basecube", "cityid"));
- statePaths1.add(new TableReference("citydim", "id"));
- statePaths1.add(new TableReference("citydim", "stateid"));
- statePaths1.add(new TableReference("statedim", "id"));
- cityState.addPath(statePaths1);
- List<TableReference> statePaths2 = new ArrayList<TableReference>();
- statePaths2.add(new TableReference("basecube", "cityid"));
- statePaths2.add(new TableReference("citydim", "id"));
- statePaths2.add(new TableReference("citydim", "statename"));
- statePaths2.add(new TableReference("statedim", "name"));
- cityState.addPath(statePaths2);
- joinchains.add(cityState);
-
- JoinChain cubeState = new JoinChain("cubeState", "cube-state", "state thru cube");
- List<TableReference> statePaths3 = new ArrayList<TableReference>();
- statePaths3.add(new TableReference("basecube", "stateid"));
- statePaths3.add(new TableReference("statedim", "id"));
- cubeState.addPath(statePaths3);
- joinchains.add(cubeState);
+ Set<JoinChain> joinchains = new HashSet<JoinChain>(){
+ {
+ add(new JoinChain("cityState", "city-state", "state thru city") {
+ {
+ addPath(new ArrayList<TableReference>() {
+ {
+ add(new TableReference("basecube", "cityid"));
+ add(new TableReference("citydim", "id"));
+ add(new TableReference("citydim", "stateid"));
+ add(new TableReference("statedim", "id"));
+ }
+ });
+ addPath(new ArrayList<TableReference>() {
+ {
+ add(new TableReference("basecube", "cityid"));
+ add(new TableReference("citydim", "id"));
+ add(new TableReference("citydim", "statename"));
+ add(new TableReference("statedim", "name"));
+ }
+ });
+ }
+ });
+ add(new JoinChain("cubeState", "cube-state", "state thru cube"){
+ {
+ addPath(new ArrayList<TableReference>(){
+ {
+ add(new TableReference("basecube", "stateid"));
+ add(new TableReference("statedim", "id"));
+ }
+ });
+ }
+ });
+ add(new JoinChain("cityZip", "city-zip", "zip thru city"){
+ {
+ addPath(new ArrayList<TableReference>(){
+ {
+ add(new TableReference("basecube", "cityid"));
+ add(new TableReference("citydim", "id"));
+ add(new TableReference("citydim", "zipcode"));
+ add(new TableReference("zipdim", "code"));
+ }
+ });
+ }
+ });
+ add(new JoinChain("cubeStateCountry", "cube-state-country", "country through state"){
+ {
+ addPath(new ArrayList<TableReference>(){
+ {
+ add(new TableReference("basecube", "stateid"));
+ add(new TableReference("statedim", "id"));
+ add(new TableReference("statedim", "countryid"));
+ add(new TableReference("countrydim", "id"));
+ }
+ });
+ }
+ });
+ add(new JoinChain("cubeCityStateCountry", "cube-city-state-country", "country through state through city"){
+ {
+ addPath(new ArrayList<TableReference>(){
+ {
+ add(new TableReference("basecube", "cityid"));
+ add(new TableReference("citydim", "id"));
+ add(new TableReference("citydim", "stateid"));
+ add(new TableReference("statedim", "id"));
+ add(new TableReference("statedim", "countryid"));
+ add(new TableReference("countrydim", "id"));
+ }
+ });
+ }
+ });
+ }
+ };
// add ref dim through chain
cubeDimensions2.add(new ReferencedDimAtrribute(
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/e2c0547d/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java
index 9ddc454..cb70b66 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java
@@ -640,10 +640,11 @@ public class TestCubeRewriter extends TestQueryRewrite {
+ " format_number(SUM(msr1)-(SUM(msr2)+SUM(msr3)),\"##################.###\") AS a6"
+ " FROM testCube where " + twoDaysRange + " HAVING (SUM(msr1) >=1000) AND (SUM(msr2)>=0.01)", conf);
String actualExpr =
- " join " + getDbName() + "c1_citytable citydim on testcube.cityid = citydim.id and (citydim.dt = 'latest')"
- + " join " + getDbName()
- + "c1_ziptable zipdim on testcube.zipcode = zipdim.code and (zipdim.dt = 'latest') " + " join "
- + getDbName() + "c1_statetable statedim on testcube.stateid = statedim.id and (statedim.dt = 'latest')";
+ ""
+ + " join " + getDbName() + "c1_statetable statedim on testcube.stateid = statedim.id and (statedim.dt = 'latest')"
+ + " join " + getDbName() + "c1_ziptable zipdim on testcube.zipcode = zipdim.code and (zipdim.dt = 'latest') "
+ + " join " + getDbName() + "c1_citytable citydim on testcube.cityid = citydim.id and (citydim.dt = 'latest')"
+ + "";
expected =
getExpectedQuery(
cubeName,
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/e2c0547d/lens-cube/src/test/java/org/apache/lens/cube/parse/TestExpressionResolver.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestExpressionResolver.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestExpressionResolver.java
index 173c458..771a4ce 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestExpressionResolver.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestExpressionResolver.java
@@ -171,13 +171,16 @@ public class TestExpressionResolver extends TestQueryRewrite {
rewrite("select cityAndState, avgmsr from testCube" + " where " + twoDaysRange + " and substrexpr != 'XYZ'",
conf);
- String joinExpr =
- "join " + getDbName() + "c1_citytable citydim"
- + " on testcube.cityid = citydim.id and (citydim.dt = 'latest') join" + getDbName()
+ String join1 =
+ " join " + getDbName() + "c1_citytable citydim"
+ + " on testcube.cityid = citydim.id and (citydim.dt = 'latest') ";
+ String join2 = " join" + getDbName()
+ "c1_statetable statedim on" + " testcube.stateid = statedim.id and (statedim.dt = 'latest')";
+ String joinExpr;
+
expected =
getExpectedQuery(cubeName, "select concat(citydim.name, \":\", statedim.name),"
- + " avg(testcube.msr1 + testcube.msr2) FROM ", joinExpr, null, " and substr(testcube.dim1, 3) != 'XYZ'"
+ + " avg(testcube.msr1 + testcube.msr2) FROM ", join2 + join1, null, " and substr(testcube.dim1, 3) != 'XYZ'"
+ " group by concat(citydim.name, \":\", statedim.name)", null, getWhereForHourly2days("C1_testfact2_raw"));
TestCubeRewriter.compareQueries(expected, hqlQuery);
@@ -185,12 +188,11 @@ public class TestExpressionResolver extends TestQueryRewrite {
rewrite("select cityAndState, avgmsr from testCube tc" + " join citydim cd join statedim sd " + " where "
+ twoDaysRange + " and substrexpr != 'XYZ'", conf);
- joinExpr =
- " inner join " + getDbName() + "c1_citytable cd" + " on tc.cityid = cd.id and (cd.dt = 'latest')"
- + " inner join" + getDbName() + "c1_statetable sd on" + " tc.stateid = sd.id and (sd.dt = 'latest')";
+ join1 = " inner join " + getDbName() + "c1_citytable cd" + " on tc.cityid = cd.id and (cd.dt = 'latest')";
+ join2 = " inner join" + getDbName() + "c1_statetable sd on" + " tc.stateid = sd.id and (sd.dt = 'latest')";
expected =
- getExpectedQuery("tc", "select concat(cd.name, \":\", sd.name)," + " avg(tc.msr1 + tc.msr2) FROM ", joinExpr,
- null, " and substr(tc.dim1, 3) != 'XYZ'" + " group by concat(cd.name, \":\", sd.name)", null,
+ getExpectedQuery("tc", "select concat(cd.name, \":\", sd.name)," + " avg(tc.msr1 + tc.msr2) FROM ",
+ join2 + join1, null, " and substr(tc.dim1, 3) != 'XYZ'" + " group by concat(cd.name, \":\", sd.name)", null,
getWhereForHourly2days("tc", "C1_testfact2_raw"));
TestCubeRewriter.compareQueries(expected, hqlQuery);
@@ -202,9 +204,9 @@ public class TestExpressionResolver extends TestQueryRewrite {
+ twoDaysRange, conf);
joinExpr =
+ "join" + getDbName() + "c1_statetable statedim on" + " testcube.stateid = statedim.id" +
" inner join " + getDbName() + "c1_citytable citydim" + " on testcube.cityid = citydim.id "
- + " and substr(testcube.dim1, 3) != 'XYZ' and (citydim.dt = 'latest') join" + getDbName()
- + "c1_statetable statedim on" + " testcube.stateid = statedim.id ";
+ + " and substr(testcube.dim1, 3) != 'XYZ' and (citydim.dt = 'latest') ";
expected =
getExpectedQuery(cubeName, "select concat(citydim.name, \":\", statedim.name),"
+ " avg(testcube.msr1 + testcube.msr2) FROM ", joinExpr, null,
@@ -265,12 +267,14 @@ public class TestExpressionResolver extends TestQueryRewrite {
public void testDimensionQuery() throws Exception {
String hqlQuery = rewrite("select citydim.name, cityaddress from" + " citydim", conf);
- String joinExpr =
- "join " + getDbName() + "c1_ziptable zipdim on" + " citydim.zipcode = zipdim.code and (zipdim.dt = 'latest')"
- + " join " + getDbName() + "c1_statetable statedim on"
- + " citydim.stateid = statedim.id and (statedim.dt = 'latest')" + " join " + getDbName()
+ String joinExpr;
+ String join1 =
+ " join " + getDbName() + "c1_ziptable zipdim on" + " citydim.zipcode = zipdim.code and (zipdim.dt = 'latest')";
+ String join2 = " join " + getDbName() + "c1_statetable statedim on"
+ + " citydim.stateid = statedim.id and (statedim.dt = 'latest')";
+ String join3 = " join " + getDbName()
+ "c1_countrytable countrydim on" + " statedim.countryid = countrydim.id";
-
+ joinExpr = join2 + join3 + join1;
String expected =
getExpectedQuery("citydim", "SELECT citydim.name, concat((citydim.name), \":\", (statedim.name ),"
+ " \":\",(countrydim.name), \":\" , ( zipdim . code )) FROM ", joinExpr, null, null, "c1_citytable", true);
@@ -279,10 +283,11 @@ public class TestExpressionResolver extends TestQueryRewrite {
hqlQuery = rewrite("select ct.name, ct.cityaddress from" + " citydim ct", conf);
joinExpr =
- "join " + getDbName() + ".c1_ziptable zipdim on " + "ct.zipcode = zipdim.code and (zipdim.dt = 'latest')"
- + " join " + getDbName() + ".c1_statetable statedim on "
- + "ct.stateid = statedim.id and (statedim.dt = 'latest')" + " join " + getDbName()
- + ".c1_countrytable countrydim on " + "statedim.countryid = countrydim.id";
+ "" +
+ " join " + getDbName() + ".c1_statetable statedim on " + "ct.stateid = statedim.id and (statedim.dt = 'latest')" +
+ " join " + getDbName() + ".c1_countrytable countrydim on " + "statedim.countryid = countrydim.id" +
+ " join " + getDbName() + ".c1_ziptable zipdim on " + "ct.zipcode = zipdim.code and (zipdim.dt = 'latest')" +
+ "";
expected =
getExpectedQuery("ct", "SELECT ct.name, concat((ct.name), \":\", (statedim.name ),"