Posted to commits@lens.apache.org by am...@apache.org on 2014/11/25 11:41:29 UTC
incubator-lens git commit: LENS-14 : Fixes lightest fact first config to work with more than one storage (Rajat Khandelwal via amareshwari)
Repository: incubator-lens
Updated Branches:
refs/heads/master 55e30561a -> ab3700641
LENS-14 : Fixes lightest fact first config to work with more than one storage (Rajat Khandelwal via amareshwari)
Project: http://git-wip-us.apache.org/repos/asf/incubator-lens/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-lens/commit/ab370064
Tree: http://git-wip-us.apache.org/repos/asf/incubator-lens/tree/ab370064
Diff: http://git-wip-us.apache.org/repos/asf/incubator-lens/diff/ab370064
Branch: refs/heads/master
Commit: ab37006415129ad033efbc10dd5956150c474ea0
Parents: 55e3056
Author: Amareshwari Sriramdasu <am...@inmobi.com>
Authored: Tue Nov 25 16:11:13 2014 +0530
Committer: Amareshwari Sriramdasu <am...@inmobi.com>
Committed: Tue Nov 25 16:11:13 2014 +0530
----------------------------------------------------------------------
.../lens/cube/parse/CubeQueryRewriter.java | 12 ++-
.../lens/cube/parse/StorageTableResolver.java | 57 +++++++---
.../apache/lens/cube/parse/CubeTestSetup.java | 103 ++++++++++++++++---
3 files changed, 139 insertions(+), 33 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/ab370064/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java
index 81c0d12..1325c05 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java
@@ -127,6 +127,7 @@ public class CubeQueryRewriter {
rewriters.add(new AliasReplacer(conf));
DenormalizationResolver denormResolver = new DenormalizationResolver(conf);
CandidateTableResolver candidateTblResolver = new CandidateTableResolver(conf);
+ StorageTableResolver storageTableResolver = new StorageTableResolver(conf);
// De-normalized columns resolved
rewriters.add(denormResolver);
// Resolve candidate fact tables and dimension tables for columns queried
@@ -141,11 +142,18 @@ public class CubeQueryRewriter {
// Resolve aggregations and generate base select tree
rewriters.add(new AggregateResolver(conf));
rewriters.add(new GroupbyResolver(conf));
+ // Phase 1: resolve fact tables.
+ rewriters.add(storageTableResolver);
if (lightFactFirst) {
rewriters.add(new LightestFactResolver(conf));
}
- // Resolve storage partitions and table names
- rewriters.add(new StorageTableResolver(conf));
+ // Phase 2: resolve fact table partitions.
+ rewriters.add(storageTableResolver);
+ if (!lightFactFirst) {
+ rewriters.add(new LightestFactResolver(conf));
+ }
+ // Phase 3: resolve dimension tables and partitions.
+ rewriters.add(storageTableResolver);
// Check for candidate tables using de-normalized columns
rewriters.add(denormResolver);
rewriters.add(new LeastPartitionResolver(conf));
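The crux of the fix: a single StorageTableResolver instance is now registered three times in the rewriter chain. Because the resolver advances an internal phase on each invocation (see the StorageTableResolver diff below), storage resolution is split into fact-table, fact-partition, and dimension phases, and LightestFactResolver runs either between the first two phases (when lightFactFirst is set) or after fact partitions are resolved. A minimal sketch of the resulting chain order under both settings; the strings are illustrative labels, not the Lens API:

import java.util.ArrayList;
import java.util.List;

public class RewriterOrderSketch {
  static List<String> chain(boolean lightFactFirst) {
    List<String> rewriters = new ArrayList<>();
    rewriters.add("StorageTableResolver#1 (fact tables)");
    if (lightFactFirst) {
      rewriters.add("LightestFactResolver");
    }
    rewriters.add("StorageTableResolver#2 (fact partitions)");
    if (!lightFactFirst) {
      rewriters.add("LightestFactResolver");
    }
    rewriters.add("StorageTableResolver#3 (dim tables and partitions)");
    return rewriters;
  }

  public static void main(String[] args) {
    System.out.println("lightFactFirst=true:  " + chain(true));
    System.out.println("lightFactFirst=false: " + chain(false));
  }
}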
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/ab370064/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
index afc44e1..3fb9d09 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
@@ -75,6 +75,20 @@ class StorageTableResolver implements ContextRewriter {
private final Map<String, List<String>> nonExistingPartitions = new HashMap<String, List<String>>();
private TimeRangeWriter rangeWriter;
private DateFormat partWhereClauseFormat = null;
+ private PHASE phase;
+
+ static enum PHASE {
+ FACT_TABLES, FACT_PARTITIONS, DIM_TABLE_AND_PARTITIONS;
+ static PHASE first() {
+ return values()[0];
+ }
+ static PHASE last() {
+ return values()[values().length - 1];
+ }
+ PHASE next() {
+ return values()[(this.ordinal() + 1) % values().length];
+ }
+ }
public StorageTableResolver(Configuration conf) {
this.conf = conf;
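The new PHASE enum is a tiny state machine: first() yields the initial phase and next() advances with wrap-around, since ordinal() + 1 modulo the number of constants maps the last constant back to the first. A standalone sketch of the cycling behavior (the enum mirrors the hunk above; the demo class is illustrative):

enum Phase {
  FACT_TABLES, FACT_PARTITIONS, DIM_TABLE_AND_PARTITIONS;

  static Phase first() {
    return values()[0];
  }

  Phase next() {
    // +1 modulo values().length wraps the last phase back to the first.
    return values()[(ordinal() + 1) % values().length];
  }
}

public class PhaseCycleDemo {
  public static void main(String[] args) {
    Phase p = Phase.first();
    for (int i = 0; i < 4; i++) {
      System.out.print(p + " ");
      p = p.next();
    }
    // Prints: FACT_TABLES FACT_PARTITIONS DIM_TABLE_AND_PARTITIONS FACT_TABLES
    System.out.println();
  }
}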
@@ -104,6 +118,7 @@ class StorageTableResolver implements ContextRewriter {
if (formatStr != null) {
partWhereClauseFormat = new SimpleDateFormat(formatStr);
}
+ this.phase = PHASE.first();
}
private List<String> getSupportedStorages(Configuration conf) {
@@ -115,10 +130,8 @@ class StorageTableResolver implements ContextRewriter {
}
public boolean isStorageSupported(String storage) {
- if (!allStoragesSupported) {
- return supportedStorages.contains(storage);
- }
- return true;
+ return allStoragesSupported ||
+ supportedStorages.contains(storage);
}
Map<String, List<String>> storagePartMap = new HashMap<String, List<String>>();
@@ -127,16 +140,28 @@ class StorageTableResolver implements ContextRewriter {
public void rewriteContext(CubeQueryContext cubeql) throws SemanticException {
client = cubeql.getMetastoreClient();
- if (!cubeql.getCandidateFactTables().isEmpty()) {
- // resolve storage table names
- resolveFactStorageTableNames(cubeql);
- // resolve storage partitions
- resolveFactStoragePartitions(cubeql);
+ switch(phase) {
+ case FACT_TABLES:
+ if (!cubeql.getCandidateFactTables().isEmpty()) {
+ // resolve storage table names
+ resolveFactStorageTableNames(cubeql);
+ }
+ cubeql.pruneCandidateFactSet(CubeTableCause.NO_CANDIDATE_STORAGES);
+ break;
+ case FACT_PARTITIONS:
+ if (!cubeql.getCandidateFactTables().isEmpty()) {
+ // resolve storage partitions
+ resolveFactStoragePartitions(cubeql);
+ }
+ cubeql.pruneCandidateFactSet(CubeTableCause.NO_CANDIDATE_STORAGES);
+ break;
+ case DIM_TABLE_AND_PARTITIONS:
+ resolveDimStorageTablesAndPartitions(cubeql);
+ break;
}
- // resolve dimension tables
- resolveDimStorageTablesAndPartitions(cubeql);
+ //Doing this on all three phases. Keep updating cubeql with the current identified missing partitions.
cubeql.setNonexistingParts(nonExistingPartitions);
- cubeql.pruneCandidateFactSet(CubeTableCause.NO_CANDIDATE_STORAGES);
+ phase = phase.next();
}
private void resolveDimStorageTablesAndPartitions(CubeQueryContext cubeql) throws SemanticException {
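Each call to rewriteContext now handles exactly one phase and then advances, so the three registrations of the same instance in CubeQueryRewriter cover fact tables, fact partitions, and dimensions in turn, with candidate facts pruned for NO_CANDIDATE_STORAGES after each of the two fact phases. A self-contained sketch of the pattern; the stand-in parameter type and println bodies are illustrative, not the Lens API:

public class PhasedResolverSketch {
  enum Phase { FACT_TABLES, FACT_PARTITIONS, DIM_TABLE_AND_PARTITIONS }

  private Phase phase = Phase.FACT_TABLES;

  public void rewriteContext(Object cubeql) {
    switch (phase) {
    case FACT_TABLES:
      System.out.println("resolve fact storage table names, then prune");
      break;
    case FACT_PARTITIONS:
      System.out.println("resolve fact storage partitions, then prune");
      break;
    case DIM_TABLE_AND_PARTITIONS:
      System.out.println("resolve dim tables and their partitions");
      break;
    }
    // Advance so the next registration in the chain gets the next phase.
    phase = Phase.values()[(phase.ordinal() + 1) % Phase.values().length];
  }

  public static void main(String[] args) {
    PhasedResolverSketch resolver = new PhasedResolverSketch();
    for (int i = 0; i < 3; i++) {
      resolver.rewriteContext(null); // same instance, three chain slots
    }
  }
}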
@@ -195,11 +220,11 @@ class StorageTableResolver implements ContextRewriter {
} else {
LOG.info("Partition " + StorageConstants.LATEST_PARTITION_VALUE + " does not exist on " + tableName);
}
- if (!failOnPartialData || (failOnPartialData && numParts > 0)) {
+ if (!failOnPartialData || numParts > 0) {
storageTables.add(tableName);
String whereClause =
- StorageUtil.getWherePartClause(dim.getTimedDimension(), cubeql.getAliasForTabName(dim.getName()),
- StorageConstants.getPartitionsForLatest());
+ StorageUtil.getWherePartClause(dim.getTimedDimension(), cubeql.getAliasForTabName(dim.getName()),
+ StorageConstants.getPartitionsForLatest());
whereClauses.put(tableName, whereClause);
} else {
LOG.info("Not considering dim storage table:" + tableName + " as no dim partitions exist");
@@ -347,7 +372,7 @@ class StorageTableResolver implements ContextRewriter {
cfact.numQueriedParts += rangeParts.size();
answeringParts.addAll(rangeParts);
cfact.rangeToWhereClause.put(range, rangeWriter.getTimeRangeWhereClause(cubeql,
- cubeql.getAliasForTabName(cubeql.getCube().getName()), rangeParts));
+ cubeql.getAliasForTabName(cubeql.getCube().getName()), rangeParts));
}
if (!nonExistingParts.isEmpty()) {
addNonExistingParts(cfact.fact.getName(), nonExistingParts);
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/ab370064/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
index e40db8d..485ff46 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
@@ -42,13 +42,11 @@ import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.ParseException;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.lens.cube.metadata.BaseDimAttribute;
import org.apache.lens.cube.metadata.ColumnMeasure;
-import org.apache.lens.cube.metadata.Cube;
import org.apache.lens.cube.metadata.CubeDimAttribute;
import org.apache.lens.cube.metadata.CubeFactTable;
import org.apache.lens.cube.metadata.CubeMeasure;
@@ -67,11 +65,6 @@ import org.apache.lens.cube.metadata.StorageTableDesc;
import org.apache.lens.cube.metadata.TableReference;
import org.apache.lens.cube.metadata.TestCubeMetastoreClient;
import org.apache.lens.cube.metadata.UpdatePeriod;
-import org.apache.lens.cube.parse.CubeQueryContext;
-import org.apache.lens.cube.parse.CubeQueryRewriter;
-import org.apache.lens.cube.parse.DateUtil;
-import org.apache.lens.cube.parse.HQLParser;
-import org.apache.lens.cube.parse.StorageUtil;
import org.testng.Assert;
/*
@@ -84,7 +77,9 @@ import org.testng.Assert;
* testFact2 : {C1} -> {Hourly}
* testFactMonthly : {C2} -> {Monthly}
* summary1,summary2,summary3 - {C1, C2} -> {daily, hourly, minutely}
+ * cheapFact: {C99} -> {Minutely, hourly, daily, monthly, quarterly, yearly}
* C2 has multiple dated partitions
+ * C99 is not to be used as supported storage in testcases
*
* CityTable : C1 - SNAPSHOT and C2 - NO snapshot
*
@@ -129,6 +124,7 @@ public class CubeTestSetup {
private static String c2 = "C2";
private static String c3 = "C3";
private static String c4 = "C4";
+ private static String c99 = "C99";
public static void init() {
if (inited) {
@@ -544,7 +540,7 @@ public class CubeTestSetup {
dimensions.add("dim1");
dimensions.add("dim2");
client
- .createDerivedCube(TEST_CUBE_NAME, DERIVED_CUBE_NAME, measures, dimensions, new HashMap<String, String>(), 0L);
+ .createDerivedCube(TEST_CUBE_NAME, DERIVED_CUBE_NAME, measures, dimensions, new HashMap<String, String>(), 5L);
}
private void createBaseAndDerivedCubes(CubeMetastoreClient client) throws HiveException, ParseException {
@@ -580,7 +576,7 @@ public class CubeTestSetup {
Set<String> dimensions = new HashSet<String>();
dimensions.add("dim1");
dimensions.add("dim11");
- client.createDerivedCube(BASE_CUBE_NAME, DERIVED_CUBE_NAME1, measures, dimensions, derivedProperties, 0L);
+ client.createDerivedCube(BASE_CUBE_NAME, DERIVED_CUBE_NAME1, measures, dimensions, derivedProperties, 5L);
measures = new HashSet<String>();
measures.add("msr2");
@@ -660,7 +656,7 @@ public class CubeTestSetup {
factColumns.add(new FieldSchema("dim11", "string", "base dim"));
// create cube fact
- client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 0L, null, storageTables);
+ client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, null, storageTables);
// create fact only with extra measures
factName = "testFact2_BASE";
@@ -674,7 +670,7 @@ public class CubeTestSetup {
factColumns.add(new FieldSchema("dim2", "int", "dim2 id"));
// create cube fact
- client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 0L, null, storageTables);
+ client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, null, storageTables);
// create fact only with extra measures
factName = "testFact3_BASE";
@@ -687,7 +683,7 @@ public class CubeTestSetup {
factColumns.add(new FieldSchema("dim11", "string", "base dim"));
// create cube fact
- client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 0L, null, storageTables);
+ client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, null, storageTables);
// create raw fact only with extra measures
factName = "testFact2_RAW_BASE";
@@ -792,7 +788,7 @@ public class CubeTestSetup {
storageTables.put(c2, s1);
storageTables.put(c3, s1);
// create cube fact
- client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 0L, null, storageTables);
+ client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, null, storageTables);
CubeFactTable fact = client.getFactTable(factName);
// Add all hourly partitions for two days
@@ -820,6 +816,81 @@ public class CubeTestSetup {
temp = cal.getTime();
}
}
+ private void createCubeCheapFact(CubeMetastoreClient client) throws HiveException {
+ String factName = "cheapFact";
+ List<FieldSchema> factColumns = new ArrayList<FieldSchema>(cubeMeasures.size());
+ for (CubeMeasure measure : cubeMeasures) {
+ factColumns.add(measure.getColumn());
+ }
+
+ // add dimensions of the cube
+ factColumns.add(new FieldSchema("zipcode", "int", "zip"));
+ factColumns.add(new FieldSchema("cityid", "int", "city id"));
+ factColumns.add(new FieldSchema("stateid", "int", "city id"));
+ factColumns.add(new FieldSchema("test_time_dim_hour_id", "int", "time id"));
+ factColumns.add(new FieldSchema("ambigdim1", "string", "used in" + " testColumnAmbiguity"));
+
+ Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>();
+ Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>();
+ updates.add(UpdatePeriod.MINUTELY);
+ updates.add(UpdatePeriod.HOURLY);
+ updates.add(UpdatePeriod.DAILY);
+ updates.add(UpdatePeriod.MONTHLY);
+ updates.add(UpdatePeriod.QUARTERLY);
+ updates.add(UpdatePeriod.YEARLY);
+
+ ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>();
+ List<String> timePartCols = new ArrayList<String>();
+ partCols.add(TestCubeMetastoreClient.getDatePartition());
+ timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey());
+
+ StorageTableDesc s1 = new StorageTableDesc();
+ s1.setInputFormat(TextInputFormat.class.getCanonicalName());
+ s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
+ s1.setPartCols(partCols);
+ s1.setTimePartCols(timePartCols);
+
+ StorageTableDesc s2 = new StorageTableDesc();
+ s2.setInputFormat(TextInputFormat.class.getCanonicalName());
+ s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
+ ArrayList<FieldSchema> s2PartCols = new ArrayList<FieldSchema>();
+ s2PartCols.add(new FieldSchema("ttd", serdeConstants.STRING_TYPE_NAME, "test date partition"));
+ s2.setPartCols(s2PartCols);
+ s2.setTimePartCols(Arrays.asList("ttd"));
+
+ storageAggregatePeriods.put(c99, updates);
+
+ Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>();
+ storageTables.put(c99, s2);
+ // create cube fact
+ client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 0L, null, storageTables);
+
+ CubeFactTable fact = client.getFactTable(factName);
+ // Add all hourly partitions for two days
+ Calendar cal = Calendar.getInstance();
+ cal.setTime(twodaysBack);
+ Date temp = cal.getTime();
+ while (!(temp.after(now))) {
+ Map<String, Date> timeParts = new HashMap<String, Date>();
+ timeParts.put("ttd", temp);
+ StoragePartitionDesc sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, UpdatePeriod.HOURLY);
+ client.addPartition(sPartSpec, c99);
+ cal.add(Calendar.HOUR_OF_DAY, 1);
+ temp = cal.getTime();
+ }
+
+ // Add all hourly partitions for twoDaysRangeBefore4days
+ cal.setTime(before4daysStart);
+ temp = cal.getTime();
+ while (!(temp.after(before4daysEnd))) {
+ Map<String, Date> timeParts = new HashMap<String, Date>();
+ timeParts.put("ttd", temp);
+ StoragePartitionDesc sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, UpdatePeriod.HOURLY);
+ client.addPartition(sPartSpec, c99);
+ cal.add(Calendar.HOUR_OF_DAY, 1);
+ temp = cal.getTime();
+ }
+ }
private void createCubeFactWeekly(CubeMetastoreClient client) throws HiveException {
String factName = "testFactWeekly";
@@ -849,7 +920,7 @@ public class CubeTestSetup {
Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>();
storageTables.put(c1, s1);
// create cube fact
- client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 0L, null, storageTables);
+ client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, null, storageTables);
}
private void createCubeFactOnlyHourly(CubeMetastoreClient client) throws HiveException {
@@ -998,7 +1069,7 @@ public class CubeTestSetup {
storageTables.put(c2, s1);
// create cube fact
- client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 0L, null, storageTables);
+ client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, null, storageTables);
}
// DimWithTwoStorages
@@ -1476,9 +1547,11 @@ public class CubeTestSetup {
client.createStorage(new HDFSStorage(c2));
client.createStorage(new HDFSStorage(c3));
client.createStorage(new HDFSStorage(c4));
+ client.createStorage(new HDFSStorage(c99));
createCube(client);
createBaseAndDerivedCubes(client);
createCubeFact(client);
+ createCubeCheapFact(client);
// commenting this as the week date format throws IllegalPatternException
// createCubeFactWeekly(client);
createCubeFactOnlyHourly(client);
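The test setup makes the fix observable: the facts touched in this diff have their weights raised from 0 to 5, while the new cheapFact gets weight 0 but lives only on storage C99, which the test configuration does not list as a supported storage. Under the old ordering, LightestFactResolver could pick cheapFact before storage resolution ruled it out; with the phased ordering, the FACT_TABLES phase prunes it first. A hedged sketch of the selection the tests exercise; the Fact record and the pruning/selection calls are stand-ins, not the Lens API:

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Set;

public class LightestFactSketch {
  record Fact(String name, double weight, Set<String> storages) { }

  public static void main(String[] args) {
    Set<String> supported = Set.of("C1", "C2", "C3", "C4"); // C99 excluded
    List<Fact> candidates = new ArrayList<>(List.of(
        new Fact("cheapFact", 0.0, Set.of("C99")),
        new Fact("testFact", 5.0, Set.of("C1", "C2", "C3"))));

    // Phase 1 (FACT_TABLES): drop facts with no supported storage...
    candidates.removeIf(f -> f.storages().stream().noneMatch(supported::contains));

    // ...and only then pick the lightest among the survivors.
    Fact lightest = Collections.min(candidates, Comparator.comparingDouble(Fact::weight));
    System.out.println(lightest.name()); // testFact, not cheapFact
  }
}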