Posted to commits@lens.apache.org by am...@apache.org on 2014/11/25 11:41:29 UTC

incubator-lens git commit: LENS-14 : Fixes lightest fact first config to work with more than one storage (Rajat Khandelwal via amareshwari)

Repository: incubator-lens
Updated Branches:
  refs/heads/master 55e30561a -> ab3700641


LENS-14 : Fixes lightest fact first config to work with more than one storage (Rajat Khandelwal via amareshwari)


Project: http://git-wip-us.apache.org/repos/asf/incubator-lens/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-lens/commit/ab370064
Tree: http://git-wip-us.apache.org/repos/asf/incubator-lens/tree/ab370064
Diff: http://git-wip-us.apache.org/repos/asf/incubator-lens/diff/ab370064

Branch: refs/heads/master
Commit: ab37006415129ad033efbc10dd5956150c474ea0
Parents: 55e3056
Author: Amareshwari Sriramdasu <am...@inmobi.com>
Authored: Tue Nov 25 16:11:13 2014 +0530
Committer: Amareshwari Sriramdasu <am...@inmobi.com>
Committed: Tue Nov 25 16:11:13 2014 +0530

----------------------------------------------------------------------
 .../lens/cube/parse/CubeQueryRewriter.java      |  12 ++-
 .../lens/cube/parse/StorageTableResolver.java   |  57 +++++++---
 .../apache/lens/cube/parse/CubeTestSetup.java   | 103 ++++++++++++++++---
 3 files changed, 139 insertions(+), 33 deletions(-)
----------------------------------------------------------------------
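
Note on the test changes below: the 0L/5L arguments to createCubeFactTable appear to be fact table weights. cheapFact is created with weight 0 so it becomes the lightest candidate, while the pre-existing facts are raised to 5; cheapFact only lives on storage C99, which the tests do not treat as a supported storage, so it has to be pruned before the lightest-fact decision is made. A minimal sketch of what "lightest fact first" selection amounts to (simplified names and weights; not Lens's actual LightestFactResolver):

    import java.util.HashMap;
    import java.util.Map;

    public class LightestFactSketch {
      // Return the fact with the smallest weight; ties resolved by iteration order.
      static String lightest(Map<String, Double> factWeights) {
        String best = null;
        double bestWeight = Double.MAX_VALUE;
        for (Map.Entry<String, Double> e : factWeights.entrySet()) {
          if (e.getValue() < bestWeight) {
            bestWeight = e.getValue();
            best = e.getKey();
          }
        }
        return best;
      }

      public static void main(String[] args) {
        // Weights loosely modeled on the test setup: cheapFact at 0, others at 5.
        Map<String, Double> weights = new HashMap<String, Double>();
        weights.put("cheapFact", 0.0);
        weights.put("testFact", 5.0);
        System.out.println(lightest(weights)); // prints cheapFact
      }
    }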


http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/ab370064/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java
index 81c0d12..1325c05 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java
@@ -127,6 +127,7 @@ public class CubeQueryRewriter {
     rewriters.add(new AliasReplacer(conf));
     DenormalizationResolver denormResolver = new DenormalizationResolver(conf);
     CandidateTableResolver candidateTblResolver = new CandidateTableResolver(conf);
+    StorageTableResolver storageTableResolver = new StorageTableResolver(conf);
     // De-normalized columns resolved
     rewriters.add(denormResolver);
     // Resolve candidate fact tables and dimension tables for columns queried
@@ -141,11 +142,18 @@ public class CubeQueryRewriter {
     // Resolve aggregations and generate base select tree
     rewriters.add(new AggregateResolver(conf));
     rewriters.add(new GroupbyResolver(conf));
+    // Phase 1: resolve fact tables.
+    rewriters.add(storageTableResolver);
     if (lightFactFirst) {
       rewriters.add(new LightestFactResolver(conf));
     }
-    // Resolve storage partitions and table names
-    rewriters.add(new StorageTableResolver(conf));
+    // Phase 2: resolve fact table partitions.
+    rewriters.add(storageTableResolver);
+    if (!lightFactFirst) {
+      rewriters.add(new LightestFactResolver(conf));
+    }
+    // Phase 3: resolve dimension tables and partitions.
+    rewriters.add(storageTableResolver);
     // Check for candidate tables using de-normalized columns
     rewriters.add(denormResolver);
     rewriters.add(new LeastPartitionResolver(conf));
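
The hunk above registers the same StorageTableResolver instance three times: each invocation advances an internal phase (fact storage tables, fact partitions, then dimension tables and partitions), which lets LightestFactResolver run either right after fact storage pruning (lightFactFirst) or after partition resolution (otherwise). A self-contained sketch of that pattern with assumed, simplified names (not the Lens API):

    import java.util.ArrayList;
    import java.util.List;

    public class PhasedChainSketch {
      interface Rewriter { void rewrite(StringBuilder ctx); }

      // One stateful resolver does a different slice of work each time it is invoked.
      static class PhasedResolver implements Rewriter {
        private int phase = 0;
        public void rewrite(StringBuilder ctx) {
          switch (phase) {
            case 0:  ctx.append("fact-tables "); break;
            case 1:  ctx.append("fact-partitions "); break;
            default: ctx.append("dim-tables-and-partitions "); break;
          }
          phase++;
        }
      }

      public static void main(String[] args) {
        PhasedResolver storageResolver = new PhasedResolver();
        List<Rewriter> chain = new ArrayList<Rewriter>();
        chain.add(storageResolver);                      // phase 1: fact storage tables
        chain.add(ctx -> ctx.append("lightest-fact "));  // now sees only storage-backed facts
        chain.add(storageResolver);                      // phase 2: fact partitions
        chain.add(storageResolver);                      // phase 3: dimensions
        StringBuilder ctx = new StringBuilder();
        for (Rewriter r : chain) {
          r.rewrite(ctx);
        }
        System.out.println(ctx); // fact-tables lightest-fact fact-partitions dim-tables-and-partitions
      }
    }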

http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/ab370064/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
index afc44e1..3fb9d09 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
@@ -75,6 +75,20 @@ class StorageTableResolver implements ContextRewriter {
   private final Map<String, List<String>> nonExistingPartitions = new HashMap<String, List<String>>();
   private TimeRangeWriter rangeWriter;
   private DateFormat partWhereClauseFormat = null;
+  private PHASE phase;
+
+  static enum PHASE {
+    FACT_TABLES, FACT_PARTITIONS, DIM_TABLE_AND_PARTITIONS;
+    static PHASE first() {
+      return values()[0];
+    }
+    static PHASE last() {
+      return values()[values().length - 1];
+    }
+    PHASE next() {
+      return values()[(this.ordinal() + 1) % values().length];
+    }
+  }
 
   public StorageTableResolver(Configuration conf) {
     this.conf = conf;
@@ -104,6 +118,7 @@ class StorageTableResolver implements ContextRewriter {
     if (formatStr != null) {
       partWhereClauseFormat = new SimpleDateFormat(formatStr);
     }
+    this.phase = PHASE.first();
   }
 
   private List<String> getSupportedStorages(Configuration conf) {
@@ -115,10 +130,8 @@ class StorageTableResolver implements ContextRewriter {
   }
 
   public boolean isStorageSupported(String storage) {
-    if (!allStoragesSupported) {
-      return supportedStorages.contains(storage);
-    }
-    return true;
+    return allStoragesSupported ||
+      supportedStorages.contains(storage);
   }
 
   Map<String, List<String>> storagePartMap = new HashMap<String, List<String>>();
@@ -127,16 +140,28 @@ class StorageTableResolver implements ContextRewriter {
   public void rewriteContext(CubeQueryContext cubeql) throws SemanticException {
     client = cubeql.getMetastoreClient();
 
-    if (!cubeql.getCandidateFactTables().isEmpty()) {
-      // resolve storage table names
-      resolveFactStorageTableNames(cubeql);
-      // resolve storage partitions
-      resolveFactStoragePartitions(cubeql);
+    switch(phase) {
+      case FACT_TABLES:
+        if (!cubeql.getCandidateFactTables().isEmpty()) {
+          // resolve storage table names
+          resolveFactStorageTableNames(cubeql);
+        }
+        cubeql.pruneCandidateFactSet(CubeTableCause.NO_CANDIDATE_STORAGES);
+        break;
+      case FACT_PARTITIONS:
+        if (!cubeql.getCandidateFactTables().isEmpty()) {
+          // resolve storage partitions
+          resolveFactStoragePartitions(cubeql);
+        }
+        cubeql.pruneCandidateFactSet(CubeTableCause.NO_CANDIDATE_STORAGES);
+        break;
+      case DIM_TABLE_AND_PARTITIONS:
+        resolveDimStorageTablesAndPartitions(cubeql);
+        break;
     }
-    // resolve dimension tables
-    resolveDimStorageTablesAndPartitions(cubeql);
+    // Done in all three phases: keep updating cubeql with the missing partitions identified so far.
     cubeql.setNonexistingParts(nonExistingPartitions);
-    cubeql.pruneCandidateFactSet(CubeTableCause.NO_CANDIDATE_STORAGES);
+    phase = phase.next();
   }
 
   private void resolveDimStorageTablesAndPartitions(CubeQueryContext cubeql) throws SemanticException {
@@ -195,11 +220,11 @@ class StorageTableResolver implements ContextRewriter {
               } else {
                 LOG.info("Partition " + StorageConstants.LATEST_PARTITION_VALUE + " does not exist on " + tableName);
               }
-              if (!failOnPartialData || (failOnPartialData && numParts > 0)) {
+              if (!failOnPartialData || numParts > 0) {
                 storageTables.add(tableName);
                 String whereClause =
-                    StorageUtil.getWherePartClause(dim.getTimedDimension(), cubeql.getAliasForTabName(dim.getName()),
-                        StorageConstants.getPartitionsForLatest());
+                  StorageUtil.getWherePartClause(dim.getTimedDimension(), cubeql.getAliasForTabName(dim.getName()),
+                    StorageConstants.getPartitionsForLatest());
                 whereClauses.put(tableName, whereClause);
               } else {
                 LOG.info("Not considering dim storage table:" + tableName + " as no dim partitions exist");
@@ -347,7 +372,7 @@ class StorageTableResolver implements ContextRewriter {
         cfact.numQueriedParts += rangeParts.size();
         answeringParts.addAll(rangeParts);
         cfact.rangeToWhereClause.put(range, rangeWriter.getTimeRangeWhereClause(cubeql,
-            cubeql.getAliasForTabName(cubeql.getCube().getName()), rangeParts));
+          cubeql.getAliasForTabName(cubeql.getCube().getName()), rangeParts));
       }
       if (!nonExistingParts.isEmpty()) {
         addNonExistingParts(cfact.fact.getName(), nonExistingParts);
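
The PHASE enum added above drives this: the resolver starts at PHASE.first() and calls phase.next() at the end of every rewriteContext invocation, so its three registrations in CubeQueryRewriter map to the three cases of the switch. A runnable copy of the cycling idiom:

    public class PhaseCycleSketch {
      enum PHASE {
        FACT_TABLES, FACT_PARTITIONS, DIM_TABLE_AND_PARTITIONS;
        static PHASE first() { return values()[0]; }
        static PHASE last()  { return values()[values().length - 1]; }
        PHASE next()         { return values()[(ordinal() + 1) % values().length]; }
      }

      public static void main(String[] args) {
        PHASE p = PHASE.first();
        // Three calls correspond to the three registrations of the resolver.
        for (int call = 1; call <= 3; call++) {
          System.out.println("rewriteContext call " + call + ": " + p
              + (p == PHASE.last() ? " (last)" : ""));
          p = p.next();
        }
        // After the last phase, next() wraps back to the first.
        System.out.println("wrapped back to first: " + (p == PHASE.first())); // true
      }
    }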

http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/ab370064/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
index e40db8d..485ff46 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
@@ -42,13 +42,11 @@ import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.ParseException;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.mapred.TextInputFormat;
 import org.apache.lens.cube.metadata.BaseDimAttribute;
 import org.apache.lens.cube.metadata.ColumnMeasure;
-import org.apache.lens.cube.metadata.Cube;
 import org.apache.lens.cube.metadata.CubeDimAttribute;
 import org.apache.lens.cube.metadata.CubeFactTable;
 import org.apache.lens.cube.metadata.CubeMeasure;
@@ -67,11 +65,6 @@ import org.apache.lens.cube.metadata.StorageTableDesc;
 import org.apache.lens.cube.metadata.TableReference;
 import org.apache.lens.cube.metadata.TestCubeMetastoreClient;
 import org.apache.lens.cube.metadata.UpdatePeriod;
-import org.apache.lens.cube.parse.CubeQueryContext;
-import org.apache.lens.cube.parse.CubeQueryRewriter;
-import org.apache.lens.cube.parse.DateUtil;
-import org.apache.lens.cube.parse.HQLParser;
-import org.apache.lens.cube.parse.StorageUtil;
 import org.testng.Assert;
 
 /*
@@ -84,7 +77,9 @@ import org.testng.Assert;
  * testFact2 : {C1} -> {Hourly}
  * testFactMonthly : {C2} -> {Monthly}
  * summary1,summary2,summary3 - {C1, C2} -> {daily, hourly, minutely}
+ * cheapFact: {C99} -> {Minutely, hourly, daily, monthly, quarterly, yearly}
  *   C2 has multiple dated partitions
+ *   C99 is not to be used as supported storage in testcases
  *
  * CityTable : C1 - SNAPSHOT and C2 - NO snapshot
  * 
@@ -129,6 +124,7 @@ public class CubeTestSetup {
   private static String c2 = "C2";
   private static String c3 = "C3";
   private static String c4 = "C4";
+  private static String c99 = "C99";
 
   public static void init() {
     if (inited) {
@@ -544,7 +540,7 @@ public class CubeTestSetup {
     dimensions.add("dim1");
     dimensions.add("dim2");
     client
-        .createDerivedCube(TEST_CUBE_NAME, DERIVED_CUBE_NAME, measures, dimensions, new HashMap<String, String>(), 0L);
+        .createDerivedCube(TEST_CUBE_NAME, DERIVED_CUBE_NAME, measures, dimensions, new HashMap<String, String>(), 5L);
   }
 
   private void createBaseAndDerivedCubes(CubeMetastoreClient client) throws HiveException, ParseException {
@@ -580,7 +576,7 @@ public class CubeTestSetup {
     Set<String> dimensions = new HashSet<String>();
     dimensions.add("dim1");
     dimensions.add("dim11");
-    client.createDerivedCube(BASE_CUBE_NAME, DERIVED_CUBE_NAME1, measures, dimensions, derivedProperties, 0L);
+    client.createDerivedCube(BASE_CUBE_NAME, DERIVED_CUBE_NAME1, measures, dimensions, derivedProperties, 5L);
 
     measures = new HashSet<String>();
     measures.add("msr2");
@@ -660,7 +656,7 @@ public class CubeTestSetup {
     factColumns.add(new FieldSchema("dim11", "string", "base dim"));
 
     // create cube fact
-    client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 0L, null, storageTables);
+    client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, null, storageTables);
 
     // create fact only with extra measures
     factName = "testFact2_BASE";
@@ -674,7 +670,7 @@ public class CubeTestSetup {
     factColumns.add(new FieldSchema("dim2", "int", "dim2 id"));
 
     // create cube fact
-    client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 0L, null, storageTables);
+    client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, null, storageTables);
 
     // create fact only with extra measures
     factName = "testFact3_BASE";
@@ -687,7 +683,7 @@ public class CubeTestSetup {
     factColumns.add(new FieldSchema("dim11", "string", "base dim"));
 
     // create cube fact
-    client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 0L, null, storageTables);
+    client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, null, storageTables);
 
     // create raw fact only with extra measures
     factName = "testFact2_RAW_BASE";
@@ -792,7 +788,7 @@ public class CubeTestSetup {
     storageTables.put(c2, s1);
     storageTables.put(c3, s1);
     // create cube fact
-    client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 0L, null, storageTables);
+    client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, null, storageTables);
 
     CubeFactTable fact = client.getFactTable(factName);
     // Add all hourly partitions for two days
@@ -820,6 +816,81 @@ public class CubeTestSetup {
       temp = cal.getTime();
     }
   }
+  private void createCubeCheapFact(CubeMetastoreClient client) throws HiveException {
+    String factName = "cheapFact";
+    List<FieldSchema> factColumns = new ArrayList<FieldSchema>(cubeMeasures.size());
+    for (CubeMeasure measure : cubeMeasures) {
+      factColumns.add(measure.getColumn());
+    }
+
+    // add dimensions of the cube
+    factColumns.add(new FieldSchema("zipcode", "int", "zip"));
+    factColumns.add(new FieldSchema("cityid", "int", "city id"));
+    factColumns.add(new FieldSchema("stateid", "int", "city id"));
+    factColumns.add(new FieldSchema("test_time_dim_hour_id", "int", "time id"));
+    factColumns.add(new FieldSchema("ambigdim1", "string", "used in" + " testColumnAmbiguity"));
+
+    Map<String, Set<UpdatePeriod>> storageAggregatePeriods = new HashMap<String, Set<UpdatePeriod>>();
+    Set<UpdatePeriod> updates = new HashSet<UpdatePeriod>();
+    updates.add(UpdatePeriod.MINUTELY);
+    updates.add(UpdatePeriod.HOURLY);
+    updates.add(UpdatePeriod.DAILY);
+    updates.add(UpdatePeriod.MONTHLY);
+    updates.add(UpdatePeriod.QUARTERLY);
+    updates.add(UpdatePeriod.YEARLY);
+
+    ArrayList<FieldSchema> partCols = new ArrayList<FieldSchema>();
+    List<String> timePartCols = new ArrayList<String>();
+    partCols.add(TestCubeMetastoreClient.getDatePartition());
+    timePartCols.add(TestCubeMetastoreClient.getDatePartitionKey());
+
+    StorageTableDesc s1 = new StorageTableDesc();
+    s1.setInputFormat(TextInputFormat.class.getCanonicalName());
+    s1.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
+    s1.setPartCols(partCols);
+    s1.setTimePartCols(timePartCols);
+
+    StorageTableDesc s2 = new StorageTableDesc();
+    s2.setInputFormat(TextInputFormat.class.getCanonicalName());
+    s2.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
+    ArrayList<FieldSchema> s2PartCols = new ArrayList<FieldSchema>();
+    s2PartCols.add(new FieldSchema("ttd", serdeConstants.STRING_TYPE_NAME, "test date partition"));
+    s2.setPartCols(s2PartCols);
+    s2.setTimePartCols(Arrays.asList("ttd"));
+
+    storageAggregatePeriods.put(c99, updates);
+
+    Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>();
+    storageTables.put(c99, s2);
+    // create cube fact
+    client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 0L, null, storageTables);
+
+    CubeFactTable fact = client.getFactTable(factName);
+    // Add all hourly partitions for two days
+    Calendar cal = Calendar.getInstance();
+    cal.setTime(twodaysBack);
+    Date temp = cal.getTime();
+    while (!(temp.after(now))) {
+      Map<String, Date> timeParts = new HashMap<String, Date>();
+      timeParts.put("ttd", temp);
+      StoragePartitionDesc sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, UpdatePeriod.HOURLY);
+      client.addPartition(sPartSpec, c99);
+      cal.add(Calendar.HOUR_OF_DAY, 1);
+      temp = cal.getTime();
+    }
+
+    // Add all hourly partitions for twoDaysRangeBefore4days
+    cal.setTime(before4daysStart);
+    temp = cal.getTime();
+    while (!(temp.after(before4daysEnd))) {
+      Map<String, Date> timeParts = new HashMap<String, Date>();
+      timeParts.put("ttd", temp);
+      StoragePartitionDesc sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, UpdatePeriod.HOURLY);
+      client.addPartition(sPartSpec, c99);
+      cal.add(Calendar.HOUR_OF_DAY, 1);
+      temp = cal.getTime();
+    }
+  }
 
   private void createCubeFactWeekly(CubeMetastoreClient client) throws HiveException {
     String factName = "testFactWeekly";
@@ -849,7 +920,7 @@ public class CubeTestSetup {
     Map<String, StorageTableDesc> storageTables = new HashMap<String, StorageTableDesc>();
     storageTables.put(c1, s1);
     // create cube fact
-    client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 0L, null, storageTables);
+    client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, null, storageTables);
   }
 
   private void createCubeFactOnlyHourly(CubeMetastoreClient client) throws HiveException {
@@ -998,7 +1069,7 @@ public class CubeTestSetup {
     storageTables.put(c2, s1);
 
     // create cube fact
-    client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 0L, null, storageTables);
+    client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 5L, null, storageTables);
   }
 
   // DimWithTwoStorages
@@ -1476,9 +1547,11 @@ public class CubeTestSetup {
       client.createStorage(new HDFSStorage(c2));
       client.createStorage(new HDFSStorage(c3));
       client.createStorage(new HDFSStorage(c4));
+      client.createStorage(new HDFSStorage(c99));
       createCube(client);
       createBaseAndDerivedCubes(client);
       createCubeFact(client);
+      createCubeCheapFact(client);
       // commenting this as the week date format throws IllegalPatternException
       // createCubeFactWeekly(client);
       createCubeFactOnlyHourly(client);