Posted to commits@hive.apache.org by am...@apache.org on 2013/05/13 14:42:42 UTC

svn commit: r1481826 - in /hive/branches/HIVE-4115/ql/src: java/org/apache/hadoop/hive/ql/cube/parse/ test/org/apache/hadoop/hive/ql/cube/parse/ test/org/apache/hadoop/hive/ql/cube/processors/

Author: amareshwari
Date: Mon May 13 12:42:41 2013
New Revision: 1481826

URL: http://svn.apache.org/r1481826
Log:
Add configuration for valid fact tables and storage tables

Added:
    hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryConstants.java
Modified:
    hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AliasReplacer.java
    hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java
    hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/StorageTableResolver.java
    hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java
    hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java
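
The new CubeQueryConstants keys let a client restrict which fact tables and
storage tables the rewriter may use when answering a cube query. A minimal
usage sketch, modeled on the TestCubeDriver changes below (CubeDriver and
compileCubeQuery are assumed to live in
org.apache.hadoop.hive.ql.cube.processors as in the tests; the table names
are illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.cube.parse.CubeQueryConstants;
    import org.apache.hadoop.hive.ql.cube.processors.CubeDriver;

    public class ValidTablesExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Values are whitespace-separated table names. Matching is
        // case-insensitive: both the configured names and the candidate
        // names are lowercased during resolution. A blank or unset value
        // means no restriction.
        conf.set(CubeQueryConstants.VALID_FACT_TABLES, "testFact");
        conf.set(CubeQueryConstants.VALID_STORAGE_FACT_TABLES,
            "C1_testFact_HOURLY");
        conf.set(CubeQueryConstants.VALID_STORAGE_DIM_TABLES, "C1_citytable");
        CubeDriver driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
        String hqlQuery = driver.compileCubeQuery(
            "select name, stateid from citytable");
        System.out.println("cube hql:" + hqlQuery);
      }
    }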

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AliasReplacer.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AliasReplacer.java?rev=1481826&r1=1481825&r2=1481826&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AliasReplacer.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AliasReplacer.java Mon May 13 12:42:41 2013
@@ -69,10 +69,8 @@ public class AliasReplacer implements Co
     }
 
     // Update the aggregate expression set
-    System.out.println("AggrSet Before:" + cubeql.aggregateExprs.toString());
     updateAggregates(selectAST, cubeql);
     updateAggregates(havingAST, cubeql);
-    System.out.println("AggrSet After:" + cubeql.aggregateExprs.toString());
   }
 
   private void replaceAliases(ASTNode node, int nodePos, Map<String, String> colToTableAlias) {

Added: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryConstants.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryConstants.java?rev=1481826&view=auto
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryConstants.java (added)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryConstants.java Mon May 13 12:42:41 2013
@@ -0,0 +1,9 @@
+package org.apache.hadoop.hive.ql.cube.parse;
+
+public interface CubeQueryConstants {
+  public static final String VALID_FACT_TABLES = "cube.query.valid.fact.tables";
+  public static final String VALID_STORAGE_FACT_TABLES =
+      "cube.query.valid.fact.storagetables";
+  public static final String VALID_STORAGE_DIM_TABLES =
+      "cube.query.valid.dim.storagetables";
+}

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java?rev=1481826&r1=1481825&r2=1481826&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java Mon May 13 12:42:41 2013
@@ -10,6 +10,7 @@ import static org.apache.hadoop.hive.ql.
 import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_TABLE_OR_COL;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -447,9 +448,20 @@ public class CubeQueryContext {
     if (cube != null) {
       // go over the columns accessed in the query and find out which tables
       // can answer the query
+      String str = conf.get(CubeQueryConstants.VALID_FACT_TABLES);
+      List<String> validFactTables = StringUtils.isBlank(str) ? null :
+        Arrays.asList(StringUtils.split(str.toLowerCase()));
       for (Iterator<CubeFactTable> i = candidateFactTables.iterator();
           i.hasNext();) {
         CubeFactTable fact = i.next();
+        if (validFactTables != null) {
+          if (!validFactTables.contains(fact.getName().toLowerCase())) {
+            LOG.info("Not considering the fact table:" + fact + " as it is" +
+            		" not a valid fact");
+            i.remove();
+            continue;
+          }
+        }
         List<String> factCols = cubeTabToCols.get(fact);
         List<String> validFactCols = fact.getValidColumns();
         for (String col : cubeColumnsQueried) {
@@ -509,7 +521,7 @@ public class CubeQueryContext {
     return dimensions;
   }
 
-  private String getAliasForTabName(String tabName) {
+  public String getAliasForTabName(String tabName) {
     for (String alias : qb.getTabAliases()) {
       if (qb.getTabNameForAlias(alias).equalsIgnoreCase(tabName)) {
         return alias;
@@ -906,6 +918,11 @@ public class CubeQueryContext {
       Iterator<UpdatePeriod> it = partColMap.keySet().iterator();
       while (it.hasNext()) {
         UpdatePeriod updatePeriod = it.next();
+        if (storageTableMap.get(updatePeriod) == null ||
+            storageTableMap.get(updatePeriod).isEmpty()) {
+          throw new SemanticException("No storage table available for fact " +
+              fact + " for update period " + updatePeriod);
+        }
         String storageTable = storageTableMap.get(updatePeriod).get(0);
         storageTableToQuery.put(getCube(), storageTable);
         query.append(toHQL(storageTable));
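
Note that HQL generation now fails fast: if the selected fact has no storage
table for one of its update periods (for example, because
VALID_STORAGE_FACT_TABLES filtered them all out), compilation throws a
SemanticException rather than hitting a NullPointerException or an
IndexOutOfBoundsException at storageTableMap.get(updatePeriod).get(0). A
sketch of caller-side handling, mirroring the try/catch in TestCubeDriver
(driver and the query string are assumed to be in scope as in the tests):

    try {
      String hqlQuery = driver.compileCubeQuery(query);
      System.out.println("cube hql:" + hqlQuery);
    } catch (SemanticException e) {
      // Thrown when no storage table is available for some update period
      // of the selected fact table.
      e.printStackTrace();
    }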

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/StorageTableResolver.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/StorageTableResolver.java?rev=1481826&r1=1481825&r2=1481826&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/StorageTableResolver.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/StorageTableResolver.java Mon May 13 12:42:41 2013
@@ -1,10 +1,15 @@
 package org.apache.hadoop.hive.ql.cube.parse;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.cube.metadata.CubeDimensionTable;
 import org.apache.hadoop.hive.ql.cube.metadata.CubeFactTable;
@@ -14,8 +19,11 @@ import org.apache.hadoop.hive.ql.cube.me
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 
 public class StorageTableResolver implements ContextRewriter {
+  private static final Log LOG = LogFactory.getLog(StorageTableResolver.class);
 
+  private final Configuration conf;
   public StorageTableResolver(Configuration conf) {
+    this.conf = conf;
   }
 
   @Override
@@ -29,6 +38,10 @@ public class StorageTableResolver implem
         new HashMap<CubeFactTable, Map<UpdatePeriod, List<String>>>();
     Map<CubeFactTable, Map<UpdatePeriod, List<String>>> factPartMap =
         cubeql.getFactPartitionMap();
+    String str = conf.get(CubeQueryConstants.VALID_STORAGE_FACT_TABLES);
+    List<String> validFactStorageTables = StringUtils.isBlank(str) ? null :
+      Arrays.asList(StringUtils.split(str.toLowerCase()));
+
     // Find candidate tables wrt supported storages
     for (CubeFactTable fact : factPartMap.keySet()) {
       Map<UpdatePeriod, List<String>> storageTableMap =
@@ -42,36 +55,71 @@ public class StorageTableResolver implem
         for (String storage : fact.getStorages()) {
           if (cubeql.isStorageSupported(storage)) {
             String tableName = MetastoreUtil.getFactStorageTableName(
-                fact.getName(), updatePeriod, Storage.getPrefix(storage));
+                fact.getName(), updatePeriod, Storage.getPrefix(storage))
+                .toLowerCase();
+            if (validFactStorageTables != null && !validFactStorageTables
+                .contains(tableName)) {
+              LOG.info("Not considering the fact storage table:" + tableName
+                  + " as it is not a valid fact storage");
+              continue;
+            }
             storageTables.add(tableName);
-            storageTableToWhereClause.put(tableName,
-                getWherePartClause(fact.getCubeName(), parts));
+            storageTableToWhereClause.put(tableName, getWherePartClause(
+                cubeql.getAliasForTabName(fact.getCubeName()), parts));
           } else {
-            System.out.println("Storage:" + storage + " is not supported");
+            LOG.info("Storage:" + storage + " is not supported");
           }
         }
       }
     }
     cubeql.setFactStorageMap(factStorageMap);
+    for (Iterator<CubeFactTable> i =
+        cubeql.getCandidateFactTables().iterator(); i.hasNext();) {
+      CubeFactTable fact = i.next();
+      Map<UpdatePeriod, List<String>> storageTableMap = factStorageMap.get(
+          fact);
+      Map<UpdatePeriod, List<String>> partColMap = cubeql.getFactPartitionMap()
+          .get(fact);
+      Iterator<UpdatePeriod> it = partColMap.keySet().iterator();
+      while (it.hasNext()) {
+        UpdatePeriod updatePeriod = it.next();
+        if (storageTableMap.get(updatePeriod) == null ||
+            storageTableMap.get(updatePeriod).isEmpty()) {
+          LOG.info("Removing fact:" + fact +
+              " from candidate fact tables, as it does not have storage tables"
+              + " for update period" + updatePeriod);
+          i.remove();
+          break;
+        }
+      }
+    }
 
     // resolve dimension tables
     Map<CubeDimensionTable, List<String>> dimStorageMap =
         new HashMap<CubeDimensionTable, List<String>>();
+    str = conf.get(CubeQueryConstants.VALID_STORAGE_DIM_TABLES);
+    List<String> validDimTables = StringUtils.isBlank(str) ? null :
+      Arrays.asList(StringUtils.split(str.toLowerCase()));
     for (CubeDimensionTable dim : cubeql.getDimensionTables()) {
       List<String> storageTables = new ArrayList<String>();
       dimStorageMap.put(dim, storageTables);
       for (String storage : dim.getStorages()) {
         if (cubeql.isStorageSupported(storage)) {
           String tableName = MetastoreUtil.getDimStorageTableName(
-              dim.getName(), Storage.getPrefix(storage));
+              dim.getName(), Storage.getPrefix(storage)).toLowerCase();
+          if (validDimTables != null && !validDimTables.contains(tableName)) {
+            LOG.info("Not considering the dim storage table:" + tableName
+                + " as it is not a valid dim storage");
+            continue;
+          }
           storageTables.add(tableName);
           if (dim.hasStorageSnapshots(storage)) {
             storageTableToWhereClause.put(tableName,
-                getWherePartClause(dim.getName(), Storage
-                    .getPartitionsForLatest()));
+                getWherePartClause(cubeql.getAliasForTabName(dim.getName()),
+                    Storage.getPartitionsForLatest()));
           }
         } else {
-          System.out.println("Storage:" + storage + " is not supported");
+          LOG.info("Storage:" + storage + " is not supported");
         }
       }
     }
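
All three validity keys are parsed with the same idiom; a standalone sketch
of the behavior (parseValidTables is a hypothetical helper name, StringUtils
is org.apache.commons.lang.StringUtils as imported above):

    import java.util.Arrays;
    import java.util.List;

    import org.apache.commons.lang.StringUtils;

    class ValidTablesParsing {
      // Mirrors the resolver's parsing: a blank or unset value yields null,
      // meaning "no filtering"; otherwise the value is lowercased and split
      // on whitespace into table names.
      static List<String> parseValidTables(String str) {
        return StringUtils.isBlank(str) ? null
            : Arrays.asList(StringUtils.split(str.toLowerCase()));
      }
    }

After filtering, any candidate fact left without a storage table for one of
its update periods is removed from the candidate set, so only facts that can
actually be answered from the configured storages reach HQL generation.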

Modified: hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java?rev=1481826&r1=1481825&r2=1481826&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java (original)
+++ hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java Mon May 13 12:42:41 2013
@@ -115,6 +115,10 @@ public class CubeTestSetup {
         TextInputFormat.class.getCanonicalName(),
         HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
     storageAggregatePeriods.put(hdfsStorage, updates);
+    Storage hdfsStorage2 = new HDFSStorage("C2",
+        TextInputFormat.class.getCanonicalName(),
+        HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
+    storageAggregatePeriods.put(hdfsStorage2, updates);
 
     // create cube fact
     client.createCubeFactTable(cubeName, factName, factColumns,

Modified: hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java?rev=1481826&r1=1481825&r2=1481826&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java (original)
+++ hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java Mon May 13 12:42:41 2013
@@ -6,6 +6,7 @@ import java.util.Date;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.cube.parse.CubeQueryConstants;
 import org.apache.hadoop.hive.ql.cube.parse.CubeTestSetup;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.junit.Assert;
@@ -102,6 +103,63 @@ public class TestCubeDriver {
         " where time_range_in('" + getDateUptoHours(twodaysBack)
         + "','" + getDateUptoHours(now) + "')");
     System.out.println("cube hql:" + hqlQuery);
+
+    conf.set(CubeQueryConstants.VALID_FACT_TABLES, "testFact");
+    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
+    hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube" +
+        " where time_range_in('" + getDateUptoHours(twodaysBack)
+        + "','" + getDateUptoHours(now) + "')");
+    System.out.println("cube hql:" + hqlQuery);
+
+    conf.set(HiveConf.ConfVars.HIVE_DRIVER_SUPPORTED_STORAGES.toString(), "C2");
+    conf.set(CubeQueryConstants.VALID_FACT_TABLES, "testFact");
+    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
+    hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube" +
+        " where time_range_in('" + getDateUptoHours(twodaysBack)
+        + "','" + getDateUptoHours(now) + "')");
+    System.out.println("cube hql:" + hqlQuery);
+
+    conf.set(HiveConf.ConfVars.HIVE_DRIVER_SUPPORTED_STORAGES.toString(), "C1");
+    conf.set(CubeQueryConstants.VALID_FACT_TABLES, "testFact2");
+    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
+    hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube" +
+        " where time_range_in('" + getDateUptoHours(twodaysBack)
+        + "','" + getDateUptoHours(now) + "')");
+    System.out.println("cube hql:" + hqlQuery);
+
+    conf.set(CubeQueryConstants.VALID_FACT_TABLES, "");
+    conf.set(HiveConf.ConfVars.HIVE_DRIVER_SUPPORTED_STORAGES.toString(), "C1");
+    conf.set(CubeQueryConstants.VALID_STORAGE_FACT_TABLES,
+        "C1_testFact2_HOURLY");
+    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
+    hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube" +
+        " where time_range_in('" + getDateUptoHours(twodaysBack)
+        + "','" + getDateUptoHours(now) + "')");
+    System.out.println("cube hql:" + hqlQuery);
+
+    // TODO fix following cases
+    try {
+      conf.set(CubeQueryConstants.VALID_STORAGE_FACT_TABLES,
+          "C1_testFact_HOURLY");
+      driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
+      hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube" +
+          " where time_range_in('" + getDateUptoHours(twodaysBack)
+          + "','" + getDateUptoHours(now) + "')");
+      System.out.println("cube hql:" + hqlQuery);
+
+      conf.set(HiveConf.ConfVars.HIVE_DRIVER_SUPPORTED_STORAGES.toString(),
+          "C2");
+      conf.set(CubeQueryConstants.VALID_FACT_TABLES, "");
+      conf.set(CubeQueryConstants.VALID_STORAGE_FACT_TABLES,
+          "C2_testFact_HOURLY");
+      driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
+      hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube" +
+          " where time_range_in('" + getDateUptoHours(twodaysBack)
+          + "','" + getDateUptoHours(now) + "')");
+      System.out.println("cube hql:" + hqlQuery);
+    } catch (SemanticException e) {
+      e.printStackTrace();
+    }
     //Assert.assertEquals(queries[1], cubeql.toHQL());
   }
 
@@ -278,6 +337,13 @@ public class TestCubeDriver {
     System.out.println("cube hql:" + hqlQuery);
 
     conf.set(HiveConf.ConfVars.HIVE_DRIVER_SUPPORTED_STORAGES.toString(), "");
+    conf.set(CubeQueryConstants.VALID_STORAGE_DIM_TABLES, "C1_citytable");
+    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
+    hqlQuery = driver.compileCubeQuery("select name, stateid from citytable");
+    System.out.println("cube hql:" + hqlQuery);
+
+    conf.set(HiveConf.ConfVars.HIVE_DRIVER_SUPPORTED_STORAGES.toString(), "");
+    conf.set(CubeQueryConstants.VALID_STORAGE_DIM_TABLES, "C2_citytable");
     driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
     hqlQuery = driver.compileCubeQuery("select name, stateid from citytable");
     System.out.println("cube hql:" + hqlQuery);
@@ -363,8 +429,6 @@ public class TestCubeDriver {
       Assert.assertNotNull(exc);
       exc.printStackTrace();
     }
-
-
   }
 
   @Test
@@ -444,8 +508,8 @@ public class TestCubeDriver {
         " where time_range_in('" + getDateUptoHours(twodaysBack)
         + "','" + getDateUptoHours(now) + "')");
     System.out.println("cube hql:" + hqlQuery);
-    hqlQuery = driver.compileCubeQuery("select dim1, dim2, COUNT(msr1), SUM(msr2)," +
-        " msr3 from testCube" +
+    hqlQuery = driver.compileCubeQuery("select dim1, dim2, COUNT(msr1)," +
+        " SUM(msr2), msr3 from testCube" +
         " where time_range_in('" + getDateUptoHours(twodaysBack)
         + "','" + getDateUptoHours(now) + "')");
     System.out.println("cube hql:" + hqlQuery);