Posted to commits@hive.apache.org by am...@apache.org on 2013/05/09 09:11:09 UTC

svn commit: r1480541 - in /hive/branches/HIVE-4115/ql/src: java/org/apache/hadoop/hive/ql/cube/metadata/ java/org/apache/hadoop/hive/ql/cube/parse/ test/org/apache/hadoop/hive/ql/cube/parse/ test/org/apache/hadoop/hive/ql/cube/processors/

Author: amareshwari
Date: Thu May  9 07:11:09 2013
New Revision: 1480541

URL: http://svn.apache.org/r1480541
Log:
Add validations for cube query

Modified:
    hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/metadata/Cube.java
    hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AggregateResolver.java
    hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java
    hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java
    hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java

Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/metadata/Cube.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/metadata/Cube.java?rev=1480541&r1=1480540&r2=1480541&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/metadata/Cube.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/metadata/Cube.java Thu May  9 07:11:09 2013
@@ -32,12 +32,12 @@ public final class Cube extends Abstract
 
     measureMap = new HashMap<String, CubeMeasure>();
     for (CubeMeasure m : measures) {
-      measureMap.put(m.getName(), m);
+      measureMap.put(m.getName().toLowerCase(), m);
     }
 
     dimMap = new HashMap<String, CubeDimension>();
     for (CubeDimension dim : dimensions) {
-      dimMap.put(dim.getName(), dim);
+      dimMap.put(dim.getName().toLowerCase(), dim);
     }
 
     addProperties();
@@ -49,12 +49,12 @@ public final class Cube extends Abstract
     this.dimensions = getDimensions(getName(), getProperties());
     measureMap = new HashMap<String, CubeMeasure>();
     for (CubeMeasure m : measures) {
-      measureMap.put(m.getName(), m);
+      measureMap.put(m.getName().toLowerCase(), m);
     }
 
     dimMap = new HashMap<String, CubeDimension>();
     for (CubeDimension dim : dimensions) {
-      dimMap.put(dim.getName(), dim);
+      dimMap.put(dim.getName().toLowerCase(), dim);
     }
   }
 

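The effect of the change: measure and dimension lookups become case-insensitive, since query text may reference msr2 as MSR2 or Msr2. A minimal sketch of the lookup side, assuming a getMeasureByName accessor shaped like the one AggregateResolver calls below (the body is an illustration, not the class's actual code):

    private final Map<String, CubeMeasure> measureMap =
        new HashMap<String, CubeMeasure>();

    public CubeMeasure getMeasureByName(String name) {
      // Keys were stored lowercased in the constructor, so lowercase the
      // lookup key too; "MSR2", "msr2" and "Msr2" all hit the same entry.
      return name == null ? null : measureMap.get(name.toLowerCase());
    }
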
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AggregateResolver.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AggregateResolver.java?rev=1480541&r1=1480540&r2=1480541&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AggregateResolver.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AggregateResolver.java Thu May  9 07:11:09 2013
@@ -143,7 +143,7 @@ public class AggregateResolver implement
     }
   }
 
-  private String resolveForSelect(CubeQueryContext cubeql, String exprTree) {
+  private String resolveForSelect(CubeQueryContext cubeql, String exprTree) throws SemanticException {
     // Aggregate resolver needs cube to be resolved first
     assert cubeql.getCube() != null;
 
@@ -185,8 +185,11 @@ public class AggregateResolver implement
               // over expressions changed during aggregate resolver.
               cubeql.addAggregateExpr(exprTokens[i]);
             } else {
-              LOG.warn("Default aggregate not specified. measure:" + msrName + " token:" + token);
+              throw new SemanticException("Default aggregate is not set for measure: " + msrName);
             }
+          } else {
+            // Should not reach here: the name resolved as a cube measure, so a CubeMeasure object should exist
+            throw new SemanticException("Measure not found for " + msrName);
           }
         }
       }
@@ -197,7 +200,7 @@ public class AggregateResolver implement
 
   // We need to traverse the AST for Having clause.
   // We need to skip any columns that are inside an aggregate UDAF or inside an arithmetic expression
-  private String resolveForHaving(CubeQueryContext cubeql) {
+  private String resolveForHaving(CubeQueryContext cubeql) throws SemanticException {
     ASTNode havingTree = cubeql.getHavingAST();
     String havingTreeStr = cubeql.getHavingTree();
 
@@ -212,7 +215,7 @@ public class AggregateResolver implement
     return HQLParser.getString(havingTree);
   }
 
-  private void transform(CubeQueryContext cubeql, ASTNode parent, ASTNode node, int nodePos) {
+  private void transform(CubeQueryContext cubeql, ASTNode parent, ASTNode node, int nodePos) throws SemanticException {
     if (parent == null || node == null) {
       return;
     }
@@ -253,7 +256,7 @@ public class AggregateResolver implement
   }
 
   // Wrap an aggregate function around the node if it's a measure; leave it unchanged otherwise
-  private ASTNode wrapAggregate(CubeQueryContext cubeql, ASTNode node) {
+  private ASTNode wrapAggregate(CubeQueryContext cubeql, ASTNode node) throws SemanticException {
 
     String tabname = null;
     String colname = null;
@@ -275,6 +278,10 @@ public class AggregateResolver implement
     if (cubeql.isCubeMeasure(msrname)) {
       CubeMeasure measure = cubeql.getCube().getMeasureByName(colname);
       String aggregateFn = measure.getAggregate();
+
+      if (StringUtils.isBlank(aggregateFn)) {
+        throw new SemanticException("Default aggregate is not set for measure: " + colname);
+      }
       ASTNode fnroot = new ASTNode(new CommonToken(HiveParser.TOK_FUNCTION));
       fnroot.setParent(node.getParent());
 

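Net effect in AggregateResolver: a bare measure reference in a select or having clause gets wrapped in the measure's default aggregate (msr2 becomes SUM(testcube.msr2)), and compilation now fails with a SemanticException, instead of merely logging a warning, when no default aggregate is defined. A simplified sketch of the wrapping step, using the token types visible in the diff (the real wrapAggregate also derives the table and column names from the node first):

    // Wrap a measure column node in its default aggregate, e.g. msr2 -> SUM(msr2).
    private ASTNode wrapInDefaultAggregate(CubeQueryContext cubeql, ASTNode node,
        String colname) throws SemanticException {
      CubeMeasure measure = cubeql.getCube().getMeasureByName(colname);
      String aggregateFn = measure.getAggregate();
      if (StringUtils.isBlank(aggregateFn)) {
        // Fail fast: a bare measure cannot be compiled without an aggregate.
        throw new SemanticException("Default aggregate is not set for measure: "
            + colname);
      }
      // Build TOK_FUNCTION(Identifier(aggregateFn), <original column node>).
      ASTNode fnroot = new ASTNode(new CommonToken(HiveParser.TOK_FUNCTION));
      fnroot.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, aggregateFn)));
      fnroot.addChild(node);
      return fnroot;
    }
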
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java?rev=1480541&r1=1480540&r2=1480541&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java Thu May  9 07:11:09 2013
@@ -241,6 +241,12 @@ public class CubeQueryContext {
     } catch (HiveException e) {
       throw new SemanticException(e);
     }
+
+    assert timeFrom != null && timeTo != null;
+
+    if (timeFrom.after(timeTo)) {
+      throw new SemanticException("From date: " + fromDateRaw + " is after to date:" + toDateRaw);
+    }
   }
 
   private void extractColumns() throws SemanticException {
@@ -369,20 +375,33 @@ public class CubeQueryContext {
       return;
     }
     for (String col : columns) {
+      boolean inCube = false;
       if (cube != null) {
         List<String> cols = cubeTabToCols.get(cube);
         if (cols.contains(col.toLowerCase())) {
-          columnToTabAlias.put(col, getAliasForTabName(cube.getName()));
+          columnToTabAlias.put(col.toLowerCase(), getAliasForTabName(cube.getName()));
           cubeColumnsQueried.add(col);
+          inCube = true;
         }
       }
       for (CubeDimensionTable dim: dimensions) {
         if (cubeTabToCols.get(dim).contains(col.toLowerCase())) {
-          columnToTabAlias.put(col, dim.getName());
-          break;
+          if (!inCube) {
+            String prevDim = columnToTabAlias.get(col.toLowerCase());
+            if (prevDim != null && !prevDim.equals(dim.getName())) {
+              throw new SemanticException("Ambiguous column:" + col
+                  + " in dimensions '" + prevDim + "' and '" + dim.getName()+"'");
+            }
+            columnToTabAlias.put(col.toLowerCase(), dim.getName());
+            break;
+          } else {
+            // throw error because column is in both cube and dimension table
+            throw new SemanticException("Ambiguous column:" + col
+                + " in cube: " + cube.getName() + " and dimension: " + dim.getName());
+          }
         }
       }
-      if (columnToTabAlias.get(col) == null) {
+      if (columnToTabAlias.get(col.toLowerCase()) == null) {
         throw new SemanticException("Could not find the table containing" +
             " column:" + col);
       }
@@ -397,7 +416,7 @@ public class CubeQueryContext {
         CubeFactTable fact = i.next();
         List<String> factCols = cubeTabToCols.get(fact);
         for (String col : cubeColumnsQueried) {
-          if (!factCols.contains(col)) {
+          if (!factCols.contains(col.toLowerCase())) {
             System.out.println("Not considering the fact table:" + fact +
                 " as column " + col + " is not available");
             i.remove();

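Taken together, the CubeQueryContext changes validate the time range up front (the from date must not be after the to date) and refuse to guess when a queried column exists in more than one table. A condensed sketch of the alias-resolution rule, with names lowercased as in the diff; this flattens the real logic, which also records cubeColumnsQueried and stops at the first matching dimension table:

    // Resolve a queried column to exactly one owning table, or fail.
    static String resolveAlias(String col, Set<String> cubeCols, String cubeAlias,
        Map<String, Set<String>> dimTableCols) throws SemanticException {
      String key = col.toLowerCase();
      String owner = cubeCols.contains(key) ? cubeAlias : null;
      for (Map.Entry<String, Set<String>> dim : dimTableCols.entrySet()) {
        if (dim.getValue().contains(key)) {
          if (owner != null) {
            // The column lives in two tables; silently picking one would be wrong.
            throw new SemanticException("Ambiguous column: " + col
                + " in '" + owner + "' and '" + dim.getKey() + "'");
          }
          owner = dim.getKey();
        }
      }
      if (owner == null) {
        throw new SemanticException("Could not find the table containing column: " + col);
      }
      return owner;
    }
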
Modified: hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java?rev=1480541&r1=1480540&r2=1480541&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java (original)
+++ hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java Thu May  9 07:11:09 2013
@@ -54,6 +54,10 @@ public class CubeTestSetup {
     cubeMeasures.add(new ExprMeasure(new FieldSchema("msr6", "bigint",
         "sixth measure"),
         "(msr1 + msr2)/ msr4", "", "SUM", "RS"));
+    cubeMeasures.add(new ColumnMeasure(new FieldSchema("noAggrMsr", "bigint",
+        "measure without a default aggregate"),
+        null, null, null
+        ));
 
     cubeDimensions = new HashSet<CubeDimension>();
     List<CubeDimension> locationHierarchy = new ArrayList<CubeDimension>();
@@ -72,6 +76,8 @@ public class CubeTestSetup {
     cubeDimensions.add(new HierarchicalDimension("location", locationHierarchy));
     cubeDimensions.add(new BaseDimension(new FieldSchema("dim1", "string",
         "basedim")));
+    // Added for ambiguity test
+    cubeDimensions.add(new BaseDimension(new FieldSchema("ambigdim1", "string", "used in testColumnAmbiguity")));
     cubeDimensions.add(new ReferencedDimension(
             new FieldSchema("dim2", "string", "ref dim"),
             new TableReference("testdim2", "id")));
@@ -92,6 +98,7 @@ public class CubeTestSetup {
     // add dimensions of the cube
     factColumns.add(new FieldSchema("zipcode","int", "zip"));
     factColumns.add(new FieldSchema("cityid","int", "city id"));
+    factColumns.add(new FieldSchema("ambigdim1", "string", "used in testColumnAmbiguity"));
 
     Map<Storage, List<UpdatePeriod>> storageAggregatePeriods =
         new HashMap<Storage, List<UpdatePeriod>>();
@@ -173,7 +180,8 @@ public class CubeTestSetup {
     dimColumns.add(new FieldSchema("name", "string", "field1"));
     dimColumns.add(new FieldSchema("stateid", "int", "state id"));
     dimColumns.add(new FieldSchema("zipcode", "int", "zip code"));
-
+    dimColumns.add(new FieldSchema("ambigdim1", "string", "used in testColumnAmbiguity"));
+    dimColumns.add(new FieldSchema("ambigdim2", "string", "used in testColumnAmbiguity"));
     Map<String, TableReference> dimensionReferences =
         new HashMap<String, TableReference>();
     dimensionReferences.put("stateid", new TableReference("statetable", "id"));
@@ -223,7 +231,7 @@ public class CubeTestSetup {
     dimColumns.add(new FieldSchema("name", "string", "field1"));
     dimColumns.add(new FieldSchema("capital", "string", "field2"));
     dimColumns.add(new FieldSchema("region", "string", "region name"));
-
+    dimColumns.add(new FieldSchema("ambigdim2", "string", "used in testColumnAmbiguity"));
     Storage hdfsStorage = new HDFSStorage("C1",
         TextInputFormat.class.getCanonicalName(),
         HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());

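The test schema now exercises both new validations. noAggrMsr is a ColumnMeasure created with a null aggregate (going by the ExprMeasure call above, the trailing nulls cover the format, aggregate and unit slots; the parameter roles are inferred from that call, not confirmed), which is what testAggregateResolver's q8 trips on. The ambiguity columns cover both failure modes: ambigdim1 lives on the cube and on citytable (and on the fact table, so the fact remains a candidate), while ambigdim2 lives on citytable and statetable. Queries in the style of the tests below then fail as intended:

    // ambigdim1 is on both the cube and citytable: ambiguous across cube and dimension.
    driver.compileCubeQuery("SELECT ambigdim1, testCube.msr1 FROM testCube"
        + " where time_range_in('2013-05-01', '2013-05-03')");
    // ambigdim2 is on both citytable and statetable: ambiguous across two dimensions.
    driver.compileCubeQuery("SELECT ambigdim2, testCube.msr1 FROM testCube"
        + " where time_range_in('2013-05-01', '2013-05-03')");
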
Modified: hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java?rev=1480541&r1=1480540&r2=1480541&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java (original)
+++ hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java Thu May  9 07:11:09 2013
@@ -9,6 +9,7 @@ import org.apache.hadoop.hive.conf.HiveC
 import org.apache.hadoop.hive.ql.cube.parse.CubeTestSetup;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.junit.Assert;
+import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
@@ -38,6 +39,12 @@ public class TestCubeDriver {
 
   }
 
+  @Before
+  public void setupDriver() throws Exception {
+    conf = new Configuration();
+    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
+  }
+
   public static String HOUR_FMT = "yyyy-MM-dd HH";
   public static final SimpleDateFormat HOUR_PARSER = new SimpleDateFormat(HOUR_FMT);
 
@@ -54,8 +61,6 @@ public class TestCubeDriver {
 
   @Test
   public void testQueryWithNow() throws Exception {
-    conf = new Configuration();
-    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
     Throwable th = null;
     try {
       String hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube" +
@@ -69,8 +74,6 @@ public class TestCubeDriver {
 
   @Test
   public void testCandidateTables() throws Exception {
-    conf = new Configuration();
-    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
     Throwable th = null;
     try {
       String hqlQuery = driver.compileCubeQuery("select dim1, SUM(msr2)" +
@@ -86,8 +89,6 @@ public class TestCubeDriver {
 
   @Test
   public void testCubeWhereQuery() throws Exception {
-    conf = new Configuration();
-    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
     System.out.println("Test from:" + getDateUptoHours(twodaysBack) + " to:" + getDateUptoHours(now));
     //String expected = " sum( testcube.msr2 ) FROM  C1_testfact_HOURLY testcube  WHERE " + whereClause(HOURLY) + " UNION " +
     // SELECT sum( testcube.msr2 ) FROM  C1_testfact_DAILY testcube  WHERE + whereClause(DAILY)
@@ -101,8 +102,6 @@ public class TestCubeDriver {
 
   @Test
   public void testCubeJoinQuery() throws Exception {
-    conf = new Configuration();
-    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
     //String expected = "select SUM(testCube.msr2) from "
     String hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube"
         + " join citytable on testCube.cityid = citytable.id"
@@ -128,8 +127,6 @@ public class TestCubeDriver {
 
   @Test
   public void testCubeGroupbyQuery() throws Exception {
-    conf = new Configuration();
-    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
     Calendar cal = Calendar.getInstance();
     Date now = cal.getTime();
     System.out.println("Test now:" + now);
@@ -189,8 +186,6 @@ public class TestCubeDriver {
 
   @Test
   public void testCubeQueryWithAilas() throws Exception {
-    conf = new Configuration();
-    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
     String hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube" +
         " where time_range_in('" + getDateUptoHours(twodaysBack)
         + "','" + getDateUptoHours(now) + "')");
@@ -233,8 +228,6 @@ public class TestCubeDriver {
 
   @Test
   public void testCubeWhereQueryForMonth() throws Exception {
-    conf = new Configuration();
-    driver = new CubeDriver(new HiveConf(new Configuration(), HiveConf.class));
     System.out.println("Test from:" + getDateUptoHours(twoMonthsBack) + " to:" + getDateUptoHours(now));
     String hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube" +
         " where time_range_in('" + getDateUptoHours(twoMonthsBack)
@@ -252,8 +245,6 @@ public class TestCubeDriver {
 
   @Test
   public void testDimensionQueryWithMultipleStorages() throws Exception {
-    conf = new Configuration();
-    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
     String hqlQuery = driver.compileCubeQuery("select name, stateid from citytable");
     System.out.println("cube hql:" + hqlQuery);
 
@@ -279,8 +270,6 @@ public class TestCubeDriver {
 
   @Test
   public void testLimitQueryOnDimension() throws Exception {
-    conf = new Configuration();
-    driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
     String hqlQuery = driver.compileCubeQuery("select name, stateid from citytable limit 100");
     System.out.println("cube hql:" + hqlQuery);
     //Assert.assertEquals(queries[1], cubeql.toHQL());
@@ -296,9 +285,6 @@ public class TestCubeDriver {
 
   @Test
   public void testAggregateResolver() throws Exception {
-    conf = new Configuration();
-    driver = new CubeDriver(new HiveConf(new Configuration(), HiveConf.class));
-
     String timeRange = " where  time_range_in('2013-05-01', '2013-05-03')";
     System.out.println("#$AGGREGATE_RESOLVER_ TIME_RANGE:" + timeRange);
     String q1 = "SELECT cityid, testCube.msr2 from testCube " + timeRange;
@@ -323,6 +309,88 @@ public class TestCubeDriver {
     } catch (SemanticException e) {
       e.printStackTrace();
     }
+
+    String q8 = "SELECT cityid, testCube.noAggrMsr FROM testCube " + timeRange;
+    try {
+      // Should throw an exception in the aggregate resolver because noAggrMsr has no default aggregate defined.
+      String hql = driver.compileCubeQuery(q8);
+      Assert.assertTrue("Should not reach here", false);
+    } catch (SemanticException exc) {
+      Assert.assertNotNull(exc);
+      exc.printStackTrace();
+    }
+  }
+
+  @Test
+  public void testColumnAmbiguity() throws Exception {
+    String timeRange = " where  time_range_in('2013-05-01', '2013-05-03')";
+    String query = "SELECT ambigdim1, testCube.msr1 FROM testCube " + timeRange;
+
+    try {
+      String hql = driver.compileCubeQuery(query);
+      Assert.assertTrue("Should not reach here", false);
+    } catch (SemanticException exc) {
+      Assert.assertNotNull(exc);
+      exc.printStackTrace();
+    }
+
+    String q2 = "SELECT ambigdim2, testCube.msr1 FROM testCube " + timeRange;
+    try {
+      String hql = driver.compileCubeQuery(q2);
+      Assert.assertTrue("Should not reach here", false);
+    } catch (SemanticException exc) {
+      Assert.assertNotNull(exc);
+      exc.printStackTrace();
+    }
+
+
+  }
+
+  @Test
+  public void testMissingColumnValidation() throws Exception {
+    String timeRange = " where  time_range_in('2013-05-01', '2013-05-03')";
+
+    try {
+      // this query should go through
+      String q1Hql =
+          driver.compileCubeQuery("SELECT cityid, msr2 from testCube " + timeRange);
+    } catch (SemanticException exc) {
+      exc.printStackTrace();
+      Assert.assertTrue("Exception not expected here", false);
+    }
+
+    try {
+      // this query should throw an exception because invalidMsr is not a valid column
+      String q2Hql =
+          driver.compileCubeQuery("SELECT cityid, invalidMsr from testCube " + timeRange);
+      Assert.assertTrue("Should not reach here", false);
+    } catch (SemanticException exc) {
+      exc.printStackTrace();
+      Assert.assertNotNull(exc);
+    }
+  }
+
+  @Test
+  public void testTimeRangeValidation() throws Exception {
+    String timeRange1 = " where  time_range_in('2013-05-01', '2013-05-03')";
+
+    try {
+      String hql = driver.compileCubeQuery("SELECT cityid, testCube.msr2 from testCube " + timeRange1);
+    } catch (SemanticException exc) {
+      exc.printStackTrace(System.out);
+      Assert.assertTrue("Exception not expected here", false);
+    }
+
+    // Swap the from and to dates to make the range invalid
+    String timeRange2 = " where  time_range_in('2013-05-03', '2013-05-01')";
+    try {
+      // this should throw an exception because the from date is after the to date
+      String hql = driver.compileCubeQuery("SELECT cityid, testCube.msr2 from testCube " + timeRange2);
+      Assert.assertTrue("Should not reach here", false);
+    } catch (SemanticException exc) {
+      exc.printStackTrace(System.out);
+      Assert.assertNotNull(exc);
+    }
   }
 
 }