You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by am...@apache.org on 2013/05/09 09:11:09 UTC
svn commit: r1480541 - in /hive/branches/HIVE-4115/ql/src:
java/org/apache/hadoop/hive/ql/cube/metadata/
java/org/apache/hadoop/hive/ql/cube/parse/
test/org/apache/hadoop/hive/ql/cube/parse/
test/org/apache/hadoop/hive/ql/cube/processors/
Author: amareshwari
Date: Thu May 9 07:11:09 2013
New Revision: 1480541
URL: http://svn.apache.org/r1480541
Log:
Add validations for cube query
Modified:
hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/metadata/Cube.java
hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AggregateResolver.java
hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java
hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java
hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/metadata/Cube.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/metadata/Cube.java?rev=1480541&r1=1480540&r2=1480541&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/metadata/Cube.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/metadata/Cube.java Thu May 9 07:11:09 2013
@@ -32,12 +32,12 @@ public final class Cube extends Abstract
measureMap = new HashMap<String, CubeMeasure>();
for (CubeMeasure m : measures) {
- measureMap.put(m.getName(), m);
+ measureMap.put(m.getName().toLowerCase(), m);
}
dimMap = new HashMap<String, CubeDimension>();
for (CubeDimension dim : dimensions) {
- dimMap.put(dim.getName(), dim);
+ dimMap.put(dim.getName().toLowerCase(), dim);
}
addProperties();
@@ -49,12 +49,12 @@ public final class Cube extends Abstract
this.dimensions = getDimensions(getName(), getProperties());
measureMap = new HashMap<String, CubeMeasure>();
for (CubeMeasure m : measures) {
- measureMap.put(m.getName(), m);
+ measureMap.put(m.getName().toLowerCase(), m);
}
dimMap = new HashMap<String, CubeDimension>();
for (CubeDimension dim : dimensions) {
- dimMap.put(dim.getName(), dim);
+ dimMap.put(dim.getName().toLowerCase(), dim);
}
}
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AggregateResolver.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AggregateResolver.java?rev=1480541&r1=1480540&r2=1480541&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AggregateResolver.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/AggregateResolver.java Thu May 9 07:11:09 2013
@@ -143,7 +143,7 @@ public class AggregateResolver implement
}
}
- private String resolveForSelect(CubeQueryContext cubeql, String exprTree) {
+ private String resolveForSelect(CubeQueryContext cubeql, String exprTree) throws SemanticException {
// Aggregate resolver needs cube to be resolved first
assert cubeql.getCube() != null;
@@ -185,8 +185,11 @@ public class AggregateResolver implement
// over expressions changed during aggregate resolver.
cubeql.addAggregateExpr(exprTokens[i]);
} else {
- LOG.warn("Default aggregate not specified. measure:" + msrName + " token:" + token);
+ throw new SemanticException("Default aggregate is not set for measure: " + msrName);
}
+ } else {
+ // should not be here, since if it is a measure, we should get a cube measure object
+ throw new SemanticException("Measure not found for " + msrName);
}
}
}
@@ -197,7 +200,7 @@ public class AggregateResolver implement
// We need to traverse the AST for Having clause.
// We need to skip any columns that are inside an aggregate UDAF or inside an arithmetic expression
- private String resolveForHaving(CubeQueryContext cubeql) {
+ private String resolveForHaving(CubeQueryContext cubeql) throws SemanticException {
ASTNode havingTree = cubeql.getHavingAST();
String havingTreeStr = cubeql.getHavingTree();
@@ -212,7 +215,7 @@ public class AggregateResolver implement
return HQLParser.getString(havingTree);
}
- private void transform(CubeQueryContext cubeql, ASTNode parent, ASTNode node, int nodePos) {
+ private void transform(CubeQueryContext cubeql, ASTNode parent, ASTNode node, int nodePos) throws SemanticException {
if (parent == null || node == null) {
return;
}
@@ -253,7 +256,7 @@ public class AggregateResolver implement
}
// Wrap an aggregate function around the node if its a measure, leave it unchanged otherwise
- private ASTNode wrapAggregate(CubeQueryContext cubeql, ASTNode node) {
+ private ASTNode wrapAggregate(CubeQueryContext cubeql, ASTNode node) throws SemanticException {
String tabname = null;
String colname = null;
@@ -275,6 +278,10 @@ public class AggregateResolver implement
if (cubeql.isCubeMeasure(msrname)) {
CubeMeasure measure = cubeql.getCube().getMeasureByName(colname);
String aggregateFn = measure.getAggregate();
+
+ if (StringUtils.isBlank(aggregateFn)) {
+ throw new SemanticException("Default aggregate is not set for measure: " + colname);
+ }
ASTNode fnroot = new ASTNode(new CommonToken(HiveParser.TOK_FUNCTION));
fnroot.setParent(node.getParent());
Modified: hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java?rev=1480541&r1=1480540&r2=1480541&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java (original)
+++ hive/branches/HIVE-4115/ql/src/java/org/apache/hadoop/hive/ql/cube/parse/CubeQueryContext.java Thu May 9 07:11:09 2013
@@ -241,6 +241,12 @@ public class CubeQueryContext {
} catch (HiveException e) {
throw new SemanticException(e);
}
+
+ assert timeFrom != null && timeTo != null;
+
+ if (timeFrom.after(timeTo)) {
+ throw new SemanticException("From date: " + fromDateRaw + " is after to date:" + toDateRaw);
+ }
}
private void extractColumns() throws SemanticException {
@@ -369,20 +375,33 @@ public class CubeQueryContext {
return;
}
for (String col : columns) {
+ boolean inCube = false;
if (cube != null) {
List<String> cols = cubeTabToCols.get(cube);
if (cols.contains(col.toLowerCase())) {
- columnToTabAlias.put(col, getAliasForTabName(cube.getName()));
+ columnToTabAlias.put(col.toLowerCase(), getAliasForTabName(cube.getName()));
cubeColumnsQueried.add(col);
+ inCube = true;
}
}
for (CubeDimensionTable dim: dimensions) {
if (cubeTabToCols.get(dim).contains(col.toLowerCase())) {
- columnToTabAlias.put(col, dim.getName());
- break;
+ if (!inCube) {
+ String prevDim = columnToTabAlias.get(col.toLowerCase());
+ if (prevDim != null && !prevDim.equals(dim.getName())) {
+ throw new SemanticException("Ambiguous column:" + col
+ + " in dimensions '" + prevDim + "' and '" + dim.getName()+"'");
+ }
+ columnToTabAlias.put(col.toLowerCase(), dim.getName());
+ break;
+ } else {
+ // throw error because column is in both cube and dimension table
+ throw new SemanticException("Ambiguous column:" + col
+ + " in cube: " + cube.getName() + " and dimension: " + dim.getName());
+ }
}
}
- if (columnToTabAlias.get(col) == null) {
+ if (columnToTabAlias.get(col.toLowerCase()) == null) {
throw new SemanticException("Could not find the table containing" +
" column:" + col);
}
@@ -397,7 +416,7 @@ public class CubeQueryContext {
CubeFactTable fact = i.next();
List<String> factCols = cubeTabToCols.get(fact);
for (String col : cubeColumnsQueried) {
- if (!factCols.contains(col)) {
+ if (!factCols.contains(col.toLowerCase())) {
System.out.println("Not considering the fact table:" + fact +
" as column " + col + " is not available");
i.remove();
Modified: hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java?rev=1480541&r1=1480540&r2=1480541&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java (original)
+++ hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/parse/CubeTestSetup.java Thu May 9 07:11:09 2013
@@ -54,6 +54,10 @@ public class CubeTestSetup {
cubeMeasures.add(new ExprMeasure(new FieldSchema("msr6", "bigint",
"sixth measure"),
"(msr1 + msr2)/ msr4", "", "SUM", "RS"));
+ cubeMeasures.add(new ColumnMeasure(new FieldSchema("noAggrMsr", "bigint",
+ "measure without a default aggregate"),
+ null, null, null
+ ));
cubeDimensions = new HashSet<CubeDimension>();
List<CubeDimension> locationHierarchy = new ArrayList<CubeDimension>();
@@ -72,6 +76,8 @@ public class CubeTestSetup {
cubeDimensions.add(new HierarchicalDimension("location", locationHierarchy));
cubeDimensions.add(new BaseDimension(new FieldSchema("dim1", "string",
"basedim")));
+ // Added for ambiguity test
+ cubeDimensions.add(new BaseDimension(new FieldSchema("ambigdim1", "string", "used in testColumnAmbiguity")));
cubeDimensions.add(new ReferencedDimension(
new FieldSchema("dim2", "string", "ref dim"),
new TableReference("testdim2", "id")));
@@ -92,6 +98,7 @@ public class CubeTestSetup {
// add dimensions of the cube
factColumns.add(new FieldSchema("zipcode","int", "zip"));
factColumns.add(new FieldSchema("cityid","int", "city id"));
+ factColumns.add(new FieldSchema("ambigdim1", "string", "used in testColumnAmbiguity"));
Map<Storage, List<UpdatePeriod>> storageAggregatePeriods =
new HashMap<Storage, List<UpdatePeriod>>();
@@ -173,7 +180,8 @@ public class CubeTestSetup {
dimColumns.add(new FieldSchema("name", "string", "field1"));
dimColumns.add(new FieldSchema("stateid", "int", "state id"));
dimColumns.add(new FieldSchema("zipcode", "int", "zip code"));
-
+ dimColumns.add(new FieldSchema("ambigdim1", "string", "used in testColumnAmbiguity"));
+ dimColumns.add(new FieldSchema("ambigdim2", "string", "used in testColumnAmbiguity"));
Map<String, TableReference> dimensionReferences =
new HashMap<String, TableReference>();
dimensionReferences.put("stateid", new TableReference("statetable", "id"));
@@ -223,7 +231,7 @@ public class CubeTestSetup {
dimColumns.add(new FieldSchema("name", "string", "field1"));
dimColumns.add(new FieldSchema("capital", "string", "field2"));
dimColumns.add(new FieldSchema("region", "string", "region name"));
-
+ dimColumns.add(new FieldSchema("ambigdim2", "string", "used in testColumnAmbiguity"));
Storage hdfsStorage = new HDFSStorage("C1",
TextInputFormat.class.getCanonicalName(),
HiveIgnoreKeyTextOutputFormat.class.getCanonicalName());
Modified: hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java
URL: http://svn.apache.org/viewvc/hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java?rev=1480541&r1=1480540&r2=1480541&view=diff
==============================================================================
--- hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java (original)
+++ hive/branches/HIVE-4115/ql/src/test/org/apache/hadoop/hive/ql/cube/processors/TestCubeDriver.java Thu May 9 07:11:09 2013
@@ -9,6 +9,7 @@ import org.apache.hadoop.hive.conf.HiveC
import org.apache.hadoop.hive.ql.cube.parse.CubeTestSetup;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.junit.Assert;
+import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -38,6 +39,12 @@ public class TestCubeDriver {
}
+ @Before
+ public void setupDriver() throws Exception {
+ conf = new Configuration();
+ driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
+ }
+
public static String HOUR_FMT = "yyyy-MM-dd HH";
public static final SimpleDateFormat HOUR_PARSER = new SimpleDateFormat(HOUR_FMT);
@@ -54,8 +61,6 @@ public class TestCubeDriver {
@Test
public void testQueryWithNow() throws Exception {
- conf = new Configuration();
- driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
Throwable th = null;
try {
String hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube" +
@@ -69,8 +74,6 @@ public class TestCubeDriver {
@Test
public void testCandidateTables() throws Exception {
- conf = new Configuration();
- driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
Throwable th = null;
try {
String hqlQuery = driver.compileCubeQuery("select dim1, SUM(msr2)" +
@@ -86,8 +89,6 @@ public class TestCubeDriver {
@Test
public void testCubeWhereQuery() throws Exception {
- conf = new Configuration();
- driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
System.out.println("Test from:" + getDateUptoHours(twodaysBack) + " to:" + getDateUptoHours(now));
//String expected = " sum( testcube.msr2 ) FROM C1_testfact_HOURLY testcube WHERE " + whereClause(HOURLY) + " UNION " +
// SELECT sum( testcube.msr2 ) FROM C1_testfact_DAILY testcube WHERE + whereClause(DAILY)
@@ -101,8 +102,6 @@ public class TestCubeDriver {
@Test
public void testCubeJoinQuery() throws Exception {
- conf = new Configuration();
- driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
//String expected = "select SUM(testCube.msr2) from "
String hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube"
+ " join citytable on testCube.cityid = citytable.id"
@@ -128,8 +127,6 @@ public class TestCubeDriver {
@Test
public void testCubeGroupbyQuery() throws Exception {
- conf = new Configuration();
- driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
Calendar cal = Calendar.getInstance();
Date now = cal.getTime();
System.out.println("Test now:" + now);
@@ -189,8 +186,6 @@ public class TestCubeDriver {
@Test
public void testCubeQueryWithAilas() throws Exception {
- conf = new Configuration();
- driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
String hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube" +
" where time_range_in('" + getDateUptoHours(twodaysBack)
+ "','" + getDateUptoHours(now) + "')");
@@ -233,8 +228,6 @@ public class TestCubeDriver {
@Test
public void testCubeWhereQueryForMonth() throws Exception {
- conf = new Configuration();
- driver = new CubeDriver(new HiveConf(new Configuration(), HiveConf.class));
System.out.println("Test from:" + getDateUptoHours(twoMonthsBack) + " to:" + getDateUptoHours(now));
String hqlQuery = driver.compileCubeQuery("select SUM(msr2) from testCube" +
" where time_range_in('" + getDateUptoHours(twoMonthsBack)
@@ -252,8 +245,6 @@ public class TestCubeDriver {
@Test
public void testDimensionQueryWithMultipleStorages() throws Exception {
- conf = new Configuration();
- driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
String hqlQuery = driver.compileCubeQuery("select name, stateid from citytable");
System.out.println("cube hql:" + hqlQuery);
@@ -279,8 +270,6 @@ public class TestCubeDriver {
@Test
public void testLimitQueryOnDimension() throws Exception {
- conf = new Configuration();
- driver = new CubeDriver(new HiveConf(conf, HiveConf.class));
String hqlQuery = driver.compileCubeQuery("select name, stateid from citytable limit 100");
System.out.println("cube hql:" + hqlQuery);
//Assert.assertEquals(queries[1], cubeql.toHQL());
@@ -296,9 +285,6 @@ public class TestCubeDriver {
@Test
public void testAggregateResolver() throws Exception {
- conf = new Configuration();
- driver = new CubeDriver(new HiveConf(new Configuration(), HiveConf.class));
-
String timeRange = " where time_range_in('2013-05-01', '2013-05-03')";
System.out.println("#$AGGREGATE_RESOLVER_ TIME_RANGE:" + timeRange);
String q1 = "SELECT cityid, testCube.msr2 from testCube " + timeRange;
@@ -323,6 +309,88 @@ public class TestCubeDriver {
} catch (SemanticException e) {
e.printStackTrace();
}
+
+ String q8 = "SELECT cityid, testCube.noAggrMsr FROM testCube " + timeRange;
+ try {
+ // Should throw exception in aggregate resolver because noAggrMsr does not have a default aggregate defined.
+ String hql = driver.compileCubeQuery(q8);
+ Assert.assertTrue("Should not reach here", false);
+ } catch (SemanticException exc) {
+ Assert.assertNotNull(exc);
+ exc.printStackTrace();
+ }
+ }
+
+ @Test
+ public void testColumnAmbiguity() throws Exception {
+ String timeRange = " where time_range_in('2013-05-01', '2013-05-03')";
+ String query = "SELECT ambigdim1, testCube.msr1 FROM testCube " + timeRange;
+
+ try {
+ String hql = driver.compileCubeQuery(query);
+ Assert.assertTrue("Should not reach here", false);
+ } catch (SemanticException exc) {
+ Assert.assertNotNull(exc);
+ exc.printStackTrace();
+ }
+
+ String q2 = "SELECT ambigdim2, testCube.msr1 FROM testCube " + timeRange;
+ try {
+ String hql = driver.compileCubeQuery(q2);
+ Assert.assertTrue("Should not reach here", false);
+ } catch (SemanticException exc) {
+ Assert.assertNotNull(exc);
+ exc.printStackTrace();
+ }
+
+
+ }
+
+ @Test
+ public void testMissingColumnValidation() throws Exception {
+ String timeRange = " where time_range_in('2013-05-01', '2013-05-03')";
+
+ try {
+ // this query should go through
+ String q1Hql =
+ driver.compileCubeQuery("SELECT cityid, msr2 from testCube " + timeRange);
+ } catch (SemanticException exc) {
+ exc.printStackTrace();
+ Assert.assertTrue("Exception not expected here", false);
+ }
+
+ try {
+ // this query should throw an exception because invalidMsr is invalid
+ String q2Hql =
+ driver.compileCubeQuery("SELECT cityid, invalidMsr from testCube " + timeRange);
+ Assert.assertTrue("Should not reach here", false);
+ } catch (SemanticException exc) {
+ exc.printStackTrace();
+ Assert.assertNotNull(exc);
+ }
+ }
+
+ @Test
+ public void testTimeRangeValidation() throws Exception {
+ String timeRange1 = " where time_range_in('2013-05-01', '2013-05-03')";
+
+ try {
+ String hql = driver.compileCubeQuery("SELECT cityid, testCube.msr2 from testCube " + timeRange1);
+ } catch (SemanticException exc) {
+ exc.printStackTrace(System.out);
+ Assert.assertTrue("Exception not expected here", false);
+ }
+
+ // swap to and from -
+ String timeRange2 = " where time_range_in('2013-05-03', '2013-05-01')";
+ try {
+ // this should throw exception because from date is after to date
+ String hql = driver.compileCubeQuery("SELECT cityid, testCube.msr2 from testCube " + timeRange2);
+ Assert.assertTrue("Should not reach here", false);
+ } catch (SemanticException exc) {
+ exc.printStackTrace(System.out);
+ Assert.assertNotNull(exc);
+ }
}
}