You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lens.apache.org by am...@apache.org on 2014/12/29 07:54:47 UTC

incubator-lens git commit: LENS-151 : Fix groupby promotion for queries with dim attributes having alias names (Himanshu Gahlaut via amareshwari)

Repository: incubator-lens
Updated Branches:
  refs/heads/master 7338f202c -> c7bb16615


LENS-151 : Fix groupby promotion for queries with dim attributes having alias names (Himanshu Gahlaut via amareshwari)


Project: http://git-wip-us.apache.org/repos/asf/incubator-lens/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-lens/commit/c7bb1661
Tree: http://git-wip-us.apache.org/repos/asf/incubator-lens/tree/c7bb1661
Diff: http://git-wip-us.apache.org/repos/asf/incubator-lens/diff/c7bb1661

Branch: refs/heads/master
Commit: c7bb16615692e3bf02729e1d72c824e1c005ea22
Parents: 7338f20
Author: Amareshwari Sriramdasu <am...@inmobi.com>
Authored: Mon Dec 29 12:24:37 2014 +0530
Committer: Amareshwari Sriramdasu <am...@inmobi.com>
Committed: Mon Dec 29 12:24:37 2014 +0530

----------------------------------------------------------------------
 lens-cube/pom.xml                               | 22 +++++-
 .../apache/lens/cube/parse/GroupbyResolver.java | 71 +++++++++++++++++---
 .../org/apache/lens/cube/parse/HQLParser.java   | 33 +++++++++
 .../apache/lens/cube/parse/CubeTestSetup.java   | 36 ++++++++--
 .../lens/cube/parse/TestCubeRewriter.java       | 34 ++++++++++
 .../lens/cube/parse/TestQueryRewrite.java       | 10 +--
 lens-cube/src/test/resources/log4j.properties   | 24 ++++---
 7 files changed, 201 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/c7bb1661/lens-cube/pom.xml
----------------------------------------------------------------------
diff --git a/lens-cube/pom.xml b/lens-cube/pom.xml
index 42c1a5a..b1458f9 100644
--- a/lens-cube/pom.xml
+++ b/lens-cube/pom.xml
@@ -50,7 +50,7 @@
       </dependency>
       <dependency>
         <groupId>org.mockito</groupId>
-      <artifactId>mockito-all</artifactId>
+        <artifactId>mockito-all</artifactId>
     </dependency>
      <dependency>
         <groupId>org.powermock</groupId>
@@ -71,6 +71,26 @@
         <version>${project.version}</version>
         <scope>test</scope>
       </dependency>
+      <dependency>
+        <groupId>org.projectlombok</groupId>
+        <artifactId>lombok</artifactId>
+      </dependency>
+      <dependency>
+        <groupId>org.slf4j</groupId>
+        <artifactId>slf4j-api</artifactId>
+      </dependency>
+      <dependency>
+        <groupId>org.slf4j</groupId>
+        <artifactId>slf4j-log4j12</artifactId>
+      </dependency>
+      <dependency>
+        <groupId>log4j</groupId>
+        <artifactId>log4j</artifactId>
+      </dependency>
+      <dependency>
+        <groupId>com.google.guava</groupId>
+        <artifactId>guava</artifactId>
+      </dependency>
     </dependencies>
 
     <build>

http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/c7bb1661/lens-cube/src/main/java/org/apache/lens/cube/parse/GroupbyResolver.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/GroupbyResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/GroupbyResolver.java
index c2fef7e..ee61aaa 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/GroupbyResolver.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/GroupbyResolver.java
@@ -18,12 +18,8 @@
  */
 package org.apache.lens.cube.parse;
 
-import static org.apache.hadoop.hive.ql.parse.HiveParser.DOT;
-import static org.apache.hadoop.hive.ql.parse.HiveParser.Identifier;
-import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_TABLE_OR_COL;
-import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_GROUPBY;
-
 import java.util.ArrayList;
+import java.util.LinkedList;
 import java.util.List;
 
 import org.antlr.runtime.CommonToken;
@@ -37,6 +33,8 @@ import org.apache.hadoop.hive.ql.parse.HiveParser;
 import org.apache.hadoop.hive.ql.parse.ParseException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 
+import static org.apache.hadoop.hive.ql.parse.HiveParser.*;
+
 /**
  * Promotes groupby to select and select to groupby.
  * 
@@ -55,7 +53,7 @@ class GroupbyResolver implements ContextRewriter {
             CubeQueryConfUtil.DEFAULT_ENABLE_GROUP_BY_TO_SELECT);
   }
 
-  private void promoteSelect(CubeQueryContext cubeql, List<String> selectExprs, List<String> groupByExprs)
+  private void promoteSelect(CubeQueryContext cubeql, List<String> nonMsrNonAggSelExprsWithoutAlias, List<String> groupByExprs)
       throws SemanticException {
     if (!selectPromotionEnabled) {
       return;
@@ -69,14 +67,13 @@ class GroupbyResolver implements ContextRewriter {
     // each selected column, if it is not a cube measure, and does not have
     // aggregation on the column, then it is added to group by columns.
     if (cubeql.hasAggregates()) {
-      for (String expr : selectExprs) {
-        expr = getExpressionWithoutAlias(cubeql, expr);
+      for (String expr : nonMsrNonAggSelExprsWithoutAlias) {
+
         if (!groupByExprs.contains(expr)) {
           if (!cubeql.isAggregateExpr(expr)) {
-            String groupbyExpr = expr;
             ASTNode exprAST;
             try {
-              exprAST = HQLParser.parseExpr(groupbyExpr);
+              exprAST = HQLParser.parseExpr(expr);
             } catch (ParseException e) {
               throw new SemanticException(e);
             }
@@ -157,7 +154,7 @@ class GroupbyResolver implements ContextRewriter {
         groupByExprs.add(g.trim());
       }
     }
-    promoteSelect(cubeql, selectExprs, groupByExprs);
+    promoteSelect(cubeql, getNonMsrNonAggSelExprsWithoutAlias(cubeql.getSelectAST(), cubeql), groupByExprs);
     promoteGroupby(cubeql, selectExprs, groupByExprs);
   }
 
@@ -179,6 +176,58 @@ class GroupbyResolver implements ContextRewriter {
     return false;
   }
 
+  /**
+   *
+   * @param selectASTNode a select AST Node
+   * @param cubeQueryCtx
+   * @return List of non measure and non aggregate select expressions in string format without aliases
+   */
+  private List<String> getNonMsrNonAggSelExprsWithoutAlias(final ASTNode selectASTNode, CubeQueryContext cubeQueryCtx) {
+
+    List<String> nonMsrNonAggSelExprsWithoutAlias = new LinkedList<String>();
+    List<ASTNode> nonMsrNonAggSelASTChildren = filterNonMsrNonAggSelectASTChildren(selectASTNode,cubeQueryCtx);
+
+    for (ASTNode nonMsrNonAggSelASTChild : nonMsrNonAggSelASTChildren) {
+
+      /* Assuming all children of SelectASTNode are SELECT Expression AST Nodes only.
+      Refer:https://reviews.apache.org/r/29422/#comment109498 for more details.
+      Order of Children of select expression AST Node => Index 0: Select Expression Without Alias, Index 1: Alias */
+
+      ASTNode selExprWithoutAlias = (ASTNode) nonMsrNonAggSelASTChild.getChildren().get(0);
+      String result = HQLParser.getString(selExprWithoutAlias);
+      nonMsrNonAggSelExprsWithoutAlias.add(result);
+
+    }
+    return nonMsrNonAggSelExprsWithoutAlias;
+  }
+
+  /**
+   *
+   * @param selectASTNode a select ASTNode
+   * @param cubeQueryCtx
+   * @return list of selectASTNode Children which does not contain a measure or an aggregate.
+   *         Empty list is returned when selectASTNode is not a Select AST Node.
+   *         Empty list is returned when there are no non measure and non aggregate children nodes present in select AST.
+   */
+  private List<ASTNode> filterNonMsrNonAggSelectASTChildren(final ASTNode selectASTNode, CubeQueryContext cubeQueryCtx) {
+
+    List<ASTNode> nonMsrNonAggSelASTChildren = new LinkedList<ASTNode>();
+
+    if (!HQLParser.isSelectASTNode(selectASTNode)) {
+      return nonMsrNonAggSelASTChildren;
+    }
+
+    for (int i = 0; i < selectASTNode.getChildCount(); i++) {
+      ASTNode childNode = (ASTNode) selectASTNode.getChild(i);
+      if (hasMeasure(childNode,cubeQueryCtx) || hasAggregate(childNode)) {
+        continue;
+      }
+      nonMsrNonAggSelASTChildren.add(childNode);
+    }
+
+    return nonMsrNonAggSelASTChildren;
+  }
+
   private List<String> getExpressions(ASTNode node, CubeQueryContext cubeql) {
 
     List<String> list = new ArrayList<String>();

http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/c7bb1661/lens-cube/src/main/java/org/apache/lens/cube/parse/HQLParser.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/HQLParser.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/HQLParser.java
index 8e41830..7347b3a 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/HQLParser.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/HQLParser.java
@@ -82,6 +82,7 @@ import java.util.Queue;
 import java.util.Set;
 import java.util.regex.Pattern;
 
+import com.google.common.base.Optional;
 import org.antlr.runtime.tree.Tree;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.Context;
@@ -674,4 +675,36 @@ public class HQLParser {
 
     return false;
   }
+
+  /**
+   *
+   * @param node an ASTNode
+   * @return true when input node is a SELECT AST Node. Otherwise, false.
+   *
+   */
+  public static boolean isSelectASTNode(final ASTNode node) {
+
+    Optional<Integer> astNodeType = getASTNodeType(node);
+    if (astNodeType.isPresent()) {
+      return astNodeType.get() == HiveParser.TOK_SELECT;
+    }
+
+    return false;
+  }
+
+  /**
+   *
+   * @param node an ASTNode
+   * @return When node is null or token inside node is null, then Optional.absent is returned.
+   *         Otherwise, an integer representing ASTNodeType is returned.
+   */
+  private static Optional<Integer> getASTNodeType(final ASTNode node) {
+
+    Optional<Integer> astNodeType = Optional.absent();
+    if (node != null && node.getToken() != null) {
+      astNodeType = Optional.of(node.getToken().getType());
+    }
+
+    return astNodeType;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/c7bb1661/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
index f515df1..0e414df 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
@@ -34,6 +34,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.Database;
@@ -96,6 +97,7 @@ import org.testng.Assert;
  */
 
 @SuppressWarnings("deprecation")
+@Slf4j
 public class CubeTestSetup {
 
   public static String HOUR_FMT = "yyyy-MM-dd-HH";
@@ -111,16 +113,23 @@ public class CubeTestSetup {
   public static final String DERIVED_CUBE_NAME2 = "der2";
   public static final String DERIVED_CUBE_NAME3 = "der3";
   public static final String DERIVED_CUBE_NAME4 = "der4";
+
+  // Time Instances as Date Type
   public static Date now;
+  public static Date lastHour;
   public static Date twodaysBack;
   public static Date oneDayBack;
-  public static String twoDaysRange;
   public static Date twoMonthsBack;
+  public static Date before4daysStart;
+  public static Date before4daysEnd;
+
+  // Time Ranges
+  public static String lastHourTimeRange;
+  public static String twoDaysRange;
   public static String twoMonthsRangeUptoMonth;
   public static String twoMonthsRangeUptoHours;
   public static String twoDaysRangeBefore4days;
-  public static Date before4daysStart;
-  public static Date before4daysEnd;
+
   private static boolean zerothHour;
   private static String c1 = "C1";
   private static String c2 = "C2";
@@ -134,8 +143,17 @@ public class CubeTestSetup {
     }
     Calendar cal = Calendar.getInstance();
     now = cal.getTime();
+    log.debug("Test now:{}", now);
+
+    // Figure out if current hour is 0th hour
     zerothHour = (cal.get(Calendar.HOUR_OF_DAY) == 0);
-    System.out.println("Test now:" + now);
+
+    // Figure out last hour
+    cal.add(Calendar.HOUR_OF_DAY, -1);
+    lastHour = cal.getTime();
+    log.debug("LastHour:{}",lastHour);
+
+    cal.setTime(now);
     cal.add(Calendar.DAY_OF_MONTH, -1);
     oneDayBack = cal.getTime();
     cal.add(Calendar.DAY_OF_MONTH, -1);
@@ -163,6 +181,9 @@ public class CubeTestSetup {
         "time_range_in(dt, '" + getDateUptoMonth(twoMonthsBack) + "','" + getDateUptoMonth(now) + "')";
     twoMonthsRangeUptoHours =
         "time_range_in(dt, '" + getDateUptoHours(twoMonthsBack) + "','" + getDateUptoHours(now) + "')";
+
+    // calculate lastHourTimeRange
+    setLastHourTimeRange();
     inited = true;
   }
 
@@ -1914,4 +1935,11 @@ public class CubeTestSetup {
     HQLParser.printAST(HQLParser.parseHQL(query));
   }
 
+  private static void setLastHourTimeRange() {
+    lastHourTimeRange = getTimeRangeString(getDateUptoHours(lastHour), getDateUptoHours(now));
+  }
+
+  private static String getTimeRangeString(final String startDate, final String endDate) {
+    return "time_range_in(dt, '" + startDate + "','" + endDate + "')";
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/c7bb1661/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java
index ad4abcf..9ddc454 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java
@@ -19,6 +19,8 @@
 
 package org.apache.lens.cube.parse;
 
+import static org.testng.Assert.assertEquals;
+
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;
@@ -731,6 +733,38 @@ public class TestCubeRewriter extends TestQueryRewrite {
   }
 
   @Test
+  public void testSelectExprPromotionToGroupByWithSpacesInDimensionAliasAndWithAsKeywordBwColAndAlias() throws SemanticException, ParseException {
+
+    String inputQuery = "cube select name as `Alias With Spaces`, SUM(msr2) as `TestMeasure` from testCube join citydim"
+        + " on testCube.cityid = citydim.id where "+ lastHourTimeRange;
+
+    String expectedRewrittenQuery = "SELECT ( citydim . name ) as `Alias With Spaces` , sum(( testcube . msr2 )) "
+        + "testmeasure  FROM TestQueryRewrite.c2_testfact testcube inner JOIN TestQueryRewrite.c1_citytable citydim ON "
+        + "(( testcube . cityid ) = ( citydim . id )) WHERE (((( testcube . dt ) =  '"+CubeTestSetup.getDateUptoHours(lastHour)+"' ) AND ((citydim.dt ="
+        + " 'latest')))) GROUP BY ( citydim . name )";
+
+    String actualRewrittenQuery = rewrite(inputQuery, getConf());
+
+    assertEquals(actualRewrittenQuery, expectedRewrittenQuery);
+  }
+
+  @Test
+  public void testSelectExprPromotionToGroupByWithSpacesInDimensionAliasAndWithoutAsKeywordBwColAndAlias() throws SemanticException, ParseException {
+
+    String inputQuery = "cube select name `Alias With Spaces`, SUM(msr2) as `TestMeasure` from testCube join citydim"
+        + " on testCube.cityid = citydim.id where "+ lastHourTimeRange;
+
+    String expectedRewrittenQuery = "SELECT ( citydim . name ) as `Alias With Spaces` , sum(( testcube . msr2 )) "
+        + "testmeasure  FROM TestQueryRewrite.c2_testfact testcube inner JOIN TestQueryRewrite.c1_citytable citydim ON "
+        + "(( testcube . cityid ) = ( citydim . id )) WHERE (((( testcube . dt ) =  '"+CubeTestSetup.getDateUptoHours(lastHour)+"' ) AND ((citydim.dt ="
+        + " 'latest')))) GROUP BY ( citydim . name )";
+
+    String actualRewrittenQuery = rewrite(inputQuery, getConf());
+
+    assertEquals(actualRewrittenQuery, expectedRewrittenQuery);
+  }
+
+  @Test
   public void testCubeQueryWithAilas() throws Exception {
     String hqlQuery = rewrite("select SUM(msr2) m2 from" + " testCube where " + twoDaysRange, getConf());
     String expected =

http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/c7bb1661/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQueryRewrite.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQueryRewrite.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQueryRewrite.java
index fd939e9..a8cdb38 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQueryRewrite.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQueryRewrite.java
@@ -19,6 +19,7 @@
 
 package org.apache.lens.cube.parse;
 
+import lombok.extern.slf4j.Slf4j;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.parse.ParseException;
@@ -28,6 +29,7 @@ import org.testng.annotations.AfterSuite;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.BeforeSuite;
 
+@Slf4j
 public abstract class TestQueryRewrite {
 
   private static CubeTestSetup setup;
@@ -52,13 +54,13 @@ public abstract class TestQueryRewrite {
   }
 
   protected String rewrite(String query, Configuration conf) throws SemanticException, ParseException {
-    System.out.println("User query:" + query);
-    CubeQueryContext rewrittenQuery = rewriteCtx(query, conf);
-    return rewrittenQuery.toHQL();
+    String rewrittenQuery = rewriteCtx(query, conf).toHQL();
+    log.info("Rewritten query: {}",rewrittenQuery);
+    return rewrittenQuery;
   }
 
   protected CubeQueryContext rewriteCtx(String query, Configuration conf) throws SemanticException, ParseException {
-    System.out.println("User query:" + query);
+    log.info("User query: {}",query);
     CubeQueryRewriter driver = new CubeQueryRewriter(new HiveConf(conf, HiveConf.class));
     return driver.rewrite(query);
   }

http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/c7bb1661/lens-cube/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/resources/log4j.properties b/lens-cube/src/test/resources/log4j.properties
index 9729de0..f514648 100644
--- a/lens-cube/src/test/resources/log4j.properties
+++ b/lens-cube/src/test/resources/log4j.properties
@@ -17,14 +17,20 @@
 # under the License.
 #
 
-log4j.rootLogger = INFO, ROOT
+# Configuration for loggers
+log4j.rootLogger=ERROR, STDOUT, TEST_LOG_FILE
 
-log4j.appender.out = org.apache.log4j.ConsoleAppender
-log4j.appender.out.layout = org.apache.log4j.PatternLayout
-log4j.appender.out.layout.ConversionPattern = %d (%t) [%p - %l] %m%n
-log4j.additivity.foo.bar.Baz=false
+log4j.logger.org.apache.lens=WARN, STDOUT, TEST_LOG_FILE
+log4j.additivity.org.apache.lens=false
 
-log4j.appender.ROOT=org.apache.log4j.RollingFileAppender
-log4j.appender.ROOT.File=target/test.log
-log4j.appender.ROOT.layout=org.apache.log4j.PatternLayout
-log4j.appender.ROOT.layout.ConversionPattern=%d{dd MMM yyyy HH:mm:ss,SSS} [%t] %-5p %c %x - %m%n
+# Configuration for appenders
+log4j.appender.STDOUT=org.apache.log4j.ConsoleAppender
+log4j.appender.STDOUT.layout=org.apache.log4j.PatternLayout
+log4j.appender.STDOUT.layout.ConversionPattern=%d [%t] %-5p %c - %m%n
+log4j.appender.STDOUT.Threshold=WARN
+
+log4j.appender.TEST_LOG_FILE=org.apache.log4j.RollingFileAppender
+log4j.appender.TEST_LOG_FILE.File=target/test.log
+log4j.appender.TEST_LOG_FILE.layout=org.apache.log4j.PatternLayout
+log4j.appender.TEST_LOG_FILE.layout.ConversionPattern=%d [%t] %-5p %c - %m%n
+log4j.appender.TEST_LOG_FILE.Threshold=WARN