You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lens.apache.org by sh...@apache.org on 2015/12/30 08:10:27 UTC

[08/50] [abbrv] lens git commit: LENS-851 : queries where results of two storage tables of same fact are unioned, the rows should be aggregated

http://git-wip-us.apache.org/repos/asf/lens/blob/c445730c/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQuery.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQuery.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQuery.java
index 8e35ea9..db3ba9b 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQuery.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQuery.java
@@ -19,22 +19,28 @@
 
 package org.apache.lens.cube.parse;
 
+import static org.apache.lens.cube.parse.HQLParser.equalsAST;
+
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.lens.server.api.error.LensException;
+
 import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.builder.EqualsBuilder;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
 
 import com.google.common.base.Objects;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
-
-import lombok.Getter;
-import lombok.Setter;
-
+import lombok.Data;
 import lombok.extern.slf4j.Slf4j;
 
 @Slf4j
 public class TestQuery {
+  private static HiveConf conf = new HiveConf();
+  private ASTNode ast;
 
   private String actualQuery;
   private String joinQueryPart = null;
@@ -46,6 +52,7 @@ public class TestQuery {
   private String preJoinQueryPart = null;
 
   private String postJoinQueryPart = null;
+  private boolean processed = false;
 
   public enum JoinType {
     INNERJOIN,
@@ -54,32 +61,45 @@ public class TestQuery {
     FULLOUTERJOIN,
     UNIQUE,
     LEFTSEMIJOIN,
-    JOIN;
+    JOIN
   }
 
   public enum Clause {
     WHERE,
     GROUPBY,
     HAVING,
-    ORDEREDBY;
+    ORDEREDBY
   }
 
   public TestQuery(String query) {
     this.actualQuery = query;
-    this.trimmedQuery = getTrimmedQuery(query);
-    this.joinQueryPart = extractJoinStringFromQuery(trimmedQuery);
-    /**
-     * Get the join query part, pre-join query and post-join query part from the trimmed query.
-     *
-     */
-    if (StringUtils.isNotBlank(joinQueryPart)) {
-      this.preJoinQueryPart = trimmedQuery.substring(0, trimmedQuery.indexOf(joinQueryPart));
-      this.postJoinQueryPart = trimmedQuery.substring(getMinIndexOfClause());
-      prepareJoinStrings(trimmedQuery);
-    } else {
-      int minIndex = getMinIndexOfClause();
-      this.preJoinQueryPart = trimmedQuery.substring(0, minIndex);
-      this.postJoinQueryPart = trimmedQuery.substring(minIndex);
+  }
+
+  public ASTNode getAST() throws LensException {
+    if (this.ast == null) {
+      ast = HQLParser.parseHQL(this.actualQuery, conf);
+    }
+    return ast;
+  }
+
+  public void processQueryAsString() {
+    if (!processed) {
+      processed = true;
+      this.trimmedQuery = getTrimmedQuery(actualQuery);
+      this.joinQueryPart = extractJoinStringFromQuery(trimmedQuery);
+      /**
+       * Get the join query part, pre-join query and post-join query part from the trimmed query.
+       *
+       */
+      if (StringUtils.isNotBlank(joinQueryPart)) {
+        this.preJoinQueryPart = trimmedQuery.substring(0, trimmedQuery.indexOf(joinQueryPart));
+        this.postJoinQueryPart = trimmedQuery.substring(getMinIndexOfClause());
+        prepareJoinStrings(trimmedQuery);
+      } else {
+        int minIndex = getMinIndexOfClause();
+        this.preJoinQueryPart = trimmedQuery.substring(0, minIndex);
+        this.postJoinQueryPart = trimmedQuery.substring(minIndex);
+      }
     }
   }
 
@@ -105,11 +125,11 @@ public class TestQuery {
       query = query.substring(nextJoinIndex + joinDetails.getJoinType().name().length());
     }
   }
-
+  @Data
   private class JoinDetails {
-    @Setter @Getter private JoinType joinType;
-    @Setter @Getter private int index;
-    @Setter @Getter private String joinString;
+    private JoinType joinType;
+    private int index;
+    private String joinString;
   }
 
   /**
@@ -129,7 +149,7 @@ public class TestQuery {
     joinDetails.setIndex(nextJoinIndex);
     if (nextJoinIndex != Integer.MAX_VALUE) {
       joinDetails.setJoinString(
-          getJoinString(query.substring(nextJoinIndex + nextJoinTypePart.name().length())));
+        getJoinString(query.substring(nextJoinIndex + nextJoinTypePart.name().length())));
     }
     joinDetails.setJoinType(nextJoinTypePart);
     return joinDetails;
@@ -164,7 +184,7 @@ public class TestQuery {
       }
       minClauseIndex = clauseIndex < minClauseIndex ? clauseIndex : minClauseIndex;
     }
-    return (minClauseIndex == Integer.MAX_VALUE || minClauseIndex == -1) ? query.length() : minClauseIndex;
+    return (minClauseIndex == Integer.MAX_VALUE) ? query.length() : minClauseIndex;
   }
 
   private int getMinIndexOfJoinType() {
@@ -190,6 +210,9 @@ public class TestQuery {
 
   @Override
   public boolean equals(Object query) {
+    if (!(query instanceof TestQuery)) {
+      return false;
+    }
     TestQuery expected = (TestQuery) query;
     if (this == expected) {
       return true;
@@ -201,9 +224,23 @@ public class TestQuery {
     } else if (expected.actualQuery == null) {
       return false;
     }
-    return Objects.equal(this.joinTypeStrings, expected.joinTypeStrings)
-        && Objects.equal(this.preJoinQueryPart, expected.preJoinQueryPart)
-        && Objects.equal(this.postJoinQueryPart, expected.postJoinQueryPart);
+    boolean equals = false;
+    try {
+      equals = equalsAST(this.getAST(), expected.getAST());
+    } catch (LensException e) {
+      log.error("AST not valid", e);
+    }
+    return equals || stringEquals(expected);
+  }
+
+  private boolean stringEquals(TestQuery expected) {
+    processQueryAsString();
+    expected.processQueryAsString();
+    return new EqualsBuilder()
+      .append(this.joinTypeStrings, expected.joinTypeStrings)
+      .append(this.preJoinQueryPart, expected.preJoinQueryPart)
+      .append(this.postJoinQueryPart, expected.postJoinQueryPart)
+      .build();
   }
 
   @Override
@@ -212,9 +249,6 @@ public class TestQuery {
   }
 
   public String toString() {
-    StringBuilder sb = new StringBuilder();
-    sb.append("Actual Query: " + actualQuery).append("\n");
-    sb.append("JoinQueryString: " + joinTypeStrings);
-    return sb.toString();
+    return "Actual Query: " + actualQuery + "\n" + "JoinQueryString: " + joinTypeStrings;
   }
 }

http://git-wip-us.apache.org/repos/asf/lens/blob/c445730c/src/site/apt/user/olap-query-conf.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/user/olap-query-conf.apt b/src/site/apt/user/olap-query-conf.apt
index 6606d42..6f84869 100644
--- a/src/site/apt/user/olap-query-conf.apt
+++ b/src/site/apt/user/olap-query-conf.apt
@@ -36,36 +36,42 @@ OLAP query configuration
 *--+--+---+--+
 |6|lens.cube.query.enable.flattening.bridge.tables|false|Flag specifies if fields selected have to be flattened or not, if they are coming from tables with many to many relationship in join. If false, field selection will be simple join and selecting the field. If true, the fields from bridge tables will be aggregated grouped by join key.|
 *--+--+---+--+
-|7|lens.cube.query.fail.if.data.partial|false|Whether to fail the query of data is partial|
+|7|lens.cube.query.enable.storages.union|false|Sometimes one storage table doesn't contain all required partitions, and the query needs to be answered from two storage tables. Enabling this (make value = <true>) allows rewrite of such queries. If it's <false>, then such queries will fail in the rewrite phase. The feature should only be enabled when all the aggregate functions used in the query (explicitly or implicitly picked from default aggregates of used measures) are transitive. Transitive aggregate functions are those that satisfy the following property:\ |
+| |                                     |     |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 \ |
+| |                                     |     |<<<f(a, b, c, d) = f(f(a, b), f(c, d)) for all possible values of a,b,c,d.>>>                                                                                                                                                                                                                                                                                                                                                                                                                                                    \ |
+| |                                     |     |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 \ |
+| |                                     |     |e.g. SUM, MAX, MIN etc are transitive aggregate functions, while AVG, COUNT etc are not.                                                                                                                                                                                                                                                                                                                                                                                                                                           |
 *--+--+---+--+
-|8|lens.cube.query.join.type|INNER|Tells what the join type is, in-case of automatic resolution of joins is enabled|
+|8|lens.cube.query.fail.if.data.partial|false|Whether to fail the query if data is partial|
 *--+--+---+--+
-|9|lens.cube.query.lookahead.ptparts.forinterval.${interval}|1|The value of number of lookahead process time partitions for interval specified. Interval can be any Update period.|
+|9|lens.cube.query.join.type|INNER|Tells what the join type is, in case automatic resolution of joins is enabled|
 *--+--+---+--+
-|10|lens.cube.query.max.interval| |Maximum value of the update period that the query timed dimensions can take values of. For example, if query involves month ranges, user can say query maximum interval is daily, then no monthly partitions will be picked.|
+|10|lens.cube.query.lookahead.ptparts.forinterval.${interval}|1|The value of number of lookahead process time partitions for interval specified. Interval can be any Update period.|
 *--+--+---+--+
-|11|lens.cube.query.nonexisting.partitions| |The list of comma separated non existing partitions, if query can run with partial data. The value will be set by the cube query rewriter|
+|11|lens.cube.query.max.interval| |Maximum value of the update period that the query timed dimensions can take values of. For example, if query involves month ranges, user can say query maximum interval is daily, then no monthly partitions will be picked.|
 *--+--+---+--+
-|12|lens.cube.query.partition.where.clause.format| |The simple date format of how the queried partition should be put in where clause. If nothing is specified, it will use the format from org.apache.lens.cube.metadata.UpdatePeriod for each type of partition|
+|12|lens.cube.query.nonexisting.partitions| |The list of comma separated non existing partitions, if query can run with partial data. The value will be set by the cube query rewriter|
 *--+--+---+--+
-|13|lens.cube.query.pick.lightest.fact.first|false|If set to true, lightest fact will be resolved first than resolving storages. Otherwise, storages will be resolved to check all partitions exist and then pick lightest fact among candidates|
+|13|lens.cube.query.partition.where.clause.format| |The simple date format of how the queried partition should be put in where clause. If nothing is specified, it will use the format from org.apache.lens.cube.metadata.UpdatePeriod for each type of partition|
 *--+--+---+--+
-|14|lens.cube.query.process.time.partition.column| |The column name which is a process time column. If process time column is specified, query rewriter will look ahead the partitions of other timed dimensions inside this column.|
+|14|lens.cube.query.pick.lightest.fact.first|false|If set to true, the lightest fact will be resolved first, before resolving storages. Otherwise, storages will be resolved first to check that all partitions exist, and then the lightest fact among the candidates will be picked|
 *--+--+---+--+
-|15|lens.cube.query.promote.groupby.toselect|false|Tells whether to promote group by clauses to be promoted to select expressions if they are already not projected. To enable automatic promotion, this value should be true.|
+|15|lens.cube.query.process.time.partition.column| |The column name which is a process time column. If process time column is specified, query rewriter will look ahead the partitions of other timed dimensions inside this column.|
 *--+--+---+--+
-|16|lens.cube.query.promote.select.togroupby|false|Tells whether to promote select expressions which is not inside any aggregate, to be promoted to groupby clauses, if they are already not part of groupby clauses. To enable automatic promotion, this value should be true.|
+|16|lens.cube.query.promote.groupby.toselect|false|Tells whether to promote group by clauses to select expressions if they are not already projected. To enable automatic promotion, this value should be true.|
 *--+--+---+--+
-|17|lens.cube.query.replace.timedim|true|Tells whether timedim attribute queried in the time range should be replaced with its corresponding partition column name.|
+|17|lens.cube.query.promote.select.togroupby|false|Tells whether to promote select expressions which are not inside any aggregate to groupby clauses, if they are not already part of groupby clauses. To enable automatic promotion, this value should be true.|
 *--+--+---+--+
-|18|lens.cube.query.time.range.writer.class|org.apache.lens.cube.parse.ORTimeRangeWriter|The timerange writer class which specifies how the resolved partitions in timeranges should be written in final query. Available writers are org.apache.lens.cube.parse.ORTimeRangeWriter and org.apache.lens.cube.parse.BetweenTimeRangeWriter|
+|18|lens.cube.query.replace.timedim|true|Tells whether timedim attribute queried in the time range should be replaced with its corresponding partition column name.|
 *--+--+---+--+
-|19|lens.cube.query.valid.${cubename}.facttables| |List of comma separated fact tables that are valid for cube. If no value is specified, all fact tables are valid|
+|19|lens.cube.query.time.range.writer.class|org.apache.lens.cube.parse.ORTimeRangeWriter|The timerange writer class which specifies how the resolved partitions in timeranges should be written in final query. Available writers are org.apache.lens.cube.parse.ORTimeRangeWriter and org.apache.lens.cube.parse.BetweenTimeRangeWriter|
 *--+--+---+--+
-|20|lens.cube.query.valid.dim.storgaetables| |List of comma separated dimension storage tables that are valid. If no value is specified, all tables are valid|
+|20|lens.cube.query.valid.${cubename}.facttables| |List of comma separated fact tables that are valid for cube. If no value is specified, all fact tables are valid|
 *--+--+---+--+
-|21|lens.cube.query.valid.fact.${facttable}.storage.${storagename}.updateperiods| |List of comma separated update periods that are valid for a fact on a storage. If no value is specified, all update periods are valid|
+|21|lens.cube.query.valid.dim.storgaetables| |List of comma separated dimension storage tables that are valid. If no value is specified, all tables are valid|
 *--+--+---+--+
-|22|lens.cube.query.valid.fact.${facttable}.storagetables| |List of comma separated storage tables that are valid for a fact. If no value is specified, all storage tables are valid|
+|22|lens.cube.query.valid.fact.${facttable}.storage.${storagename}.updateperiods| |List of comma separated update periods that are valid for a fact on a storage. If no value is specified, all update periods are valid|
+*--+--+---+--+
+|23|lens.cube.query.valid.fact.${facttable}.storagetables| |List of comma separated storage tables that are valid for a fact. If no value is specified, all storage tables are valid|
 *--+--+---+--+
 The configuration parameters and their default values