You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lens.apache.org by am...@apache.org on 2015/10/26 14:30:04 UTC
lens git commit: LENS-845 : Allow regex rename arguments to passed
configuration in columnar sql rewriter
Repository: lens
Updated Branches:
refs/heads/master daa65a5af -> e274c2dfd
LENS-845 : Allow regex rename arguments to passed configuration in columnar sql rewriter
Project: http://git-wip-us.apache.org/repos/asf/lens/repo
Commit: http://git-wip-us.apache.org/repos/asf/lens/commit/e274c2df
Tree: http://git-wip-us.apache.org/repos/asf/lens/tree/e274c2df
Diff: http://git-wip-us.apache.org/repos/asf/lens/diff/e274c2df
Branch: refs/heads/master
Commit: e274c2dfd8907a6fad012989bad63bc131dee444
Parents: daa65a5
Author: Rajat Khandelwal <pr...@apache.org>
Authored: Mon Oct 26 18:59:11 2015 +0530
Committer: Amareshwari Sriramadasu <am...@apache.org>
Committed: Mon Oct 26 18:59:11 2015 +0530
----------------------------------------------------------------------
.../lens/driver/jdbc/ColumnarSQLRewriter.java | 30 ++++++------
.../driver/jdbc/JDBCDriverConfConstants.java | 1 +
.../src/main/resources/jdbcdriver-default.xml | 6 +++
.../driver/jdbc/TestColumnarSQLRewriter.java | 49 +++++++++++---------
.../src/test/resources/jdbcdriver-site.xml | 6 +++
5 files changed, 55 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lens/blob/e274c2df/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
----------------------------------------------------------------------
diff --git a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
index c6797aa..ccb2b7f 100644
--- a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
+++ b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
@@ -176,6 +176,7 @@ public class ColumnarSQLRewriter implements QueryRewriter {
/** The from ast. */
@Getter
protected ASTNode fromAST;
+ private HashMap<String, String> regexReplaceMap = new HashMap<>();
/**
* Instantiates a new columnar sql rewriter.
@@ -185,6 +186,14 @@ public class ColumnarSQLRewriter implements QueryRewriter {
@Override
public void init(Configuration conf) {
+ if (conf.get(JDBCDriverConfConstants.REGEX_REPLACEMENT_VALUES) != null) {
+ for (String kv : conf.get(JDBCDriverConfConstants.REGEX_REPLACEMENT_VALUES).split("(?<!\\\\),")) {
+ String[] kvArray = kv.split("=");
+ String key = kvArray[0].replaceAll("\\\\,", ",").trim();
+ String value = kvArray[1].replaceAll("\\\\,", ",").trim();
+ regexReplaceMap.put(key, value);
+ }
+ }
}
public String getClause() {
@@ -936,13 +945,7 @@ public class ColumnarSQLRewriter implements QueryRewriter {
* @return the string
*/
public String replaceUDFForDB(String query) {
- Map<String, String> imputnmatch = new LinkedHashMap<String, String>();
- imputnmatch.put("to_date", "date");
- imputnmatch.put("format_number", "format");
- imputnmatch.put("date_sub\\((.*?),\\s*([0-9]+\\s*)\\)", "date_sub($1, interval $2 day)");
- imputnmatch.put("date_add\\((.*?),\\s*([0-9]+\\s*)\\)", "date_add($1, interval $2 day)");
-
- for (Map.Entry<String, String> entry : imputnmatch.entrySet()) {
+ for (Map.Entry<String, String> entry : regexReplaceMap.entrySet()) {
query = query.replaceAll(entry.getKey(), entry.getValue());
}
return query;
@@ -1212,8 +1215,9 @@ public class ColumnarSQLRewriter implements QueryRewriter {
reset();
try {
+ String finalRewrittenQuery;
if (query.toLowerCase().matches("(.*)union all(.*)")) {
- String finalRewrittenQuery = "";
+ finalRewrittenQuery = "";
String[] queries = query.toLowerCase().split("union all");
for (int i = 0; i < queries.length; i++) {
log.info("Union Query Part {} : {}", i, queries[i]);
@@ -1223,16 +1227,14 @@ public class ColumnarSQLRewriter implements QueryRewriter {
finalRewrittenQuery = mergedQuery.toString().substring(0, mergedQuery.lastIndexOf("union all"));
reset();
}
- queryReplacedUdf = replaceUDFForDB(finalRewrittenQuery);
- log.info("Input Query : {}", query);
- log.info("Rewritten Query : {}", queryReplacedUdf);
} else {
ast = HQLParser.parseHQL(query, metastoreConf);
buildQuery(conf, metastoreConf);
- queryReplacedUdf = replaceUDFForDB(rewrittenQuery.toString());
- log.info("Input Query : {}", query);
- log.info("Rewritten Query : {}", queryReplacedUdf);
+ finalRewrittenQuery = rewrittenQuery.toString();
}
+ queryReplacedUdf = replaceUDFForDB(finalRewrittenQuery);
+ log.info("Input Query : {}", query);
+ log.info("Rewritten Query : {}", queryReplacedUdf);
} catch (SemanticException e) {
throw new LensException(e);
}
http://git-wip-us.apache.org/repos/asf/lens/blob/e274c2df/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/JDBCDriverConfConstants.java
----------------------------------------------------------------------
diff --git a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/JDBCDriverConfConstants.java b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/JDBCDriverConfConstants.java
index a26acde..70815bb 100644
--- a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/JDBCDriverConfConstants.java
+++ b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/JDBCDriverConfConstants.java
@@ -101,4 +101,5 @@ public final class JDBCDriverConfConstants {
public static final String WAITING_QUERIES_SELECTION_POLICY_FACTORIES_KEY = JDBC_DRIVER_PFX
+ "waiting.queries.selection.policy.factories";
+ public static final String REGEX_REPLACEMENT_VALUES = JDBC_DRIVER_PFX + "regex.replacement.values";
}
http://git-wip-us.apache.org/repos/asf/lens/blob/e274c2df/lens-driver-jdbc/src/main/resources/jdbcdriver-default.xml
----------------------------------------------------------------------
diff --git a/lens-driver-jdbc/src/main/resources/jdbcdriver-default.xml b/lens-driver-jdbc/src/main/resources/jdbcdriver-default.xml
index e94c2f3..163ed42 100644
--- a/lens-driver-jdbc/src/main/resources/jdbcdriver-default.xml
+++ b/lens-driver-jdbc/src/main/resources/jdbcdriver-default.xml
@@ -35,6 +35,12 @@
</property>
<property>
+ <name>lens.driver.jdbc.regex.replacement.values</name>
+ <value>to_date=date, format_number=format, date_sub\((.*?)\,\s*([0-9]+\s*)\)=date_sub($1\, interval $2 day), date_add\((.*?)\,\s*([0-9]+\s*)\)=date_add($1\, interval $2 day)</value>
+ <description>Rewriting the HQL to optimized sql queries</description>
+ </property>
+
+ <property>
<name>lens.driver.jdbc.driver.class</name>
<value>com.mysql.jdbc.Driver</value>
<description>Type of JDBC driver used to connect backend database</description>
http://git-wip-us.apache.org/repos/asf/lens/blob/e274c2df/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
----------------------------------------------------------------------
diff --git a/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java b/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
index 31c7dad..d4d812f 100644
--- a/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
+++ b/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
@@ -134,6 +134,8 @@ public class TestColumnarSQLRewriter {
*/
@BeforeTest
public void setup() throws Exception {
+ conf.addResource("jdbcdriver-default.xml");
+ conf.addResource("jdbcdriver-site.xml");
qtest.init(conf);
List<FieldSchema> factColumns = new ArrayList<>();
@@ -205,7 +207,7 @@ public class TestColumnarSQLRewriter {
String query = "select count(distinct id) from location_dim";
String actual = qtest.rewrite(query, conf, hconf);
String expected = "select count( distinct id ) from location_dim ";
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
String query2 = "select count(distinct id) from location_dim location_dim";
String actual2 = qtest.rewrite(query2, conf, hconf);
@@ -260,7 +262,7 @@ public class TestColumnarSQLRewriter {
+ "and (( location_dim___location_dim . location_name ) = 'test123' ))";
String actual = qtest.joinCondition.toString();
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
/**
@@ -289,7 +291,7 @@ public class TestColumnarSQLRewriter {
+ ", , ( time_dim___time_dim . time_key ) between '2013-01-01' and '2013-01-31' ]";
String actual = qtest.rightFilter.toString();
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
@@ -349,7 +351,7 @@ public class TestColumnarSQLRewriter {
String rwq = qtest.rewrite(query, conf, hconf);
String expected = "sales_fact___fact.time_key,sales_fact___fact.location_key,sales_fact___fact.item_key,";
String actual = qtest.factKeys.toString();
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
/**
@@ -382,7 +384,7 @@ public class TestColumnarSQLRewriter {
+ "and sales_fact___fact.item_key in ( select item_dim .item_key from "
+ "item_dim where (( item_dim. item_name ) = 'item_1' ) ) and";
String actual = qtest.allSubQueries.toString();
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
/**
@@ -395,7 +397,7 @@ public class TestColumnarSQLRewriter {
String query =
- "select fact.time_key,time_dim.day_of_week,to_date(time_dim.day),item_dim.item_key, "
+ "select fact.time_key,time_dim.day_of_week, weekofyear(time_dim.day), to_date(time_dim.day),item_dim.item_key, "
+ "case when sum(fact.dollars_sold) = 0 then 0.0 else sum(fact.dollars_sold) end dollars_sold, "
+ "format_number(sum(fact.units_sold),4),format_number(avg(fact.dollars_sold),'##################.###'),"
+ "min(fact.dollars_sold),max(fact.dollars_sold)" + "from sales_fact fact "
@@ -413,6 +415,7 @@ public class TestColumnarSQLRewriter {
String actual = qtest.rewrite(query, conf, hconf);
String expected = "select ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), "
+ + "week((time_dim__time_dim . day )), "
+ "date(( time_dim___time_dim . day )), ( item_dim___item_dim . item_key ), "
+ "case when (sum(alias2) = 0 ) then 0.0 else sum(alias2) end dollars_sold , "
+ "format(sum(alias3), 4 ), format(avg(alias4), '##################.###' ), "
@@ -442,7 +445,7 @@ public class TestColumnarSQLRewriter {
+ "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) "
+ "order by dollars_sold asc";
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
/**
@@ -535,7 +538,7 @@ public class TestColumnarSQLRewriter {
+ "where ( time_dim___time_dim . time_key ) between '2013-03-01' and '2013-03-05' "
+ "group by ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), "
+ "( time_dim___time_dim . day ) order by dollars_sold asc";
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
@Test
@@ -554,7 +557,7 @@ public class TestColumnarSQLRewriter {
+ "(( location_dim___location_dim . time_id ) = ( time_dim___time_dim . id )) "
+ "where ( time_dim___time_dim . full_date ) "
+ "between '2013-01-01 00:00:00' and '2013-01-04 00:00:00' limit 10";
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
@@ -605,7 +608,7 @@ public class TestColumnarSQLRewriter {
+ "group by ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), "
+ "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) "
+ "order by dollars_sold desc";
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
@@ -658,7 +661,7 @@ public class TestColumnarSQLRewriter {
+ "( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ),"
+ " ( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) "
+ "order by dollars_sold desc";
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
@@ -701,7 +704,7 @@ public class TestColumnarSQLRewriter {
+ "and date_sub( '2013-01-31' , interval 3 day) and (( item_dim___item_dim . item_name ) = 'item_1' )) "
+ "group by ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), "
+ "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) order by dollars_sold asc";
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
@Test
@@ -722,7 +725,7 @@ public class TestColumnarSQLRewriter {
+ "(select id, full_date from time_dim) time_dim___time_dim on (( location_dim___location_dim . time_id ) = "
+ "( time_dim___time_dim . id )) where ( time_dim___time_dim . full_date ) "
+ "between '2013-01-01 00:00:00' and '2013-01-04 00:00:00' limit 10";
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
@Test
@@ -759,7 +762,7 @@ public class TestColumnarSQLRewriter {
+ "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day ) "
+ "order by dollars_sold desc";
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
@@ -791,7 +794,7 @@ public class TestColumnarSQLRewriter {
+ "where (( dim1___dim1 . date ) = '2014-11-25 00:00:00' ) "
+ "group by ( dim1___dim1 . date ), ( dim2___dim2 . name ), ( dim3___dim3 . name ), ( dim4___dim4 . name )";
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
@@ -820,7 +823,7 @@ public class TestColumnarSQLRewriter {
+ " = '2014-11-25 00:00:00' ) and ( fact___f . m4 ) is not null ) "
+ "group by ( dim1___dim1 . date ), ( dim2___dim2 . name )";
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
@Test
@@ -850,7 +853,7 @@ public class TestColumnarSQLRewriter {
+ "where ((( dim1___dim1 . date ) = '2014-11-25 00:00:00' ) and ( fact___f . m4 ) is not null ) "
+ "group by ( dim1___dim1 . date ), ( dim2___dim2 . name ) order by dim1_date asc";
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
@Test
@@ -878,7 +881,7 @@ public class TestColumnarSQLRewriter {
+ "'2014-11-25 00:00:00' ) and ( fact___f . m4 ) is not null ) group by ( dim1___dim1 . date ), "
+ "( dim2___dim2 . name ) order by dim1_date asc";
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
@Test
@@ -929,7 +932,7 @@ public class TestColumnarSQLRewriter {
+ "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) "
+ "order by dollars_sold desc";
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
@Test
@@ -978,7 +981,7 @@ public class TestColumnarSQLRewriter {
+ "group by ( sales_fact__db_sales_fact_fact . time_key ), ( time_dim___time_dim . day_of_week ), "
+ "( time_dim___time_dim . day ) order by dollars_sold desc";
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
@Test
@@ -1017,7 +1020,7 @@ public class TestColumnarSQLRewriter {
+ "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day ) "
+ "order by dollars_sold desc";
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
@@ -1070,7 +1073,7 @@ public class TestColumnarSQLRewriter {
+ "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day )"
+ " order by dollars_sold desc";
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
@Test
@@ -1100,7 +1103,7 @@ public class TestColumnarSQLRewriter {
+ " group by ( sales_fact__db_sales_fact_fact . time_key ) "
+ "having (sum(alias2) > 100 )";
- compareQueries(expected, actual);
+ compareQueries(actual, expected);
}
/**
http://git-wip-us.apache.org/repos/asf/lens/blob/e274c2df/lens-driver-jdbc/src/test/resources/jdbcdriver-site.xml
----------------------------------------------------------------------
diff --git a/lens-driver-jdbc/src/test/resources/jdbcdriver-site.xml b/lens-driver-jdbc/src/test/resources/jdbcdriver-site.xml
index 1dc0a85..5594fbf 100644
--- a/lens-driver-jdbc/src/test/resources/jdbcdriver-site.xml
+++ b/lens-driver-jdbc/src/test/resources/jdbcdriver-site.xml
@@ -54,4 +54,10 @@
<name>lens.driver.jdbc.estimate.pool.max.statements</name>
<value>15</value>
</property>
+
+ <property>
+ <name>lens.driver.jdbc.regex.replacement.values</name>
+ <value>weekofyear=week, to_date=date, format_number=format, date_sub\((.*?)\,\s*([0-9]+\s*)\)=date_sub($1\, interval $2 day), date_add\((.*?)\,\s*([0-9]+\s*)\)=date_add($1\, interval $2 day)</value>
+ <description>Rewriting the HQL to optimized sql queries</description>
+ </property>
</configuration>
\ No newline at end of file