You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lens.apache.org by am...@apache.org on 2015/10/26 14:30:04 UTC

lens git commit: LENS-845 : Allow regex rename arguments to passed configuration in columnar sql rewriter

Repository: lens
Updated Branches:
  refs/heads/master daa65a5af -> e274c2dfd


LENS-845 : Allow regex rename arguments to passed configuration in columnar sql rewriter


Project: http://git-wip-us.apache.org/repos/asf/lens/repo
Commit: http://git-wip-us.apache.org/repos/asf/lens/commit/e274c2df
Tree: http://git-wip-us.apache.org/repos/asf/lens/tree/e274c2df
Diff: http://git-wip-us.apache.org/repos/asf/lens/diff/e274c2df

Branch: refs/heads/master
Commit: e274c2dfd8907a6fad012989bad63bc131dee444
Parents: daa65a5
Author: Rajat Khandelwal <pr...@apache.org>
Authored: Mon Oct 26 18:59:11 2015 +0530
Committer: Amareshwari Sriramadasu <am...@apache.org>
Committed: Mon Oct 26 18:59:11 2015 +0530

----------------------------------------------------------------------
 .../lens/driver/jdbc/ColumnarSQLRewriter.java   | 30 ++++++------
 .../driver/jdbc/JDBCDriverConfConstants.java    |  1 +
 .../src/main/resources/jdbcdriver-default.xml   |  6 +++
 .../driver/jdbc/TestColumnarSQLRewriter.java    | 49 +++++++++++---------
 .../src/test/resources/jdbcdriver-site.xml      |  6 +++
 5 files changed, 55 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lens/blob/e274c2df/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
----------------------------------------------------------------------
diff --git a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
index c6797aa..ccb2b7f 100644
--- a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
+++ b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/ColumnarSQLRewriter.java
@@ -176,6 +176,7 @@ public class ColumnarSQLRewriter implements QueryRewriter {
   /** The from ast. */
   @Getter
   protected ASTNode fromAST;
+  private HashMap<String, String> regexReplaceMap = new HashMap<>();
 
   /**
    * Instantiates a new columnar sql rewriter.
@@ -185,6 +186,14 @@ public class ColumnarSQLRewriter implements QueryRewriter {
 
   @Override
   public void init(Configuration conf) {
+    if (conf.get(JDBCDriverConfConstants.REGEX_REPLACEMENT_VALUES) != null) {
+      for (String kv : conf.get(JDBCDriverConfConstants.REGEX_REPLACEMENT_VALUES).split("(?<!\\\\),")) {
+        String[] kvArray = kv.split("=");
+        String key = kvArray[0].replaceAll("\\\\,", ",").trim();
+        String value = kvArray[1].replaceAll("\\\\,", ",").trim();
+        regexReplaceMap.put(key, value);
+      }
+    }
   }
 
   public String getClause() {
@@ -936,13 +945,7 @@ public class ColumnarSQLRewriter implements QueryRewriter {
    * @return the string
    */
   public String replaceUDFForDB(String query) {
-    Map<String, String> imputnmatch = new LinkedHashMap<String, String>();
-    imputnmatch.put("to_date", "date");
-    imputnmatch.put("format_number", "format");
-    imputnmatch.put("date_sub\\((.*?),\\s*([0-9]+\\s*)\\)", "date_sub($1, interval $2 day)");
-    imputnmatch.put("date_add\\((.*?),\\s*([0-9]+\\s*)\\)", "date_add($1, interval $2 day)");
-
-    for (Map.Entry<String, String> entry : imputnmatch.entrySet()) {
+    for (Map.Entry<String, String> entry : regexReplaceMap.entrySet()) {
       query = query.replaceAll(entry.getKey(), entry.getValue());
     }
     return query;
@@ -1212,8 +1215,9 @@ public class ColumnarSQLRewriter implements QueryRewriter {
     reset();
 
     try {
+      String finalRewrittenQuery;
       if (query.toLowerCase().matches("(.*)union all(.*)")) {
-        String finalRewrittenQuery = "";
+        finalRewrittenQuery = "";
         String[] queries = query.toLowerCase().split("union all");
         for (int i = 0; i < queries.length; i++) {
           log.info("Union Query Part {} : {}", i, queries[i]);
@@ -1223,16 +1227,14 @@ public class ColumnarSQLRewriter implements QueryRewriter {
           finalRewrittenQuery = mergedQuery.toString().substring(0, mergedQuery.lastIndexOf("union all"));
           reset();
         }
-        queryReplacedUdf = replaceUDFForDB(finalRewrittenQuery);
-        log.info("Input Query : {}", query);
-        log.info("Rewritten Query : {}", queryReplacedUdf);
       } else {
         ast = HQLParser.parseHQL(query, metastoreConf);
         buildQuery(conf, metastoreConf);
-        queryReplacedUdf = replaceUDFForDB(rewrittenQuery.toString());
-        log.info("Input Query : {}", query);
-        log.info("Rewritten Query :  {}", queryReplacedUdf);
+        finalRewrittenQuery = rewrittenQuery.toString();
       }
+      queryReplacedUdf = replaceUDFForDB(finalRewrittenQuery);
+      log.info("Input Query : {}", query);
+      log.info("Rewritten Query : {}", queryReplacedUdf);
     } catch (SemanticException e) {
       throw new LensException(e);
     }

http://git-wip-us.apache.org/repos/asf/lens/blob/e274c2df/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/JDBCDriverConfConstants.java
----------------------------------------------------------------------
diff --git a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/JDBCDriverConfConstants.java b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/JDBCDriverConfConstants.java
index a26acde..70815bb 100644
--- a/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/JDBCDriverConfConstants.java
+++ b/lens-driver-jdbc/src/main/java/org/apache/lens/driver/jdbc/JDBCDriverConfConstants.java
@@ -101,4 +101,5 @@ public final class JDBCDriverConfConstants {
 
   public static final String WAITING_QUERIES_SELECTION_POLICY_FACTORIES_KEY = JDBC_DRIVER_PFX
       + "waiting.queries.selection.policy.factories";
+  public static final String REGEX_REPLACEMENT_VALUES = JDBC_DRIVER_PFX + "regex.replacement.values";
 }

http://git-wip-us.apache.org/repos/asf/lens/blob/e274c2df/lens-driver-jdbc/src/main/resources/jdbcdriver-default.xml
----------------------------------------------------------------------
diff --git a/lens-driver-jdbc/src/main/resources/jdbcdriver-default.xml b/lens-driver-jdbc/src/main/resources/jdbcdriver-default.xml
index e94c2f3..163ed42 100644
--- a/lens-driver-jdbc/src/main/resources/jdbcdriver-default.xml
+++ b/lens-driver-jdbc/src/main/resources/jdbcdriver-default.xml
@@ -35,6 +35,12 @@
   </property>
 
   <property>
+    <name>lens.driver.jdbc.regex.replacement.values</name>
+    <value>to_date=date, format_number=format, date_sub\((.*?)\,\s*([0-9]+\s*)\)=date_sub($1\, interval $2 day), date_add\((.*?)\,\s*([0-9]+\s*)\)=date_add($1\, interval $2 day)</value>
+    <description>Rewriting the HQL to optimized sql queries</description>
+  </property>
+
+  <property>
     <name>lens.driver.jdbc.driver.class</name>
     <value>com.mysql.jdbc.Driver</value>
     <description>Type of JDBC driver used to connect backend database</description>

http://git-wip-us.apache.org/repos/asf/lens/blob/e274c2df/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
----------------------------------------------------------------------
diff --git a/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java b/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
index 31c7dad..d4d812f 100644
--- a/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
+++ b/lens-driver-jdbc/src/test/java/org/apache/lens/driver/jdbc/TestColumnarSQLRewriter.java
@@ -134,6 +134,8 @@ public class TestColumnarSQLRewriter {
    */
   @BeforeTest
   public void setup() throws Exception {
+    conf.addResource("jdbcdriver-default.xml");
+    conf.addResource("jdbcdriver-site.xml");
     qtest.init(conf);
 
     List<FieldSchema> factColumns = new ArrayList<>();
@@ -205,7 +207,7 @@ public class TestColumnarSQLRewriter {
     String query = "select count(distinct id) from location_dim";
     String actual = qtest.rewrite(query, conf, hconf);
     String expected = "select count( distinct  id ) from location_dim ";
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
 
     String query2 = "select count(distinct id) from location_dim  location_dim";
     String actual2 = qtest.rewrite(query2, conf, hconf);
@@ -260,7 +262,7 @@ public class TestColumnarSQLRewriter {
       + "and (( location_dim___location_dim . location_name ) =  'test123' ))";
     String actual = qtest.joinCondition.toString();
 
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
   }
 
   /**
@@ -289,7 +291,7 @@ public class TestColumnarSQLRewriter {
       + ", , ( time_dim___time_dim . time_key ) between  '2013-01-01'  and  '2013-01-31' ]";
     String actual = qtest.rightFilter.toString();
 
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
 
   }
 
@@ -349,7 +351,7 @@ public class TestColumnarSQLRewriter {
     String rwq = qtest.rewrite(query, conf, hconf);
     String expected = "sales_fact___fact.time_key,sales_fact___fact.location_key,sales_fact___fact.item_key,";
     String actual = qtest.factKeys.toString();
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
   }
 
   /**
@@ -382,7 +384,7 @@ public class TestColumnarSQLRewriter {
       + "and sales_fact___fact.item_key in  (  select item_dim .item_key from "
       + "item_dim where (( item_dim. item_name ) =  'item_1' ) ) and";
     String actual = qtest.allSubQueries.toString();
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
   }
 
   /**
@@ -395,7 +397,7 @@ public class TestColumnarSQLRewriter {
 
     String query =
 
-      "select fact.time_key,time_dim.day_of_week,to_date(time_dim.day),item_dim.item_key, "
+      "select fact.time_key,time_dim.day_of_week, weekofyear(time_dim.day), to_date(time_dim.day),item_dim.item_key, "
         + "case when sum(fact.dollars_sold) = 0 then 0.0 else sum(fact.dollars_sold) end dollars_sold, "
         + "format_number(sum(fact.units_sold),4),format_number(avg(fact.dollars_sold),'##################.###'),"
         + "min(fact.dollars_sold),max(fact.dollars_sold)" + "from sales_fact fact "
@@ -413,6 +415,7 @@ public class TestColumnarSQLRewriter {
     String actual = qtest.rewrite(query, conf, hconf);
 
     String expected = "select ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), "
+            + "week((time_dim__time_dim . day )), "
             + "date(( time_dim___time_dim . day )), ( item_dim___item_dim . item_key ),  "
             + "case  when (sum(alias2) =  0 ) then  0.0  else sum(alias2) end  dollars_sold , "
             + "format(sum(alias3),  4 ), format(avg(alias4),  '##################.###' ), "
@@ -442,7 +445,7 @@ public class TestColumnarSQLRewriter {
             + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) "
             + "order by dollars_sold  asc";
 
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
   }
 
   /**
@@ -535,7 +538,7 @@ public class TestColumnarSQLRewriter {
             + "where ( time_dim___time_dim . time_key ) between  '2013-03-01'  and  '2013-03-05'  "
             + "group by ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), "
             + "( time_dim___time_dim . day ) order by dollars_sold  asc";
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
   }
 
   @Test
@@ -554,7 +557,7 @@ public class TestColumnarSQLRewriter {
       + "(( location_dim___location_dim . time_id ) = ( time_dim___time_dim . id ))  "
       + "where ( time_dim___time_dim . full_date ) "
       + "between  '2013-01-01 00:00:00'  and  '2013-01-04 00:00:00'  limit 10";
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
 
   }
 
@@ -605,7 +608,7 @@ public class TestColumnarSQLRewriter {
             + "group by ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), "
             + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) "
             + "order by dollars_sold  desc";
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
 
   }
 
@@ -658,7 +661,7 @@ public class TestColumnarSQLRewriter {
             + "( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ),"
             + " ( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) "
             + "order by dollars_sold  desc";
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
 
   }
 
@@ -701,7 +704,7 @@ public class TestColumnarSQLRewriter {
         + "and date_sub( '2013-01-31' , interval 3  day) and (( item_dim___item_dim . item_name ) =  'item_1' )) "
         + "group by ( sales_fact___fact . time_key ), ( time_dim___time_dim . day_of_week ), "
         + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) order by dollars_sold  asc";
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
   }
 
   @Test
@@ -722,7 +725,7 @@ public class TestColumnarSQLRewriter {
         + "(select id, full_date from time_dim) time_dim___time_dim on (( location_dim___location_dim . time_id ) = "
         + "( time_dim___time_dim . id ))  where ( time_dim___time_dim . full_date ) "
         + "between  '2013-01-01 00:00:00'  and  '2013-01-04 00:00:00'  limit 10";
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
   }
 
   @Test
@@ -759,7 +762,7 @@ public class TestColumnarSQLRewriter {
             + "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day ) "
             + "order by dollars_sold  desc";
 
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
   }
 
 
@@ -791,7 +794,7 @@ public class TestColumnarSQLRewriter {
         + "where (( dim1___dim1 . date ) =  '2014-11-25 00:00:00' ) "
         + "group by ( dim1___dim1 . date ), ( dim2___dim2 . name ), ( dim3___dim3 . name ), ( dim4___dim4 . name )";
 
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
   }
 
 
@@ -820,7 +823,7 @@ public class TestColumnarSQLRewriter {
         + " =  '2014-11-25 00:00:00' ) and ( fact___f . m4 ) is not null ) "
         + "group by ( dim1___dim1 . date ), ( dim2___dim2 . name )";
 
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
   }
 
   @Test
@@ -850,7 +853,7 @@ public class TestColumnarSQLRewriter {
         + "where ((( dim1___dim1 . date ) =  '2014-11-25 00:00:00' ) and ( fact___f . m4 ) is not null ) "
         + "group by ( dim1___dim1 . date ), ( dim2___dim2 . name ) order by dim1_date  asc";
 
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
   }
 
   @Test
@@ -878,7 +881,7 @@ public class TestColumnarSQLRewriter {
         + "'2014-11-25 00:00:00' ) and ( fact___f . m4 ) is not null ) group by ( dim1___dim1 . date ), "
         + "( dim2___dim2 . name ) order by dim1_date  asc";
 
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
   }
 
   @Test
@@ -929,7 +932,7 @@ public class TestColumnarSQLRewriter {
             + "( time_dim___time_dim . day ), ( item_dim___item_dim . item_key ) "
             + "order by dollars_sold  desc";
 
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
   }
 
   @Test
@@ -978,7 +981,7 @@ public class TestColumnarSQLRewriter {
             + "group by ( sales_fact__db_sales_fact_fact . time_key ), ( time_dim___time_dim . day_of_week ), "
             + "( time_dim___time_dim . day ) order by dollars_sold  desc";
 
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
   }
 
   @Test
@@ -1017,7 +1020,7 @@ public class TestColumnarSQLRewriter {
             + "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day ) "
             + "order by dollars_sold  desc";
 
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
   }
 
 
@@ -1070,7 +1073,7 @@ public class TestColumnarSQLRewriter {
             + "( time_dim___time_dim . day_of_week ), ( time_dim___time_dim . day )"
             + " order by dollars_sold  desc";
 
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
   }
 
   @Test
@@ -1100,7 +1103,7 @@ public class TestColumnarSQLRewriter {
             + " group by ( sales_fact__db_sales_fact_fact . time_key ) "
             + "having (sum(alias2) >  100 )";
 
-    compareQueries(expected, actual);
+    compareQueries(actual, expected);
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/lens/blob/e274c2df/lens-driver-jdbc/src/test/resources/jdbcdriver-site.xml
----------------------------------------------------------------------
diff --git a/lens-driver-jdbc/src/test/resources/jdbcdriver-site.xml b/lens-driver-jdbc/src/test/resources/jdbcdriver-site.xml
index 1dc0a85..5594fbf 100644
--- a/lens-driver-jdbc/src/test/resources/jdbcdriver-site.xml
+++ b/lens-driver-jdbc/src/test/resources/jdbcdriver-site.xml
@@ -54,4 +54,10 @@
   <name>lens.driver.jdbc.estimate.pool.max.statements</name>
   <value>15</value>
   </property>
+
+  <property>
+    <name>lens.driver.jdbc.regex.replacement.values</name>
+    <value>weekofyear=week, to_date=date, format_number=format, date_sub\((.*?)\,\s*([0-9]+\s*)\)=date_sub($1\, interval $2 day), date_add\((.*?)\,\s*([0-9]+\s*)\)=date_add($1\, interval $2 day)</value>
+    <description>Rewriting the HQL to optimized sql queries</description>
+  </property>
 </configuration>
\ No newline at end of file