Posted to commits@metamodel.apache.org by ka...@apache.org on 2019/02/26 06:21:05 UTC

[metamodel] 03/11: Add pagination support to Hive SQL. 1. Hive 1.x only supports the ROW_NUMBER() approach; 2. Hive 2.x supports LIMIT/OFFSET, ref https://issues.apache.org/jira/browse/HIVE-11531

This is an automated email from the ASF dual-hosted git repository.

kaspersor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/metamodel.git

commit 71a02aa9bbfb7ed80a257ea6afa779e86fcc5135
Author: 李小保 <li...@mininglamp.com>
AuthorDate: Tue Jan 29 14:58:00 2019 +0800

    Add pagination support to Hive SQL.
    1. Hive 1.x only supports the ROW_NUMBER() approach.
    2. Hive 2.x supports LIMIT/OFFSET; ref https://issues.apache.org/jira/browse/HIVE-11531
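
    For illustration, the query below and the two expected rewrites are taken from
    the new HiveQueryRewriterTest added in this commit; "dataContext" is assumed to
    be a JdbcDataContext connected to Hive (the tests use an EasyMock mock that only
    reports the database product name and version):

        MutableColumn col1 = new MutableColumn("kkbh");
        MutableColumn col2 = new MutableColumn("kkmc");
        Query q = new Query().from(new MutableTable("5_t_kk_kkxx"))
                .select(col1).select(col2)
                .where(col1, OperatorType.EQUALS_TO, "5207281832")
                .orderBy(col1).setFirstRow(5).setMaxRows(9);
        String sql = new HiveQueryRewriter(dataContext).rewriteQuery(q);

        // Hive 1.x (major version 1): ROW_NUMBER() subquery
        //   SELECT metamodel_subquery.kkbh, metamodel_subquery.kkmc
        //   FROM (SELECT kkbh, kkmc, ROW_NUMBER() OVER( ORDER BY kkbh ASC) AS metamodel_row_number
        //         FROM 5_t_kk_kkxx WHERE kkbh = '5207281832') metamodel_subquery
        //   WHERE metamodel_row_number BETWEEN 5 AND 13
        //
        // Hive 2.x and later: native LIMIT/OFFSET (HIVE-11531)
        //   SELECT kkbh, kkmc FROM 5_t_kk_kkxx WHERE kkbh = '5207281832'
        //   ORDER BY kkbh ASC LIMIT 9 OFFSET 4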
---
 .../metamodel/jdbc/dialects/HiveQueryRewriter.java | 136 +++++++++++++++++++++
 .../metamodel/dialects/HiveQueryRewriterTest.java  |  85 +++++++++++++
 2 files changed, 221 insertions(+)

diff --git a/jdbc/src/main/java/org/apache/metamodel/jdbc/dialects/HiveQueryRewriter.java b/jdbc/src/main/java/org/apache/metamodel/jdbc/dialects/HiveQueryRewriter.java
index 6944ab8..3f28aac 100644
--- a/jdbc/src/main/java/org/apache/metamodel/jdbc/dialects/HiveQueryRewriter.java
+++ b/jdbc/src/main/java/org/apache/metamodel/jdbc/dialects/HiveQueryRewriter.java
@@ -18,7 +18,12 @@
  */
 package org.apache.metamodel.jdbc.dialects;
 
+import java.util.List;
+import org.apache.metamodel.MetaModelException;
 import org.apache.metamodel.jdbc.JdbcDataContext;
+import org.apache.metamodel.query.FromItem;
+import org.apache.metamodel.query.Query;
+import org.apache.metamodel.query.SelectItem;
 import org.apache.metamodel.schema.ColumnType;
 
 /**
@@ -26,8 +31,139 @@ import org.apache.metamodel.schema.ColumnType;
  */
 public class HiveQueryRewriter extends DefaultQueryRewriter {
 
+    private int majorVersion;
+
     public HiveQueryRewriter(JdbcDataContext dataContext) {
         super(dataContext);
+        String version = dataContext.getDatabaseVersion();
+        String[] parts = version.split("\\.");
+        if (parts.length < 2) {
+            throw new RuntimeException("Illegal Hive version: " + version + " (expected A.B.* format)");
+        } else {
+            majorVersion = Integer.parseInt(parts[0]);
+        }
+    }
+
+    @Override
+    public final boolean isFirstRowSupported(final Query query) {
+        switch (majorVersion){
+            case 2:
+            case 3:
+                return true;
+            default:
+                return super.isFirstRowSupported(query);
+        }
+    }
+
+    @Override
+    public final boolean isMaxRowsSupported() {
+        switch (majorVersion){
+            case 2:
+            case 3:
+                return true;
+            default:
+                return super.isMaxRowsSupported();
+        }
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * If the max rows and/or first row property of the query is set, Hive 2.x and
+     * later use native LIMIT/OFFSET, while Hive 1.x falls back to a ROW_NUMBER() subquery.
+     */
+    @Override
+    public String rewriteQuery(Query query) {
+        switch (majorVersion){
+            case 2:
+            case 3:
+                return rewriteQueryForHive2(query);
+            default:
+                return rewriteQueryForHive1(query);
+        }
+
+    }
+
+    private String rewriteQueryForHive1(Query query){
+
+        Integer maxRows = query.getMaxRows();
+        Integer firstRow = query.getFirstRow();
+
+        if(firstRow != null && firstRow > 1){
+            if(query.getOrderByClause().getItemCount() == 0){
+                throw new MetaModelException("OFFSET requires an ORDER BY clause");
+            }
+        }
+
+
+        if (maxRows == null && (firstRow == null || firstRow.intValue() == 1)) {
+            return super.rewriteQuery(query);
+        }
+
+        if ((firstRow == null || firstRow.intValue() == 1) && maxRows != null && maxRows > 0) {
+            // Prefer the simple "LIMIT n" approach when
+            // firstRow is not specified.
+            return super.rewriteQuery(query) + " LIMIT " + maxRows;
+        }
+
+        final Query innerQuery = query.clone();
+        innerQuery.setFirstRow(null);
+        innerQuery.setMaxRows(null);
+
+        final Query outerQuery = new Query();
+        final FromItem subQuerySelectItem = new FromItem(innerQuery).setAlias("metamodel_subquery");
+        outerQuery.from(subQuerySelectItem);
+
+        final List<SelectItem> innerSelectItems = innerQuery.getSelectClause().getItems();
+        for (SelectItem selectItem : innerSelectItems) {
+            outerQuery.select(new SelectItem(selectItem, subQuerySelectItem));
+        }
+
+
+        final String rewrittenOrderByClause = rewriteOrderByClause(innerQuery, innerQuery.getOrderByClause());
+        final String rowOver = "ROW_NUMBER() " + "OVER(" + rewrittenOrderByClause + ")";
+        innerQuery.select(new SelectItem(rowOver, "metamodel_row_number"));
+        innerQuery.getOrderByClause().removeItems();
+
+        final String baseQueryString = rewriteQuery(outerQuery);
+
+        if (maxRows == null) {
+            return baseQueryString + " WHERE metamodel_row_number > " + (firstRow - 1);
+        }
+
+        return baseQueryString + " WHERE metamodel_row_number BETWEEN " + firstRow + " AND "
+                + (firstRow - 1 + maxRows);
+
+    }
+
+    private String rewriteQueryForHive2(Query query){
+        Integer maxRows = query.getMaxRows();
+        Integer firstRow = query.getFirstRow();
+
+        if(firstRow != null && firstRow > 1){
+            if(query.getOrderByClause().getItemCount() == 0){
+                throw new MetaModelException("OFFSET requires an ORDER BY clause");
+            }
+        }
+
+        String queryString = super.rewriteQuery(query);
+
+        if (maxRows != null || firstRow != null) {
+
+            if (maxRows == null) {
+                maxRows = Integer.MAX_VALUE;
+            }
+            queryString = queryString + " LIMIT " + maxRows;
+
+            if (firstRow != null && firstRow > 1) {
+                // firstRow is 1-based; OFFSET is 0-based
+                int offset = firstRow - 1;
+                queryString = queryString + " OFFSET " + offset;
+            }
+        }
+
+        return queryString;
+
     }
 
     @Override
diff --git a/jdbc/src/test/java/org/apache/metamodel/dialects/HiveQueryRewriterTest.java b/jdbc/src/test/java/org/apache/metamodel/dialects/HiveQueryRewriterTest.java
new file mode 100644
index 0000000..423b120
--- /dev/null
+++ b/jdbc/src/test/java/org/apache/metamodel/dialects/HiveQueryRewriterTest.java
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.metamodel.dialects;
+
+import static org.apache.metamodel.jdbc.JdbcDataContext.DATABASE_PRODUCT_HIVE;
+import static org.apache.metamodel.jdbc.JdbcDataContext.DATABASE_PRODUCT_SQLSERVER;
+
+import junit.framework.TestCase;
+import org.apache.metamodel.jdbc.JdbcDataContext;
+import org.apache.metamodel.jdbc.dialects.HiveQueryRewriter;
+import org.apache.metamodel.jdbc.dialects.MysqlQueryRewriter;
+import org.apache.metamodel.jdbc.dialects.SQLServerQueryRewriter;
+import org.apache.metamodel.query.FilterItem;
+import org.apache.metamodel.query.FromItem;
+import org.apache.metamodel.query.OperatorType;
+import org.apache.metamodel.query.Query;
+import org.apache.metamodel.query.SelectItem;
+import org.apache.metamodel.schema.ColumnType;
+import org.apache.metamodel.schema.MutableColumn;
+import org.apache.metamodel.schema.MutableSchema;
+import org.apache.metamodel.schema.MutableTable;
+import org.apache.metamodel.util.TimeComparator;
+import org.easymock.EasyMock;
+import org.junit.Assert;
+
+public class HiveQueryRewriterTest extends TestCase {
+
+
+    @Override
+    protected void setUp() throws Exception {
+        super.setUp();
+
+
+    }
+
+    public void testHive1SqlWithPagination() {
+        final JdbcDataContext mockContext = EasyMock.createMock(JdbcDataContext.class);
+        EasyMock.expect(mockContext.getDatabaseProductName()).andReturn(DATABASE_PRODUCT_HIVE).anyTimes();
+        EasyMock.expect(mockContext.getDatabaseVersion()).andReturn("1.1.1.1").anyTimes();
+        EasyMock.expect(mockContext.getIdentifierQuoteString()).andReturn("quoteString").anyTimes();
+
+        EasyMock.replay(mockContext);
+        HiveQueryRewriter qr = new HiveQueryRewriter(mockContext);
+
+        MutableColumn col1 = new MutableColumn("kkbh");
+        MutableColumn col2 = new MutableColumn("kkmc");
+        Query q = new Query().from(new MutableTable("5_t_kk_kkxx")).select(col1).select(col2)
+                .where(col1, OperatorType.EQUALS_TO, "5207281832").orderBy(col1).setFirstRow(5).setMaxRows(9);
+        String sql = qr.rewriteQuery(q);
+        assertEquals(sql,"SELECT metamodel_subquery.kkbh, metamodel_subquery.kkmc FROM (SELECT kkbh, kkmc, ROW_NUMBER() OVER( ORDER BY kkbh ASC) AS metamodel_row_number FROM 5_t_kk_kkxx WHERE kkbh = '5207281832') metamodel_subquery WHERE metamodel_row_number BETWEEN 5 AND 13");
+    }
+
+    public void testHive2SqlWithPagination() {
+        final JdbcDataContext mockContext = EasyMock.createMock(JdbcDataContext.class);
+        EasyMock.expect(mockContext.getDatabaseProductName()).andReturn(DATABASE_PRODUCT_HIVE).anyTimes();
+        EasyMock.expect(mockContext.getDatabaseVersion()).andReturn("2.1.1.1").anyTimes();
+        EasyMock.expect(mockContext.getIdentifierQuoteString()).andReturn("quoteString").anyTimes();
+
+        EasyMock.replay(mockContext);
+        HiveQueryRewriter qr = new HiveQueryRewriter(mockContext);
+
+        MutableColumn col1 = new MutableColumn("kkbh");
+        MutableColumn col2 = new MutableColumn("kkmc");
+        Query q = new Query().from(new MutableTable("5_t_kk_kkxx")).select(col1).select(col2)
+                .where(col1, OperatorType.EQUALS_TO, "5207281832").orderBy(col1).setFirstRow(5).setMaxRows(9);
+        String sql = qr.rewriteQuery(q);
+        assertEquals(sql,"SELECT kkbh, kkmc FROM 5_t_kk_kkxx WHERE kkbh = '5207281832' ORDER BY kkbh ASC LIMIT 9 OFFSET 4");
+    }
+}
\ No newline at end of file