You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by ma...@apache.org on 2018/08/15 04:44:07 UTC

[incubator-superset] branch master updated: Fetch a batch of rows from bigquery (#5632)

This is an automated email from the ASF dual-hosted git repository.

maximebeauchemin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git


The following commit(s) were added to refs/heads/master by this push:
     new c9bd5a6  Fetch a batch of rows from bigquery (#5632)
c9bd5a6 is described below

commit c9bd5a6167728c533ea39405b5ae91f527561ae6
Author: Sumedh Sakdeo <77...@users.noreply.github.com>
AuthorDate: Tue Aug 14 21:44:04 2018 -0700

    Fetch a batch of rows from bigquery (#5632)
    
    * Fetch a batch of rows from bigquery
    
    * unused const
    
    * review comments
---
 superset/db_engine_specs.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py
index c07910a..65289e1 100644
--- a/superset/db_engine_specs.py
+++ b/superset/db_engine_specs.py
@@ -102,6 +102,7 @@ class BaseEngineSpec(object):
     inner_joins = True
     allows_subquery = True
     consistent_case_sensitivity = True  # do results have same case as qry for col names?
+    arraysize = None
 
     @classmethod
     def get_time_grains(cls):
@@ -115,6 +116,8 @@ class BaseEngineSpec(object):
 
     @classmethod
     def fetch_data(cls, cursor, limit):
+        if cls.arraysize:
+            cursor.arraysize = cls.arraysize
         if cls.limit_method == LimitMethod.FETCH_MANY:
             return cursor.fetchmany(limit)
         return cursor.fetchall()
@@ -1367,6 +1370,18 @@ class BQEngineSpec(BaseEngineSpec):
     As contributed by @mxmzdlv on issue #945"""
     engine = 'bigquery'
 
+    """
+    https://www.python.org/dev/peps/pep-0249/#arraysize
+    raw_connections bypass the pybigquery query execution context and deal with
+    the raw DB-API connection directly.
+    If this value is not set, the cursor's arraysize defaults to 1, as described here:
+    https://googlecloudplatform.github.io/google-cloud-python/latest/_modules/google/cloud/bigquery/dbapi/cursor.html#Cursor
+
+    The default value of 5000 is derived from pybigquery:
+    https://github.com/mxmzdlv/pybigquery/blob/d214bb089ca0807ca9aaa6ce4d5a01172d40264e/pybigquery/sqlalchemy_bigquery.py#L102
+    """
+    arraysize = 5000
+
     time_grain_functions = {
         None: '{col}',
         'PT1S': 'TIMESTAMP_TRUNC({col}, SECOND)',