You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by ma...@apache.org on 2018/08/15 04:44:07 UTC
[incubator-superset] branch master updated: Fetch a batch of rows from bigquery (#5632)
This is an automated email from the ASF dual-hosted git repository.
maximebeauchemin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git
The following commit(s) were added to refs/heads/master by this push:
new c9bd5a6 Fetch a batch of rows from bigquery (#5632)
c9bd5a6 is described below
commit c9bd5a6167728c533ea39405b5ae91f527561ae6
Author: Sumedh Sakdeo <77...@users.noreply.github.com>
AuthorDate: Tue Aug 14 21:44:04 2018 -0700
Fetch a batch of rows from bigquery (#5632)
* Fetch a batch of rows from bigquery
* unused const
* review comments
---
superset/db_engine_specs.py | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py
index c07910a..65289e1 100644
--- a/superset/db_engine_specs.py
+++ b/superset/db_engine_specs.py
@@ -102,6 +102,7 @@ class BaseEngineSpec(object):
inner_joins = True
allows_subquery = True
consistent_case_sensitivity = True # do results have same case as qry for col names?
+ arraysize = None
@classmethod
def get_time_grains(cls):
@@ -115,6 +116,8 @@ class BaseEngineSpec(object):
@classmethod
def fetch_data(cls, cursor, limit):
+ if cls.arraysize:
+ cursor.arraysize = cls.arraysize
if cls.limit_method == LimitMethod.FETCH_MANY:
return cursor.fetchmany(limit)
return cursor.fetchall()
@@ -1367,6 +1370,18 @@ class BQEngineSpec(BaseEngineSpec):
As contributed by @mxmzdlv on issue #945"""
engine = 'bigquery'
+ """
+ https://www.python.org/dev/peps/pep-0249/#arraysize
+ raw_connections bypass the pybigquery query execution context and deal with
+ raw dbapi connection directly.
+ If this value is not set, the default value is set to 1, as described here,
+ https://googlecloudplatform.github.io/google-cloud-python/latest/_modules/google/cloud/bigquery/dbapi/cursor.html#Cursor
+
+ The default value of 5000 is derived from the pybigquery.
+ https://github.com/mxmzdlv/pybigquery/blob/d214bb089ca0807ca9aaa6ce4d5a01172d40264e/pybigquery/sqlalchemy_bigquery.py#L102
+ """
+ arraysize = 5000
+
time_grain_functions = {
None: '{col}',
'PT1S': 'TIMESTAMP_TRUNC({col}, SECOND)',