You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@superset.apache.org by GitBox <gi...@apache.org> on 2018/08/15 04:44:06 UTC

[GitHub] mistercrunch closed pull request #5632: Fetch a batch of rows from bigquery

mistercrunch closed pull request #5632: Fetch a batch of rows from bigquery
URL: https://github.com/apache/incubator-superset/pull/5632
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

Because this pull request comes from a fork, GitHub no longer displays its
diff once it has been merged, so the diff is reproduced below:

diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py
index c07910af4d..65289e1e19 100644
--- a/superset/db_engine_specs.py
+++ b/superset/db_engine_specs.py
@@ -102,6 +102,7 @@ class BaseEngineSpec(object):
     inner_joins = True
     allows_subquery = True
     consistent_case_sensitivity = True  # do results have same case as qry for col names?
+    arraysize = None
 
     @classmethod
     def get_time_grains(cls):
@@ -115,6 +116,8 @@ def get_time_grains(cls):
 
     @classmethod
     def fetch_data(cls, cursor, limit):
+        if cls.arraysize:
+            cursor.arraysize = cls.arraysize
         if cls.limit_method == LimitMethod.FETCH_MANY:
             return cursor.fetchmany(limit)
         return cursor.fetchall()
@@ -1367,6 +1370,18 @@ class BQEngineSpec(BaseEngineSpec):
     As contributed by @mxmzdlv on issue #945"""
     engine = 'bigquery'
 
+    """
+    https://www.python.org/dev/peps/pep-0249/#arraysize
+    raw_connections bypass the pybigquery query execution context and deal with
+    raw dbapi connection directly.
+    If this value is not set, the default value is set to 1, as described here,
+    https://googlecloudplatform.github.io/google-cloud-python/latest/_modules/google/cloud/bigquery/dbapi/cursor.html#Cursor
+
+    The default value of 5000 is derived from the pybigquery.
+    https://github.com/mxmzdlv/pybigquery/blob/d214bb089ca0807ca9aaa6ce4d5a01172d40264e/pybigquery/sqlalchemy_bigquery.py#L102
+    """
+    arraysize = 5000
+
     time_grain_functions = {
         None: '{col}',
         'PT1S': 'TIMESTAMP_TRUNC({col}, SECOND)',


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org
For additional commands, e-mail: notifications-help@superset.apache.org