You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by "github-code-scanning[bot] (via GitHub)" <gi...@apache.org> on 2023/02/10 00:27:02 UTC
[GitHub] [druid] github-code-scanning[bot] commented on a diff in pull request #13787: Python Druid API for use in notebooks

github-code-scanning[bot] commented on code in PR #13787:
URL: https://github.com/apache/druid/pull/13787#discussion_r1102158185


##########
examples/quickstart/jupyter-notebooks/druidapi/rest.py:
##########
@@ -0,0 +1,178 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import requests
+from .util import dict_get, is_blank
+from urllib.parse import quote
+from .error import ClientError
+
+def check_error(response):
+    """
+    Raises a requests HttpError if the response code is not OK or Accepted.
+
+    If the response inclded a JSON payload, then the message is extracted
+    from that payload, else the message is from requests. The JSON
+    payload, if any, is returned in the json field of the error.
+    """
+    code = response.status_code
+    if code == requests.codes.ok or code == requests.codes.accepted:
+        return
+    error = None
+    json = None
+    try:
+        json = response.json()
+    except Exception: 

Review Comment:
   ## Empty except
   
   'except' clause does nothing but pass and there is no explanatory comment.
   
   [Show more details](https://github.com/apache/druid/security/code-scanning/4256)



##########
examples/quickstart/jupyter-notebooks/druidapi/sql.py:
##########
@@ -0,0 +1,693 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time, requests
+from . import consts, display
+from .consts import ROUTER_BASE
+from .util import is_blank, dict_get
+from .error import DruidError, ClientError
+
+REQ_ROUTER_QUERY = ROUTER_BASE
+REQ_ROUTER_SQL = ROUTER_BASE + '/sql'
+REQ_ROUTER_SQL_TASK = REQ_ROUTER_SQL + '/task'
+
+class SqlRequest:
+
+    def __init__(self, query_client, sql):
+        self.query_client = query_client
+        self.sql = sql
+        self.context = None
+        self.params = None
+        self.header = False
+        self.format = consts.SQL_OBJECT
+        self.headers = None
+        self.types = None
+        self.sqlTypes = None
+    
+    def with_format(self, result_format):
+        self.format = result_format
+        return self
+    
+    def with_headers(self, sqlTypes=False, druidTypes=False):
+        self.headers = True
+        self.types = druidTypes
+        self.sqlTypes = sqlTypes
+        return self
+    
+    def with_context(self, context):
+        if self.context is None:
+            self.context = context
+        else:
+            self.context.update(context)
+        return self
+    
+    def with_parameters(self, params):
+        '''
+        Set the array of parameters. Parameters must each be a map of 'type'/'value' pairs:
+        {'type': the_type, 'value': the_value}. The type must be a valid SQL type
+        (in upper case). See the consts module for a list.
+        '''
+        if self.params is None:
+            self.params = params
+        else:
+            self.params.update(params)
+        return self
+    
+    def add_parameter(self, value):
+        '''
+        Add one parameter value. Infers the type of the parameter from the Python type.
+        '''
+        if value is None:
+            raise ClientError("Druid does not support null parameter values")
+        data_type = None
+        value_type = type(value)
+        if value_type is str:
+            data_type = consts.SQL_VARCHAR_TYPE
+        elif value_type is int:
+            data_type = consts.SQL_BIGINT_TYPE
+        elif value_type is float:
+            data_type = consts.SQL_DOUBLE_TYPE
+        elif value_type is list:
+            data_type = consts.SQL_ARRAY_TYPE
+        else:
+            raise ClientError("Unsupported value type")
+        if self.params is None:
+            self.params = []
+        self.params.append({'type': data_type, 'value': value})
+
+    def response_header(self):
+        self.header = True
+        return self
+
+    def request_headers(self, headers):
+        self.headers = headers
+        return self
+    
+    def to_request(self):
+        query_obj = {"query": self.sql}
+        if self.context is not None and len(self.context) > 0:
+            query_obj['context'] = self.context
+        if self.params is not None and len(self.params) > 0:
+            query_obj['parameters'] = self.params
+        if self.header:
+            query_obj['header'] = True
+        if self.result_format is not None:
+            query_obj['resultFormat'] = self.format
+        if self.sqlTypes:
+            query_obj['sqlTypesHeader'] = self.sqlTypes
+        if self.types:
+            query_obj['typesHeader'] = self.types
+        return query_obj
+
+    def result_format(self):
+        return self.format.lower()
+
+    def run(self):
+        return self.query_client.sql_query(self)
+
+def parse_rows(fmt, context, results):
+    if fmt == consts.SQL_ARRAY_WITH_TRAILER:
+        rows = results['results']
+    elif fmt == consts.SQL_ARRAY:
+        rows = results
+    else:
+        return results
+    if not context.get('headers', False):
+        return rows
+    header_size = 1
+    if context.get('sqlTypesHeader', False):
+        header_size += 1
+    if context.get('typesHeader', False):
+        header_size += 1
+    return rows[header_size:]
+
+def label_non_null_cols(results):
+    if results is None or len(results) == 0:
+        return []
+    is_null = {}
+    for key in results[0].keys():
+        is_null[key] = True
+    for row in results:
+        for key, value in row.items():
+            if type(value) == str:
+                if value != '':
+                    is_null[key] = False
+            elif type(value) == float:
+                if value != 0.0:
+                    is_null[key] = False
+            elif value is not None:
+                is_null[key] = False
+    return is_null
+
+def filter_null_cols(results):
+    '''
+    Filter columns from a Druid result set by removing all null-like
+    columns. A column is considered null if all values for that column
+    are null. A value is null if it is either a JSON null, an empty
+    string, or a numeric 0. All rows are preserved, as is the order
+    of the remaining columns.
+    '''
+    if results is None or len(results) == 0:
+        return results
+    is_null = label_non_null_cols(results)
+    revised = []
+    for row in results:
+        new_row = {}
+        for key, value in row.items():
+            if is_null[key]:
+                continue
+            new_row[key] = value
+        revised.append(new_row)
+    return revised
+
+def parse_object_schema(results):
+    schema = []
+    if len(results) == 0:
+        return schema
+    row = results[0]
+    for k, v in row.items():
+        druid_type = None
+        sql_type = None
+        if type(v) is str:
+            druid_type = consts.DRUID_STRING_TYPE
+            sql_type = consts.SQL_VARCHAR_TYPE
+        elif type(v) is int or type(v) is float:
+            druid_type = consts.DRUID_LONG_TYPE
+            sql_type = consts.SQL_BIGINT_TYPE
+        schema.append(ColumnSchema(k, sql_type, druid_type))
+    return schema
+
+def parse_array_schema(context, results):
+    schema = []
+    if len(results) == 0:
+        return schema
+    has_headers = context.get(consts.HEADERS_KEY, False)
+    if not has_headers:
+        return schema
+    has_sql_types = context.get(consts.SQL_TYPES_HEADERS_KEY, False)
+    has_druid_types = context.get(consts.DRUID_TYPE_HEADERS_KEY, False)
+    size = len(results[0])
+    for i in range(size):
+        druid_type = None
+        if has_druid_types:
+            druid_type = results[1][i]
+        sql_type = None
+        if has_sql_types:
+            sql_type = results[2][i]
+        schema.append(ColumnSchema(results[0][i], sql_type, druid_type))
+    return schema
+
+def parse_schema(fmt, context, results):
+    if fmt == consts.SQL_OBJECT:
+        return parse_object_schema(results)
+    elif fmt == consts.SQL_ARRAY or fmt == consts.SQL_ARRAY_WITH_TRAILER:
+        return parse_array_schema(context, results)
+    else:
+        return []
+
+def is_response_ok(http_response):
+    code = http_response.status_code
+    return code == requests.codes.ok or code == requests.codes.accepted
+ 
+class ColumnSchema:
+
+    def __init__(self, name, sql_type, druid_type):
+        self.name = name
+        self.sql_type = sql_type
+        self.druid_type = druid_type
+
+    def __str__(self):
+        return "{{name={}, SQL type={}, Druid type={}}}".format(self.name, self.sql_type, self.druid_type)
+
+class SqlQueryResult:
+    """
+    Defines the core protocol for Druid SQL queries.
+    """
+
+    def __init__(self, request, response):
+        self.http_response = response
+        self._json = None
+        self._rows = None
+        self._schema = None
+        self.request = request
+        self._error = None
+        self._id = None
+        if not is_response_ok(response):
+            try:
+                self._error = response.json()
+            except Exception:
+                self._error = response.text
+                if self._error is None or len(self._error) == 0:
+                    self._error = "Failed with HTTP status {}".format(response.status_code)
+        try:
+            self._id = self.http_response.headers['X-Druid-SQL-Query-Id']
+        except KeyError:
+            self._error = "Query returned no query ID"
+
+    def result_format(self):
+        return self.request.result_format()
+
+    def ok(self):
+        """
+        Reports if the query succeeded.
+
+        The query rows and schema are available only if ok() returns True.
+        """
+        return is_response_ok(self.http_response)
+    
+    def error(self):

Review Comment:
   ## Variable defined multiple times
   
   This assignment to 'error' is unnecessary as it is [redefined](1) before this value is used.
   
   [Show more details](https://github.com/apache/druid/security/code-scanning/4257)



##########
examples/quickstart/jupyter-notebooks/druidapi/html_table.py:
##########
@@ -0,0 +1,121 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from IPython.display import display, HTML
+from .base_table import BaseTable
+from html import escape
+
+STYLES = '''
+<style>
+  .druid table {
+    border: 1px solid black;
+    border-collapse: collapse;
+  }
+
+  .druid th, .druid td {
+    padding: 4px 1em ;
+    text-align: left;
+  }
+
+  td.druid-right, th.druid-right {
+    text-align: right;
+  }
+
+  td.druid-center, th.druid-center {
+    text-align: center;
+  }
+
+  .druid .druid-left {
+    text-align: left;
+  }
+
+  .druid-alert {
+    color: red;
+  }
+</style>
+'''
+
+def escape_for_html(s):
+    # Anoying: IPython treats $ as the start of Latex, which is cool,
+    # but not wanted here.
+    return s.replace('$', '\\$')
+
+def html(s):
+    s =  '<div class="druid">' + escape_for_html(s) + '</div>'
+    display(HTML(s))
+
+def html_error(s):
+    s =  '<div class="druid-alert">' + escape_for_html(s.replace('\n', '<br/>')) + '</div>'
+    display(HTML(s))
+
+def styles():
+    display(HTML(STYLES))
+
+alignments = ['druid-left', 'druid-center', 'druid-right']
+
+def start_tag(tag, align):
+    s = '<' + tag
+    if align is not None:
+        s += ' class="{}"'.format(alignments[align])
+    return s + '>'
+
+class HtmlTable(BaseTable):

Review Comment:
   ## Missing call to `__init__` during object initialization
   
   Class HtmlTable may not be initialized properly as [method BaseTable.__init__](1) is not called from its [__init__ method](2).
   
   [Show more details](https://github.com/apache/druid/security/code-scanning/4255)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org