You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by "abhishekrb19 (via GitHub)" <gi...@apache.org> on 2023/02/24 16:41:00 UTC
[GitHub] [druid] abhishekrb19 commented on a diff in pull request #13787: Python Druid API for use in notebooks

abhishekrb19 commented on code in PR #13787:
URL: https://github.com/apache/druid/pull/13787#discussion_r1115214820


##########
examples/quickstart/jupyter-notebooks/druidapi/datasource.py:
##########
@@ -0,0 +1,111 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import requests, time
+from .consts import COORD_BASE
+from .rest import check_error
+from .util import dict_get
+
+REQ_DATASOURCES = COORD_BASE + '/datasources'
+REQ_DATASOURCE = REQ_DATASOURCES + '/{}'
+
+# Segment load status
+REQ_DATASOURCES = COORD_BASE + '/datasources'
+REQ_DS_LOAD_STATUS = REQ_DATASOURCES + '/{}/loadstatus'
+
+class DatasourceClient:
+    '''
+    Client for status APIs. These APIs are available on all nodes.
+    If used with the router, they report the status of just the router.
+    '''
+    
+    def __init__(self, rest_client):
+        self.client = rest_client
+
+    def names(self, include_unused=False, include_disabled=False):
+        """
+        Returns a list of the names of data sources in the metadata store.
+        
+        Parameters
+        ----------
+        include_unused : bool, default = False
+            if False, returns only datasources with at least one used segment
+            in the cluster.
+
+        include_unused : bool, default = False
+            if False, returns only enamed datasources.
+
+        Reference
+        ---------
+        * `GET /druid/coordinator/v1/metadata/datasources`
+        * `GET /druid/coordinator/v1/metadata/datasources?includeUnused`
+        * `GET /druid/coordinator/v1/metadata/datasources?includeDisabled`
+
+        See https://druid.apache.org/docs/latest/operations/api-reference.html#get-4
+
+        Note: this method uses a semi-deprecated API.
+        See `Metadata.user_table_names()` for the preferred solution.
+        """
+        params = {}
+        if include_unused:
+            params['includeUnused'] = ''
+        if include_disabled:
+            params['includeDisabled'] = ''
+        return self.client.get_json(REQ_DATASOURCES, params=params)
+    
+    def drop(self, ds_name, ifExists=False):
+        """
+        Drops a data source.
+
+        Marks as unused all segments belonging to a datasource. 
+
+        Marking all segments as unused is equivalent to dropping the table.
+        
+        Parameters
+        ----------
+        ds_name: str
+            name of the datasource to query
+
+        Returns
+        -------
+        Returns a map of the form 
+        {"numChangedSegments": <number>} with the number of segments in the database whose 
+        state has been changed (that is, the segments were marked as unused) as the result 
+        of this API call.
+
+        Reference
+        ---------
+        `DELETE /druid/coordinator/v1/datasources/{dataSourceName}`
+        """
+        r = self.client.delete(REQ_DATASOURCE, args=[ds_name])
+        if ifExists and r.status_code == requests.codes.not_found:
+            return
+        check_error(r)
+
+    def load_status_req(self, ds_name, params=None):
+        return self.rest_client.get_json(REQ_DS_LOAD_STATUS, args=[ds_name], params=params)
+    
+    def load_status(self, ds_name):
+        return self.load_status_req(ds_name, {
+            'forceMetadataRefresh': 'true', 
+            'interval': '1970-01-01/2999-01-01'})
+
+    def wait_until_ready(self, ds_name):
+        while True:
+            resp = self.load_status(ds_name)
+            if dict_get(resp, ds_name) == 100.0:
+                return
+            time.sleep(0.5)
+            

Review Comment:
   nit: newline



##########
examples/quickstart/jupyter-notebooks/druidapi/display.py:
##########
@@ -0,0 +1,84 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+TEXT_TABLE = 0
+HTML_TABLE = 1
+
+class Display:
+
+    def __init__(self):
+        self.format = TEXT_TABLE
+        self.html_initialized = False
+
+    def text(self):
+        self.format = TEXT_TABLE
+
+    def html(self):
+        self.format = HTML_TABLE
+        if not self.html_initialized:
+            from .html_table import styles
+            styles()
+            self.html_initialized = True
+    
+    def table(self):
+        if self.format == HTML_TABLE:
+            from .html_table import HtmlTable

Review Comment:
   Is there a reason why the imports are inlined here instead of importing them globally at the top? I imagine for performance reasons, but those imports seem relatively lightweight to import globally once.



##########
examples/quickstart/jupyter-notebooks/README.md:
##########
@@ -21,32 +26,35 @@
   ~ under the License.
   -->
 
-You can try out the Druid APIs using the Jupyter Notebook-based tutorials. These tutorials provide snippets of Python code that you can use to run calls against the Druid API to complete the tutorial.
+You can try out the Druid APIs using the Jupyter Notebook-based tutorials. These
+tutorials provide snippets of Python code that you can use to run calls against

Review Comment:
   Thanks for wrapping these long lines. It's easier on the eyes :) 



##########
docs/tutorials/tutorial-jupyter-index.md:
##########
@@ -22,50 +22,85 @@ title: "Jupyter Notebook tutorials"
   ~ under the License.
   -->
 
-<!-- tutorial-jupyter-index.md and examples/quickstart/juptyer-notebooks/README.md share a lot of the same content. If you make a change in one place, update the other too. -->
+<!-- tutorial-jupyter-index.md and examples/quickstart/jupyter-notebooks/README.md
+    share a lot of the same content. If you make a change in one place, update the other
+    too. -->
 
-You can try out the Druid APIs using the Jupyter Notebook-based tutorials. These tutorials provide snippets of Python code that you can use to run calls against the Druid API to complete the tutorial.
+You can try out the Druid APIs using the Jupyter Notebook-based tutorials. These
+tutorials provide snippets of Python code that you can use to run calls against
+the Druid API to complete the tutorial.
 
 ## Prerequisites 
 
 Make sure you meet the following requirements before starting the Jupyter-based tutorials:
 
-- Python 3 
+- Python 3
+
+- The `requests` package for Python. For example, you can install it with the following command:
 
-- The `requests` package for Python. For example, you can install it with the following command: 
-   
    ```bash
    pip3 install requests
    ```
 
-- JupyterLab (recommended) or Jupyter Notebook running on a non-default port. By default, Druid and Jupyter both try to use port `8888,` so start Jupyter on a different port.
+- JupyterLab (recommended) or Jupyter Notebook running on a non-default port. By default, Druid
+  and Jupyter both try to use port `8888`, so start Jupyter on a different port.
 
   - Install JupyterLab or Notebook:
-  
-     ```bash
-     # Install JupyterLab
-     pip3 install jupyterlab  
-     # Install Jupyter Notebook
-     pip3 install notebook
-     ```
-  - Start Jupyter
-      - JupyterLab 
+
+    ```bash
+    # Install JupyterLab
+    pip3 install jupyterlab

Review Comment:
   Something to consider for ease of use/installation in the future - these dependencies can come from a `requirements.txt` file. But, for now, it's just a few, so this is ok.



##########
examples/quickstart/jupyter-notebooks/druidapi/datasource.py:
##########
@@ -0,0 +1,111 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import requests, time
+from .consts import COORD_BASE
+from .rest import check_error
+from .util import dict_get
+
+REQ_DATASOURCES = COORD_BASE + '/datasources'
+REQ_DATASOURCE = REQ_DATASOURCES + '/{}'
+
+# Segment load status
+REQ_DATASOURCES = COORD_BASE + '/datasources'
+REQ_DS_LOAD_STATUS = REQ_DATASOURCES + '/{}/loadstatus'
+
+class DatasourceClient:
+    '''
+    Client for status APIs. These APIs are available on all nodes.
+    If used with the router, they report the status of just the router.
+    '''
+    
+    def __init__(self, rest_client):
+        self.rest_client = rest_client
+
+    def names(self, include_unused=False, include_disabled=False):
+        """
+        Returns a list of the names of data sources in the metadata store.
+        
+        Parameters
+        ----------
+        include_unused : bool, default = False
+            if False, returns only datasources with at least one used segment
+            in the cluster.
+
+        include_unused : bool, default = False
+            if False, returns only enamed datasources.
+
+        Reference
+        ---------
+        * `GET /druid/coordinator/v1/metadata/datasources`
+        * `GET /druid/coordinator/v1/metadata/datasources?includeUnused`
+        * `GET /druid/coordinator/v1/metadata/datasources?includeDisabled`
+
+        See https://druid.apache.org/docs/latest/operations/api-reference.html#get-4
+
+        Note: this method uses a semi-deprecated API.
+        See `Metadata.user_table_names()` for the preferred solution.
+        """
+        params = {}
+        if include_unused:
+            params['includeUnused'] = ''
+        if include_disabled:
+            params['includeDisabled'] = ''
+        return self.rest_client.get_json(REQ_DATASOURCES, params=params)
+    
+    def drop(self, ds_name, ifExists=False):

Review Comment:
   nit: use snake case `if_exists`



##########
examples/quickstart/jupyter-notebooks/api-tutorial.ipynb:
##########
@@ -58,29 +60,38 @@
    "source": [
     "## Prerequisites\n",
     "\n",
-    "You'll need install the Requests library for Python before you start. For example:\n",
+    "You'll need install the [Requests](https://requests.readthedocs.io/en/latest/) library for Python before you start. For example:\n",

Review Comment:
   "You'll need to..."



##########
examples/quickstart/jupyter-notebooks/druidapi/sql.py:
##########
@@ -0,0 +1,690 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time, requests
+from . import consts, display
+from .consts import ROUTER_BASE
+from .util import is_blank, dict_get
+from .error import DruidError, ClientError
+
+REQ_ROUTER_QUERY = ROUTER_BASE
+REQ_ROUTER_SQL = ROUTER_BASE + '/sql'
+REQ_ROUTER_SQL_TASK = REQ_ROUTER_SQL + '/task'
+
+class SqlRequest:
+
+    def __init__(self, query_client, sql):
+        self.query_client = query_client
+        self.sql = sql
+        self.context = None
+        self.params = None
+        self.header = False
+        self.format = consts.SQL_OBJECT
+        self.headers = None
+        self.types = None
+        self.sqlTypes = None
+
+    def with_format(self, result_format):
+        self.format = result_format
+        return self
+
+    def with_headers(self, sqlTypes=False, druidTypes=False):
+        self.headers = True
+        self.types = druidTypes
+        self.sqlTypes = sqlTypes
+        return self
+
+    def with_context(self, context):
+        if self.context is None:
+            self.context = context
+        else:
+            self.context.update(context)
+        return self
+
+    def with_parameters(self, params):
+        '''
+        Set the array of parameters. Parameters must each be a map of 'type'/'value' pairs:
+        {'type': the_type, 'value': the_value}. The type must be a valid SQL type
+        (in upper case). See the consts module for a list.
+        '''
+        if self.params is None:
+            self.params = params
+        else:
+            self.params.update(params)
+        return self
+
+    def add_parameter(self, value):
+        '''
+        Add one parameter value. Infers the type of the parameter from the Python type.
+        '''
+        if value is None:
+            raise ClientError("Druid does not support null parameter values")
+        data_type = None
+        value_type = type(value)
+        if value_type is str:
+            data_type = consts.SQL_VARCHAR_TYPE
+        elif value_type is int:
+            data_type = consts.SQL_BIGINT_TYPE
+        elif value_type is float:
+            data_type = consts.SQL_DOUBLE_TYPE
+        elif value_type is list:
+            data_type = consts.SQL_ARRAY_TYPE
+        else:
+            raise ClientError("Unsupported value type")
+        if self.params is None:
+            self.params = []
+        self.params.append({'type': data_type, 'value': value})
+
+    def response_header(self):
+        self.header = True
+        return self
+
+    def request_headers(self, headers):
+        self.headers = headers
+        return self
+
+    def to_request(self):
+        query_obj = {"query": self.sql}
+        if self.context is not None and len(self.context) > 0:
+            query_obj['context'] = self.context
+        if self.params is not None and len(self.params) > 0:
+            query_obj['parameters'] = self.params
+        if self.header:
+            query_obj['header'] = True
+        if self.result_format is not None:
+            query_obj['resultFormat'] = self.format
+        if self.sqlTypes:
+            query_obj['sqlTypesHeader'] = self.sqlTypes
+        if self.types:
+            query_obj['typesHeader'] = self.types
+        return query_obj
+
+    def result_format(self):
+        return self.format.lower()
+
+    def run(self):
+        return self.query_client.sql_query(self)
+
+def parse_rows(fmt, context, results):
+    if fmt == consts.SQL_ARRAY_WITH_TRAILER:
+        rows = results['results']
+    elif fmt == consts.SQL_ARRAY:
+        rows = results
+    else:
+        return results
+    if not context.get('headers', False):
+        return rows
+    header_size = 1
+    if context.get('sqlTypesHeader', False):
+        header_size += 1
+    if context.get('typesHeader', False):
+        header_size += 1
+    return rows[header_size:]
+
+def label_non_null_cols(results):
+    if results is None or len(results) == 0:

Review Comment:
   A more "pythonic" way to do this:
   ```python3
   if not results:
   ```
   It'd check for `None`, `[]` among other things.
   



##########
examples/quickstart/jupyter-notebooks/druidapi/sql.py:
##########
@@ -0,0 +1,690 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time, requests
+from . import consts, display
+from .consts import ROUTER_BASE
+from .util import is_blank, dict_get
+from .error import DruidError, ClientError
+
+REQ_ROUTER_QUERY = ROUTER_BASE
+REQ_ROUTER_SQL = ROUTER_BASE + '/sql'
+REQ_ROUTER_SQL_TASK = REQ_ROUTER_SQL + '/task'
+
+class SqlRequest:
+
+    def __init__(self, query_client, sql):
+        self.query_client = query_client
+        self.sql = sql
+        self.context = None
+        self.params = None
+        self.header = False
+        self.format = consts.SQL_OBJECT
+        self.headers = None
+        self.types = None
+        self.sqlTypes = None

Review Comment:
   `sql_types`



##########
examples/quickstart/jupyter-notebooks/druidapi/tasks.py:
##########
@@ -0,0 +1,178 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .consts import OVERLORD_BASE
+
+# Tasks
+REQ_TASKS = OVERLORD_BASE + '/tasks'
+REQ_POST_TASK = OVERLORD_BASE + '/task'
+REQ_GET_TASK = REQ_POST_TASK + '/{}'
+REQ_TASK_STATUS = REQ_GET_TASK + '/status'
+REQ_TASK_REPORTS = REQ_GET_TASK + '/reports'
+REQ_END_TASK = REQ_GET_TASK
+REQ_END_DS_TASKS = REQ_END_TASK + '/shutdownAllTasks'
+
+class TaskClient:
+    """
+    Client for task-related APIs. The APIs connect through the Router to
+    the Overlord.
+    """
+    
+    def __init__(self, rest_client):
+        self.client = rest_client
+
+    def tasks(self, state=None, table=None, type=None, max=None, created_time_interval=None):
+        '''
+        Retrieve list of tasks.
+
+        Parameters
+        ----------
+        state : str, default = None
+        	Filter list of tasks by task state. Valid options are "running", 
+            "complete", "waiting", and "pending". Constants are defined for
+            each of these in the `consts` file.

Review Comment:
   The indentation generally seems a little off in this comment block.



##########
examples/quickstart/jupyter-notebooks/druidapi/datasource.py:
##########
@@ -0,0 +1,111 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import requests, time
+from .consts import COORD_BASE
+from .rest import check_error
+from .util import dict_get
+
+REQ_DATASOURCES = COORD_BASE + '/datasources'
+REQ_DATASOURCE = REQ_DATASOURCES + '/{}'
+
+# Segment load status
+REQ_DATASOURCES = COORD_BASE + '/datasources'
+REQ_DS_LOAD_STATUS = REQ_DATASOURCES + '/{}/loadstatus'
+
+class DatasourceClient:
+    '''
+    Client for status APIs. These APIs are available on all nodes.

Review Comment:
   The comment here refers to status APIs instead of datasource APIs - seems like a copy-paste issue.



##########
docs/tutorials/tutorial-jupyter-index.md:
##########
@@ -22,51 +22,86 @@ title: "Jupyter Notebook tutorials"
   ~ under the License.
   -->
 
-<!-- tutorial-jupyter-index.md and examples/quickstart/juptyer-notebooks/README.md share a lot of the same content. If you make a change in one place, update the other too. -->
+<!-- tutorial-jupyter-index.md and examples/quickstart/jupyter-notebooks/README.md
+    share a lot of the same content. If you make a change in one place, update the other
+    too. -->
 
-You can try out the Druid APIs using the Jupyter Notebook-based tutorials. These tutorials provide snippets of Python code that you can use to run calls against the Druid API to complete the tutorial.
+You can try out the Druid APIs using the Jupyter Notebook-based tutorials. These
+tutorials provide snippets of Python code that you can use to run calls against
+the Druid API to complete the tutorial.
 
 ## Prerequisites 
 
 Make sure you meet the following requirements before starting the Jupyter-based tutorials:
 
-- Python 3 
+- Python 3

Review Comment:
   Is there a minimum version of Python 3 that should be recommended for installation? For example, version 3.7 or later would be nice, since it supports f-strings and other newer features.



##########
examples/quickstart/jupyter-notebooks/druidapi/html_table.py:
##########
@@ -0,0 +1,119 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from IPython.display import display, HTML
+from .base_table import BaseTable
+from html import escape
+
+STYLES = '''
+<style>
+  .druid table {
+    border: 1px solid black;
+    border-collapse: collapse;
+  }
+
+  .druid th, .druid td {
+    padding: 4px 1em ;
+    text-align: left;
+  }
+
+  td.druid-right, th.druid-right {
+    text-align: right;
+  }
+
+  td.druid-center, th.druid-center {
+    text-align: center;
+  }
+
+  .druid .druid-left {
+    text-align: left;
+  }
+
+  .druid-alert {
+    color: red;
+  }
+</style>
+'''
+
+def escape_for_html(s):
+    # Anoying: IPython treats $ as the start of Latex, which is cool,
+    # but not wanted here.
+    return s.replace('$', '\\$')
+
+def html(s):
+    s =  '<div class="druid">' + escape_for_html(s) + '</div>'
+    display(HTML(s))
+
+def html_error(s):
+    s =  '<div class="druid-alert">' + escape_for_html(s.replace('\n', '<br/>')) + '</div>'
+    display(HTML(s))
+
+def styles():
+    display(HTML(STYLES))
+
+alignments = ['druid-left', 'druid-center', 'druid-right']
+
+def start_tag(tag, align):
+    s = '<' + tag
+    if align is not None:
+        s += ' class="{}"'.format(alignments[align])
+    return s + '>'
+
+class HtmlTable(BaseTable):
+
+    def __init__(self):
+        BaseTable.__init__(self)
+
+    def widths(self, widths):
+        self._widths = widths
+
+    def format(self, rows):
+        _, width = self.row_width(rows)
+        headers = self.pad_headers(width)
+        rows = self.pad_rows(rows, width)
+        s = '<table>\n'
+        s += self.gen_header(headers)
+        s += self.gen_rows(rows)
+        return s + '\n</table>'
+
+    def show(self, rows):
+        html(self.format(rows))
+
+    def gen_header(self, headers):
+        if headers is None or len(headers) == 0:
+            return ''
+        s = '<tr>'
+        for i in range(len(headers)):
+            s += start_tag('th', self.col_align(i)) + escape(headers[i]) + '</th>'
+        return s + '</tr>\n'
+
+    def gen_rows(self, rows):
+        html_rows = []
+        for row in rows:
+            r = "<tr>"
+            for i in range(len(row)):
+                r += start_tag('td', self.col_align(i))
+                cell = row[i]
+                value = '' if cell is None else escape(str(cell))
+                r += value + '</td>'
+            html_rows.append(r + "</tr>")
+        return "\n".join(html_rows)
+
+    def col_align(self, col):
+        if self._align is None:
+            return None
+        if col >= len(self._align):
+            return None
+        return self._align[col]

Review Comment:
   nit: newline



##########
docs/tutorials/tutorial-jupyter-index.md:
##########
@@ -22,51 +22,86 @@ title: "Jupyter Notebook tutorials"
   ~ under the License.
   -->
 
-<!-- tutorial-jupyter-index.md and examples/quickstart/juptyer-notebooks/README.md share a lot of the same content. If you make a change in one place, update the other too. -->
+<!-- tutorial-jupyter-index.md and examples/quickstart/jupyter-notebooks/README.md
+    share a lot of the same content. If you make a change in one place, update the other
+    too. -->
 
-You can try out the Druid APIs using the Jupyter Notebook-based tutorials. These tutorials provide snippets of Python code that you can use to run calls against the Druid API to complete the tutorial.
+You can try out the Druid APIs using the Jupyter Notebook-based tutorials. These
+tutorials provide snippets of Python code that you can use to run calls against
+the Druid API to complete the tutorial.
 
 ## Prerequisites 
 
 Make sure you meet the following requirements before starting the Jupyter-based tutorials:
 
-- Python 3 
+- Python 3
+
+- The `requests` package for Python. For example, you can install it with the following command:
 
-- The `requests` package for Python. For example, you can install it with the following command: 
-   
    ```bash
    pip3 install requests
    ```
 
-- JupyterLab (recommended) or Jupyter Notebook running on a non-default port. By default, Druid and Jupyter both try to use port `8888,` so start Jupyter on a different port.
+- JupyterLab (recommended) or Jupyter Notebook running on a non-default port. By default, Druid
+  and Jupyter both try to use port `8888`, so start Jupyter on a different port.
 
   - Install JupyterLab or Notebook:
-  
-     ```bash
-     # Install JupyterLab
-     pip3 install jupyterlab  
-     # Install Jupyter Notebook
-     pip3 install notebook
-     ```
-  - Start JupyterLab
-  
+
+    ```bash
+    # Install JupyterLab
+    pip3 install jupyterlab
+    # Install Jupyter Notebook
+    pip3 install notebook
+    ```
+  - Start Jupyter using either JupyterLab
+    ```bash
+    # Start JupyterLab on port 3001
+    jupyter lab --port 3001
+    ```
+
+    Or using Jupyter Notebook
     ```bash
-     # Start JupyterLab on port 3001
-     jupyter lab --port 3001
-     ```
-   - Alternatively, start Jupyter Notebook
-     ```bash
-     # Start Jupyter Notebook on port 3001
-     jupyter notebook --port 3001
-     ```
+    # Start Jupyter Notebook on port 3001
+    jupyter notebook --port 3001
+    ```
+
+- An available Druid instance. You can use the [Quickstart (local)](./index.md) instance. The tutorials
+  assume that you are using the quickstart, so no authentication or authorization
+  is expected unless explicitly mentioned.
+
+  If you contribute to Druid and work with the Druid integration tests, you can use a test cluster.
+  Assume you have an environment variable, `DRUID_DEV`, which identifies your Druid source repo.
+
+  ```bash
+  cd $DRUID_DEV
+  ./it.sh build
+  ./it.sh image
+  ./it.sh up <category>
+  ```
+
+  Replace `<category>` with one of the available integration test categories. See the integration 
+  test `README.md` for details.
+
+## Simple Druid API
 
-- An available Druid instance. You can use the [Quickstart (local)](./index.md) instance. The tutorials assume that you are using the quickstart, so no authentication or authorization is expected unless explicitly mentioned.
+One of the notebooks shows how to use the Druid REST API. The others focus on other
+topics and use a simple set of Python wrappers around the underlying REST API. The
+wrappers reside in the `druidapi` package within the notebooks directory. While the package

Review Comment:
   Wonder if it'd make sense to pull the python package outside the context of the jupyter notebook so it can be reused for other things?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org