Posted to commits@airflow.apache.org by po...@apache.org on 2022/04/25 22:13:54 UTC

[airflow] 03/03: Address review comments

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit a58506b2a68f0d4533b41feb67efb0caf34e14d8
Author: Alex Ott <al...@gmail.com>
AuthorDate: Sun Apr 24 11:36:28 2022 +0200

    Address review comments
---
 .../providers/databricks/hooks/databricks_sql.py   | 37 ++++++++--------
 docs/apache-airflow-providers-databricks/index.rst |  1 -
 .../operators/copy_into.rst                        | 48 +++------------------
 .../operators/sql.rst                              | 49 +++++-----------------
 4 files changed, 35 insertions(+), 100 deletions(-)

diff --git a/airflow/providers/databricks/hooks/databricks_sql.py b/airflow/providers/databricks/hooks/databricks_sql.py
index aa8245772a..afc165ee20 100644
--- a/airflow/providers/databricks/hooks/databricks_sql.py
+++ b/airflow/providers/databricks/hooks/databricks_sql.py
@@ -33,7 +33,24 @@ USER_AGENT_STRING = f'airflow-{__version__}'
 
 
 class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
-    """Hook to interact with Databricks SQL."""
+    """
+    Hook to interact with Databricks SQL.
+
+    :param databricks_conn_id: Reference to the
+        :ref:`Databricks connection <howto/connection:databricks>`.
+    :param http_path: Optional string specifying the HTTP path of the Databricks SQL endpoint or cluster.
+        If not specified, it must be provided either in the Databricks connection's extra parameters
+        or via ``sql_endpoint_name``.
+    :param sql_endpoint_name: Optional name of the Databricks SQL endpoint. If not specified, ``http_path``
+        must be provided as described above.
+    :param session_configuration: An optional dictionary of Spark session parameters. Defaults to None.
+        If not specified, it can be provided in the Databricks connection's extra parameters.
+    :param http_headers: An optional list of (k, v) pairs that will be set as HTTP headers
+        on every request.
+    :param catalog: An optional initial catalog to use. Requires DBR version 9.0+
+    :param schema: An optional initial schema to use. Requires DBR version 9.0+
+    :param kwargs: Additional parameters internal to the Databricks SQL Connector.
+    """
 
     hook_name = 'Databricks SQL'
 
@@ -48,24 +65,6 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         schema: Optional[str] = None,
         **kwargs,
     ) -> None:
-        """
-        Initializes DatabricksSqlHook
-
-        :param databricks_conn_id: Reference to the
-            :ref:`Databricks connection <howto/connection:databricks>`.
-        :param http_path: Optional string specifying HTTP path of Databricks SQL Endpoint or cluster.
-            If not specified, it should be either specified in the Databricks connection's extra parameters,
-            or ``sql_endpoint_name`` must be specified.
-        :param sql_endpoint_name: Optional name of Databricks SQL Endpoint. If not specified, ``http_path``
-            must be provided as described above.
-        :param session_configuration: An optional dictionary of Spark session parameters. Defaults to None.
-            If not specified, it could be specified in the Databricks connection's extra parameters.
-        :param http_headers: An optional list of (k, v) pairs that will be set as HTTP headers
-            on every request
-        :param catalog: An optional initial catalog to use. Requires DBR version 9.0+
-        :param schema: An optional initial schema to use. Requires DBR version 9.0+
-        :param kwargs: Additional parameters internal to Databricks SQL Connector parameters
-        """
         super().__init__(databricks_conn_id)
         self._sql_conn = None
         self._token: Optional[str] = None
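
A minimal sketch of how the documented parameters might be passed to the hook, assuming the standard DbApiHook query interface; the endpoint name, session setting, and header below are illustrative placeholders, not values from the commit:

    from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook

    # Either sql_endpoint_name or http_path must identify the SQL endpoint or cluster;
    # the values here are placeholders.
    hook = DatabricksSqlHook(
        databricks_conn_id="databricks_default",
        sql_endpoint_name="my-sql-endpoint",              # or pass http_path instead
        session_configuration={"spark.sql.shuffle.partitions": "8"},
        http_headers=[("X-Request-Source", "airflow")],
        catalog="main",                                   # requires DBR 9.0+
        schema="default",                                 # requires DBR 9.0+
    )

    # Standard DbApiHook-style call executed against the configured endpoint.
    rows = hook.get_records("SELECT 1")
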
diff --git a/docs/apache-airflow-providers-databricks/index.rst b/docs/apache-airflow-providers-databricks/index.rst
index 968b94149b..1a6d32cab3 100644
--- a/docs/apache-airflow-providers-databricks/index.rst
+++ b/docs/apache-airflow-providers-databricks/index.rst
@@ -81,7 +81,6 @@ PIP package                   Version required
 ============================  ===================
 ``apache-airflow``            ``>=2.1.0``
 ``databricks-sql-connector``  ``>=2.0.0, <3.0.0``
->>>>>>> DatabricksSqlOperator - switch to databricks-sql-connector 2.x
 ``requests``                  ``>=2.26.0, <3``
 ============================  ===================
 
diff --git a/docs/apache-airflow-providers-databricks/operators/copy_into.rst b/docs/apache-airflow-providers-databricks/operators/copy_into.rst
index 1d4ef07de2..79716c256f 100644
--- a/docs/apache-airflow-providers-databricks/operators/copy_into.rst
+++ b/docs/apache-airflow-providers-databricks/operators/copy_into.rst
@@ -29,50 +29,14 @@ command.
 Using the Operator
 ------------------
 
-Operator loads data from a specified location into a table using a configured endpoint.
+The operator loads data from a specified location into a table using a configured endpoint. The only required parameters are:
 
-.. list-table::
-   :widths: 15 25
-   :header-rows: 1
+* ``table_name`` - string with the name of the table to load data into
+* ``file_location`` - string with the URI of the data to load
+* ``file_format`` - string specifying the file format of the data to load. Supported formats are ``CSV``, ``JSON``, ``AVRO``, ``ORC``, ``PARQUET``, ``TEXT``, ``BINARYFILE``.
+* One of ``sql_endpoint_name`` (name of Databricks SQL endpoint to use) or ``http_path`` (HTTP path for Databricks SQL endpoint or Databricks cluster).
 
-   * - Parameter
-     - Input
-   * - table_name: str
-     - Required name of the table.
-   * - file_location: str
-     - Required location of files to import.
-   * - file_format: str
-     - Required file format. Supported formats are ``CSV``, ``JSON``, ``AVRO``, ``ORC``, ``PARQUET``, ``TEXT``, ``BINARYFILE``.
-   * - sql_endpoint_name: str
-     - Optional name of Databricks SQL endpoint to use. If not specified, ``http_path`` should be provided.
-   * - http_path: str
-     - Optional HTTP path for Databricks SQL endpoint or Databricks cluster. If not specified, it should be provided in Databricks connection, or the ``sql_endpoint_name`` parameter must be set.
-   * - session_configuration: dict[str,str]
-     - optional dict specifying Spark configuration parameters that will be set for the session.
-   * - http_headers: list[tuple[str, str]]
-     - Optional list of (k, v) pairs that will be set as HTTP headers on every request
-   * - client_parameters: dict[str,str]
-     - optional additional parameters internal to Databricks SQL Connector parameters
-   * - files: list[str]]
-     - optional list of files to import. Can't be specified together with ``pattern``.
-   * - pattern: str
-     - optional regex string to match file names to import. Can't be specified together with ``files``.
-   * - expression_list: str
-     - optional string that will be used in the ``SELECT`` expression.
-   * - credential: dict[str, str]
-     - optional credential configuration for authentication against a specified location
-   * - encryption: dict[str, str]
-     - optional encryption configuration for a specified location
-   * - storage_credential: str
-     - optional Unity Catalog storage credential name for the target table
-   * - format_options: dict[str, str]
-     - optional dictionary with options specific for a given file format.
-   * - force_copy: bool
-     - optional boolean parameter to control forcing of data import (could be also specified in ``copy_options``).
-   * - copy_options: dict[str, str]
-     - optional dictionary of copy options. Right now only ``force`` option is supported.
-   * - validate: union[bool, int]]
-     - optional validation configuration. ``True`` forces validation of all rows, positive number - only N first rows. (requires Preview channel)
+Other parameters are optional and can be found in the class documentation.
 
 Examples
 --------
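
A minimal sketch of a task built from the required parameters above; the endpoint name, target table, and source location are illustrative placeholders, not values from the commit:

    from airflow.providers.databricks.operators.databricks_sql import DatabricksCopyIntoOperator

    copy_csv = DatabricksCopyIntoOperator(
        task_id="copy_csv_into_table",
        databricks_conn_id="databricks_default",
        sql_endpoint_name="my-sql-endpoint",        # or pass http_path instead
        table_name="main.default.my_table",         # placeholder target table
        file_location="s3://my-bucket/incoming/",   # placeholder source URI
        file_format="CSV",
        format_options={"header": "true"},          # optional, format-specific options
    )
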
diff --git a/docs/apache-airflow-providers-databricks/operators/sql.rst b/docs/apache-airflow-providers-databricks/operators/sql.rst
index d0a1d6d337..93a3b88007 100644
--- a/docs/apache-airflow-providers-databricks/operators/sql.rst
+++ b/docs/apache-airflow-providers-databricks/operators/sql.rst
@@ -29,44 +29,17 @@ on a `Databricks SQL endpoint  <https://docs.databricks.com/sql/admin/sql-endpoi
 Using the Operator
 ------------------
 
-Operator executes given SQL queries against configured endpoint.  There are 3 ways of specifying SQL queries:
-
-1. Simple string with SQL statement.
-2. List of strings representing SQL statements.
-3. Name of the file with SQL queries. File must have ``.sql`` extension. Each query should finish with ``;<new_line>``
-
-.. list-table::
-   :widths: 15 25
-   :header-rows: 1
-
-   * - Parameter
-     - Input
-   * - sql: str or list[str]
-     - Required parameter specifying a queries to execute.
-   * - sql_endpoint_name: str
-     - Optional name of Databricks SQL endpoint to use. If not specified, ``http_path`` should be provided.
-   * - http_path: str
-     - Optional HTTP path for Databricks SQL endpoint or Databricks cluster. If not specified, it should be provided in Databricks connection, or the ``sql_endpoint_name`` parameter must be set.
-   * - parameters: dict[str, any]
-     - Optional parameters that will be used to substitute variable(s) in SQL query.
-   * - session_configuration: dict[str,str]
-     - optional dict specifying Spark configuration parameters that will be set for the session.
-   * - http_headers: list[tuple[str, str]]
-     - Optional list of (k, v) pairs that will be set as HTTP headers on every request
-   * - client_parameters: dict[str,str]
-     - optional additional parameters internal to Databricks SQL Connector parameters
-   * - catalog: str
-     - Optional initial catalog to use. Requires DBR version 9.0+
-   * - schema: str
-     - Optional initial schema to use. Requires DBR version 9.0+
-   * - output_path: str
-     - Optional path to the file to which results will be written.
-   * - output_format: str
-     - Name of the format which will be used to write results.  Supported values are (case-insensitive): ``JSON`` (array of JSON objects), ``JSONL`` (each row as JSON object on a separate line), ``CSV`` (default).
-   * - csv_params: dict[str, any]
-     - Optional dictionary with parameters to customize Python CSV writer.
-   * - do_xcom_push: bool
-     - whether we should push query results (last query if multiple queries are provided) to xcom. Default: false
+The operator executes the given SQL queries against the configured endpoint. The only required parameters are:
+
+* ``sql`` - the SQL queries to execute. There are three ways of specifying them:
+
+  1. A simple string with an SQL statement.
+  2. A list of strings representing SQL statements.
+  3. The name of a file containing SQL queries. The file must have a ``.sql`` extension, and each query should end with ``;<new_line>``
+
+* One of ``sql_endpoint_name`` (name of Databricks SQL endpoint to use) or ``http_path`` (HTTP path for Databricks SQL endpoint or Databricks cluster).
+
+Other parameters are optional and can be found in the class documentation.
 
 Examples
 --------
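
A minimal sketch of a task using a single-string query; the endpoint name, query, and output path are illustrative placeholders, not values from the commit:

    from airflow.providers.databricks.operators.databricks_sql import DatabricksSqlOperator

    run_query = DatabricksSqlOperator(
        task_id="run_select",
        databricks_conn_id="databricks_default",
        sql_endpoint_name="my-sql-endpoint",                 # or pass http_path instead
        sql="SELECT * FROM main.default.my_table LIMIT 10",  # could also be a list or a *.sql file
        output_path="/tmp/query_results.csv",                # optional: write results to a file
        output_format="csv",
    )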