Posted to commits@airflow.apache.org by po...@apache.org on 2022/04/25 22:13:54 UTC
[airflow] 03/03: Address review comments
This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
commit a58506b2a68f0d4533b41feb67efb0caf34e14d8
Author: Alex Ott <al...@gmail.com>
AuthorDate: Sun Apr 24 11:36:28 2022 +0200
Address review comments
---
.../providers/databricks/hooks/databricks_sql.py | 37 ++++++++--------
docs/apache-airflow-providers-databricks/index.rst | 1 -
.../operators/copy_into.rst | 48 +++------------------
.../operators/sql.rst | 49 +++++-----------------
4 files changed, 35 insertions(+), 100 deletions(-)
diff --git a/airflow/providers/databricks/hooks/databricks_sql.py b/airflow/providers/databricks/hooks/databricks_sql.py
index aa8245772a..afc165ee20 100644
--- a/airflow/providers/databricks/hooks/databricks_sql.py
+++ b/airflow/providers/databricks/hooks/databricks_sql.py
@@ -33,7 +33,24 @@ USER_AGENT_STRING = f'airflow-{__version__}'
class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
- """Hook to interact with Databricks SQL."""
+ """
+ Hook to interact with Databricks SQL.
+
+ :param databricks_conn_id: Reference to the
+ :ref:`Databricks connection <howto/connection:databricks>`.
+ :param http_path: Optional string specifying HTTP path of Databricks SQL Endpoint or cluster.
+ If not specified, it must either be set in the Databricks connection's extra parameters,
+ or ``sql_endpoint_name`` must be provided.
+ :param sql_endpoint_name: Optional name of Databricks SQL Endpoint. If not specified, ``http_path``
+ must be provided as described above.
+ :param session_configuration: An optional dictionary of Spark session parameters. Defaults to None.
+ If not specified here, it can be set in the Databricks connection's extra parameters.
+ :param http_headers: An optional list of (k, v) pairs that will be set as HTTP headers
+ on every request.
+ :param catalog: An optional initial catalog to use. Requires DBR version 9.0+
+ :param schema: An optional initial schema to use. Requires DBR version 9.0+
+ :param kwargs: Additional parameters internal to the Databricks SQL Connector.
+ """
hook_name = 'Databricks SQL'
@@ -48,24 +65,6 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
schema: Optional[str] = None,
**kwargs,
) -> None:
- """
- Initializes DatabricksSqlHook
-
- :param databricks_conn_id: Reference to the
- :ref:`Databricks connection <howto/connection:databricks>`.
- :param http_path: Optional string specifying HTTP path of Databricks SQL Endpoint or cluster.
- If not specified, it should be either specified in the Databricks connection's extra parameters,
- or ``sql_endpoint_name`` must be specified.
- :param sql_endpoint_name: Optional name of Databricks SQL Endpoint. If not specified, ``http_path``
- must be provided as described above.
- :param session_configuration: An optional dictionary of Spark session parameters. Defaults to None.
- If not specified, it could be specified in the Databricks connection's extra parameters.
- :param http_headers: An optional list of (k, v) pairs that will be set as HTTP headers
- on every request
- :param catalog: An optional initial catalog to use. Requires DBR version 9.0+
- :param schema: An optional initial schema to use. Requires DBR version 9.0+
- :param kwargs: Additional parameters internal to Databricks SQL Connector parameters
- """
super().__init__(databricks_conn_id)
self._sql_conn = None
self._token: Optional[str] = None
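For reference, a minimal sketch of how the hook documented above might be used directly; the connection id, HTTP path, and header values are placeholders rather than values from this commit, and ``get_records`` comes from the inherited DbApiHook interface:

from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook

# Placeholder connection id and endpoint path -- adjust to your workspace.
hook = DatabricksSqlHook(
    databricks_conn_id="databricks_default",
    http_path="/sql/1.0/endpoints/1234567890abcdef",   # or pass sql_endpoint_name instead
    http_headers=[("X-Correlation-Id", "airflow")],    # optional extra HTTP headers
    catalog="main",       # requires DBR 9.0+
    schema="default",     # requires DBR 9.0+
)

# DbApiHook-style query execution: runs the statement and fetches all rows.
rows = hook.get_records("SELECT 1")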
diff --git a/docs/apache-airflow-providers-databricks/index.rst b/docs/apache-airflow-providers-databricks/index.rst
index 968b94149b..1a6d32cab3 100644
--- a/docs/apache-airflow-providers-databricks/index.rst
+++ b/docs/apache-airflow-providers-databricks/index.rst
@@ -81,7 +81,6 @@ PIP package Version required
============================ ===================
``apache-airflow``           ``>=2.1.0``
``databricks-sql-connector`` ``>=2.0.0, <3.0.0``
->>>>>>> DatabricksSqlOperator - switch to databricks-sql-connector 2.x
``requests``                 ``>=2.26.0, <3``
============================ ===================
diff --git a/docs/apache-airflow-providers-databricks/operators/copy_into.rst b/docs/apache-airflow-providers-databricks/operators/copy_into.rst
index 1d4ef07de2..79716c256f 100644
--- a/docs/apache-airflow-providers-databricks/operators/copy_into.rst
+++ b/docs/apache-airflow-providers-databricks/operators/copy_into.rst
@@ -29,50 +29,14 @@ command.
Using the Operator
------------------
-Operator loads data from a specified location into a table using a configured endpoint.
+The operator loads data from a specified location into a table using a configured endpoint. The only required parameters are:
-.. list-table::
- :widths: 15 25
- :header-rows: 1
+* ``table_name`` - string with the table name
+* ``file_location`` - string with the URI of data to load
+* ``file_format`` - string specifying the file format of data to load. Supported formats are ``CSV``, ``JSON``, ``AVRO``, ``ORC``, ``PARQUET``, ``TEXT``, ``BINARYFILE``.
+* One of ``sql_endpoint_name`` (name of Databricks SQL endpoint to use) or ``http_path`` (HTTP path for Databricks SQL endpoint or Databricks cluster).
- * - Parameter
- - Input
- * - table_name: str
- - Required name of the table.
- * - file_location: str
- - Required location of files to import.
- * - file_format: str
- - Required file format. Supported formats are ``CSV``, ``JSON``, ``AVRO``, ``ORC``, ``PARQUET``, ``TEXT``, ``BINARYFILE``.
- * - sql_endpoint_name: str
- - Optional name of Databricks SQL endpoint to use. If not specified, ``http_path`` should be provided.
- * - http_path: str
- - Optional HTTP path for Databricks SQL endpoint or Databricks cluster. If not specified, it should be provided in Databricks connection, or the ``sql_endpoint_name`` parameter must be set.
- * - session_configuration: dict[str,str]
- - optional dict specifying Spark configuration parameters that will be set for the session.
- * - http_headers: list[tuple[str, str]]
- - Optional list of (k, v) pairs that will be set as HTTP headers on every request
- * - client_parameters: dict[str,str]
- - optional additional parameters internal to Databricks SQL Connector parameters
- * - files: list[str]]
- - optional list of files to import. Can't be specified together with ``pattern``.
- * - pattern: str
- - optional regex string to match file names to import. Can't be specified together with ``files``.
- * - expression_list: str
- - optional string that will be used in the ``SELECT`` expression.
- * - credential: dict[str, str]
- - optional credential configuration for authentication against a specified location
- * - encryption: dict[str, str]
- - optional encryption configuration for a specified location
- * - storage_credential: str
- - optional Unity Catalog storage credential name for the target table
- * - format_options: dict[str, str]
- - optional dictionary with options specific for a given file format.
- * - force_copy: bool
- - optional boolean parameter to control forcing of data import (could be also specified in ``copy_options``).
- * - copy_options: dict[str, str]
- - optional dictionary of copy options. Right now only ``force`` option is supported.
- * - validate: union[bool, int]]
- - optional validation configuration. ``True`` forces validation of all rows, positive number - only N first rows. (requires Preview channel)
+Other parameters are optional and can be found in the class documentation.
Examples
--------
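As a rough illustration of the simplified parameter list in this hunk (the DAG id, endpoint name, table, and S3 path are placeholders, not values from this commit):

from datetime import datetime

from airflow import DAG
from airflow.providers.databricks.operators.databricks_sql import DatabricksCopyIntoOperator

with DAG(
    dag_id="example_databricks_copy_into",  # hypothetical DAG id
    start_date=datetime(2022, 1, 1),
    schedule_interval=None,
    catchup=False,
) as dag:
    # Load CSV files from a cloud location into a table via a SQL endpoint.
    import_csv = DatabricksCopyIntoOperator(
        task_id="import_csv",
        databricks_conn_id="databricks_default",
        sql_endpoint_name="my-endpoint",        # placeholder endpoint name
        table_name="default.my_table",          # placeholder target table
        file_location="s3://my-bucket/incoming/",
        file_format="CSV",
        format_options={"header": "true"},      # format-specific options
        force_copy=True,                        # equivalent to the "force" copy option
    )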
diff --git a/docs/apache-airflow-providers-databricks/operators/sql.rst b/docs/apache-airflow-providers-databricks/operators/sql.rst
index d0a1d6d337..93a3b88007 100644
--- a/docs/apache-airflow-providers-databricks/operators/sql.rst
+++ b/docs/apache-airflow-providers-databricks/operators/sql.rst
@@ -29,44 +29,17 @@ on a `Databricks SQL endpoint <https://docs.databricks.com/sql/admin/sql-endpoi
Using the Operator
------------------
-Operator executes given SQL queries against configured endpoint. There are 3 ways of specifying SQL queries:
-
-1. Simple string with SQL statement.
-2. List of strings representing SQL statements.
-3. Name of the file with SQL queries. File must have ``.sql`` extension. Each query should finish with ``;<new_line>``
-
-.. list-table::
- :widths: 15 25
- :header-rows: 1
-
- * - Parameter
- - Input
- * - sql: str or list[str]
- - Required parameter specifying a queries to execute.
- * - sql_endpoint_name: str
- - Optional name of Databricks SQL endpoint to use. If not specified, ``http_path`` should be provided.
- * - http_path: str
- - Optional HTTP path for Databricks SQL endpoint or Databricks cluster. If not specified, it should be provided in Databricks connection, or the ``sql_endpoint_name`` parameter must be set.
- * - parameters: dict[str, any]
- - Optional parameters that will be used to substitute variable(s) in SQL query.
- * - session_configuration: dict[str,str]
- - optional dict specifying Spark configuration parameters that will be set for the session.
- * - http_headers: list[tuple[str, str]]
- - Optional list of (k, v) pairs that will be set as HTTP headers on every request
- * - client_parameters: dict[str,str]
- - optional additional parameters internal to Databricks SQL Connector parameters
- * - catalog: str
- - Optional initial catalog to use. Requires DBR version 9.0+
- * - schema: str
- - Optional initial schema to use. Requires DBR version 9.0+
- * - output_path: str
- - Optional path to the file to which results will be written.
- * - output_format: str
- - Name of the format which will be used to write results. Supported values are (case-insensitive): ``JSON`` (array of JSON objects), ``JSONL`` (each row as JSON object on a separate line), ``CSV`` (default).
- * - csv_params: dict[str, any]
- - Optional dictionary with parameters to customize Python CSV writer.
- * - do_xcom_push: bool
- - whether we should push query results (last query if multiple queries are provided) to xcom. Default: false
+The operator executes the given SQL queries against the configured endpoint. The only required parameters are:
+
+* ``sql`` - SQL queries to execute. There are 3 ways of specifying SQL queries:
+
+ 1. Simple string with SQL statement.
+ 2. List of strings representing SQL statements.
+ 3. Name of a file with SQL queries. The file must have the ``.sql`` extension, and each query should end with ``;<new_line>``
+
+* One of ``sql_endpoint_name`` (name of Databricks SQL endpoint to use) or ``http_path`` (HTTP path for Databricks SQL endpoint or Databricks cluster).
+
+Other parameters are optional and can be found in the class documentation.
Examples
--------
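A comparable sketch for DatabricksSqlOperator, again with placeholder DAG id, endpoint, table, and output path values; the ``sql`` argument can also be a list of statements or the name of a ``.sql`` file, as described in the hunk above:

from datetime import datetime

from airflow import DAG
from airflow.providers.databricks.operators.databricks_sql import DatabricksSqlOperator

with DAG(
    dag_id="example_databricks_sql",  # hypothetical DAG id
    start_date=datetime(2022, 1, 1),
    schedule_interval=None,
    catchup=False,
) as dag:
    # Run a single SQL statement and write the result of the last query to a CSV file.
    select_data = DatabricksSqlOperator(
        task_id="select_data",
        databricks_conn_id="databricks_default",
        sql_endpoint_name="my-endpoint",              # placeholder endpoint name
        sql="SELECT * FROM default.my_table LIMIT 10",
        output_path="/tmp/query_result.csv",          # placeholder output path
        output_format="csv",                          # JSON, JSONL, or CSV (default)
    )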