You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2021/09/26 11:05:19 UTC
[airflow] branch main updated: Add guide for Apache Druid operators
(#18527)
This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 2643345 Add guide for Apache Druid operators (#18527)
2643345 is described below
commit 2643345e4b72064c605e42901a3dc531e6aa2f4e
Author: Jason Wu <wu...@gmail.com>
AuthorDate: Sun Sep 26 19:04:59 2021 +0800
Add guide for Apache Druid operators (#18527)
---
.../apache/druid/example_dags/__init__.py | 17 +++++++
.../apache/druid/example_dags/example_druid_dag.py | 52 ++++++++++++++++++++++
airflow/providers/apache/druid/provider.yaml | 4 +-
.../index.rst | 7 +++
.../operators.rst | 52 ++++++++++++++++++++++
5 files changed, 131 insertions(+), 1 deletion(-)
diff --git a/airflow/providers/apache/druid/example_dags/__init__.py b/airflow/providers/apache/druid/example_dags/__init__.py
new file mode 100644
index 0000000..217e5db
--- /dev/null
+++ b/airflow/providers/apache/druid/example_dags/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/airflow/providers/apache/druid/example_dags/example_druid_dag.py b/airflow/providers/apache/druid/example_dags/example_druid_dag.py
new file mode 100644
index 0000000..af9ab99
--- /dev/null
+++ b/airflow/providers/apache/druid/example_dags/example_druid_dag.py
@@ -0,0 +1,52 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Example Airflow DAG that submits an Apache Druid JSON index file using `DruidOperator`.
+"""
+from airflow.models import DAG
+from airflow.providers.apache.druid.operators.druid import DruidOperator
+from airflow.utils.dates import days_ago
+
+with DAG(
+ dag_id='example_druid_operator',
+ schedule_interval=None,
+ start_date=days_ago(2),
+ tags=['example'],
+) as dag:
+ # [START howto_operator_druid_submit]
+ submit_job = DruidOperator(
+ task_id='spark_submit_job',
+ json_index_file='json_index.json',
+ druid_ingest_conn_id='druid_ingest_default',
+ )
+ # Example content of json_index.json:
+ JSON_INDEX_STR = """
+ {
+ "type": "index_hadoop",
+ "datasource": "datasource_prd",
+ "spec": {
+ "dataSchema": {
+ "granularitySpec": {
+ "intervals": ["2021-09-01/2021-09-02"]
+ }
+ }
+ }
+ }
+ """
+ # [END howto_operator_druid_submit]
diff --git a/airflow/providers/apache/druid/provider.yaml b/airflow/providers/apache/druid/provider.yaml
index d052233..5600556 100644
--- a/airflow/providers/apache/druid/provider.yaml
+++ b/airflow/providers/apache/druid/provider.yaml
@@ -19,7 +19,7 @@
package-name: apache-airflow-providers-apache-druid
name: Apache Druid
description: |
- `Apache Druid <https://druid.apache.org/>`__.
+ `Apache Druid <https://druid.apache.org/>`__.
versions:
- 2.0.2
@@ -36,6 +36,8 @@ integrations:
- integration-name: Apache Druid
external-doc-url: https://druid.apache.org/
logo: /integration-logos/apache/druid-1.png
+ how-to-guide:
+ - /docs/apache-airflow-providers-apache-druid/operators.rst
tags: [apache]
operators:
diff --git a/docs/apache-airflow-providers-apache-druid/index.rst b/docs/apache-airflow-providers-apache-druid/index.rst
index cc0ab62..3fbb74a 100644
--- a/docs/apache-airflow-providers-apache-druid/index.rst
+++ b/docs/apache-airflow-providers-apache-druid/index.rst
@@ -23,11 +23,18 @@ Content
.. toctree::
:maxdepth: 1
+ :caption: Guides
+
+ Operators <operators>
+
+.. toctree::
+ :maxdepth: 1
:caption: References
Python API <_api/airflow/providers/apache/druid/index>
PyPI Repository <https://pypi.org/project/apache-airflow-providers-apache-druid/>
Installing from sources <installing-providers-from-sources>
+ Example DAGs <https://github.com/apache/airflow/tree/main/airflow/providers/apache/druid/example_dags>
.. THE REMAINDER OF THE FILE IS AUTOMATICALLY GENERATED. IT WILL BE OVERWRITTEN AT RELEASE TIME!
diff --git a/docs/apache-airflow-providers-apache-druid/operators.rst b/docs/apache-airflow-providers-apache-druid/operators.rst
new file mode 100644
index 0000000..f72a56a
--- /dev/null
+++ b/docs/apache-airflow-providers-apache-druid/operators.rst
@@ -0,0 +1,52 @@
+ .. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ .. http://www.apache.org/licenses/LICENSE-2.0
+
+ .. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+
+Apache Druid Operators
+======================
+
+.. contents::
+ :depth: 1
+ :local:
+
+Prerequisite
+------------
+
+To use ``DruidOperator``, you must configure a Druid Connection first.
+
+DruidOperator
+-------------
+
+To submit a task directly to Druid, you need to provide the filepath to the Druid index specification (``json_index_file``) and the Airflow connection id of the Druid overlord that accepts index jobs (``druid_ingest_conn_id``).
+
+An example of the content of a Druid ingestion specification is also shown below.
+
+For parameter definitions, take a look at :class:`~airflow.providers.apache.druid.operators.druid.DruidOperator`.
+
+Using the operator
+""""""""""""""""""
+
+.. exampleinclude:: /../../airflow/providers/apache/druid/example_dags/example_druid_dag.py
+ :language: python
+ :dedent: 4
+ :start-after: [START howto_operator_druid_submit]
+ :end-before: [END howto_operator_druid_submit]
+
+Reference
+"""""""""
+
+For more information, please refer to `Apache Druid Ingestion spec reference <https://druid.apache.org/docs/latest/ingestion/ingestion-spec.html>`_.