You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2021/09/26 11:05:19 UTC

[airflow] branch main updated: Add guide for Apache Druid operators (#18527)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 2643345  Add guide for Apache Druid operators (#18527)
2643345 is described below

commit 2643345e4b72064c605e42901a3dc531e6aa2f4e
Author: Jason Wu <wu...@gmail.com>
AuthorDate: Sun Sep 26 19:04:59 2021 +0800

    Add guide for Apache Druid operators (#18527)
---
 .../apache/druid/example_dags/__init__.py          | 17 +++++++
 .../apache/druid/example_dags/example_druid_dag.py | 52 ++++++++++++++++++++++
 airflow/providers/apache/druid/provider.yaml       |  4 +-
 .../index.rst                                      |  7 +++
 .../operators.rst                                  | 52 ++++++++++++++++++++++
 5 files changed, 131 insertions(+), 1 deletion(-)

diff --git a/airflow/providers/apache/druid/example_dags/__init__.py b/airflow/providers/apache/druid/example_dags/__init__.py
new file mode 100644
index 0000000..217e5db
--- /dev/null
+++ b/airflow/providers/apache/druid/example_dags/__init__.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/airflow/providers/apache/druid/example_dags/example_druid_dag.py b/airflow/providers/apache/druid/example_dags/example_druid_dag.py
new file mode 100644
index 0000000..af9ab99
--- /dev/null
+++ b/airflow/providers/apache/druid/example_dags/example_druid_dag.py
@@ -0,0 +1,52 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Example Airflow DAG to submit Apache Druid json index file using `DruidOperator`
+"""
+from airflow.models import DAG
+from airflow.providers.apache.druid.operators.druid import DruidOperator
+from airflow.utils.dates import days_ago
+
+with DAG(
+    dag_id='example_druid_operator',
+    schedule_interval=None,
+    start_date=days_ago(2),
+    tags=['example'],
+) as dag:
+    # [START howto_operator_druid_submit]
+    submit_job = DruidOperator(
+        task_id='spark_submit_job',
+        json_index_file='json_index.json',
+        druid_ingest_conn_id='druid_ingest_default',
+    )
+    # Example content of json_index.json:
+    JSON_INDEX_STR = """
+        {
+            "type": "index_hadoop",
+            "datasource": "datasource_prd",
+            "spec": {
+                "dataSchema": {
+                    "granularitySpec": {
+                        "intervals": ["2021-09-01/2021-09-02"]
+                    }
+                }
+            }
+        }
+    """
+    # [END howto_operator_druid_submit]
diff --git a/airflow/providers/apache/druid/provider.yaml b/airflow/providers/apache/druid/provider.yaml
index d052233..5600556 100644
--- a/airflow/providers/apache/druid/provider.yaml
+++ b/airflow/providers/apache/druid/provider.yaml
@@ -19,7 +19,7 @@
 package-name: apache-airflow-providers-apache-druid
 name: Apache Druid
 description: |
-    `Apache Druid <https://druid.apache.org/>`__.
+  `Apache Druid <https://druid.apache.org/>`__.
 
 versions:
   - 2.0.2
@@ -36,6 +36,8 @@ integrations:
   - integration-name: Apache Druid
     external-doc-url: https://druid.apache.org/
     logo: /integration-logos/apache/druid-1.png
+    how-to-guide:
+      - /docs/apache-airflow-providers-apache-druid/operators.rst
     tags: [apache]
 
 operators:
diff --git a/docs/apache-airflow-providers-apache-druid/index.rst b/docs/apache-airflow-providers-apache-druid/index.rst
index cc0ab62..3fbb74a 100644
--- a/docs/apache-airflow-providers-apache-druid/index.rst
+++ b/docs/apache-airflow-providers-apache-druid/index.rst
@@ -23,11 +23,18 @@ Content
 
 .. toctree::
     :maxdepth: 1
+    :caption: Guides
+
+    Operators <operators>
+
+.. toctree::
+    :maxdepth: 1
     :caption: References
 
     Python API <_api/airflow/providers/apache/druid/index>
     PyPI Repository <https://pypi.org/project/apache-airflow-providers-apache-druid/>
     Installing from sources <installing-providers-from-sources>
+    Example DAGs <https://github.com/apache/airflow/tree/main/airflow/providers/apache/druid/example_dags>
 
 .. THE REMAINDER OF THE FILE IS AUTOMATICALLY GENERATED. IT WILL BE OVERWRITTEN AT RELEASE TIME!
 
diff --git a/docs/apache-airflow-providers-apache-druid/operators.rst b/docs/apache-airflow-providers-apache-druid/operators.rst
new file mode 100644
index 0000000..f72a56a
--- /dev/null
+++ b/docs/apache-airflow-providers-apache-druid/operators.rst
@@ -0,0 +1,52 @@
+ .. Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+ ..   http://www.apache.org/licenses/LICENSE-2.0
+
+ .. Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+
+Apache Druid Operators
+======================
+
+.. contents::
+  :depth: 1
+  :local:
+
+Prerequisite
+------------
+
+To use ``DruidOperator``, you must configure a Druid Connection first.
+
+DruidOperator
+-------------------
+
+To submit a task directly to Druid, provide the file path to the Druid index specification via ``json_index_file`` and the ID of the Airflow connection for the Druid overlord that accepts index jobs via ``druid_ingest_conn_id``.
+
+There is also an example of the content of a Druid ingestion specification below.
+
+For parameter definitions, take a look at :class:`~airflow.providers.apache.druid.operators.druid.DruidOperator`.
+
+Using the operator
+""""""""""""""""""
+
+.. exampleinclude:: /../../airflow/providers/apache/druid/example_dags/example_druid_dag.py
+    :language: python
+    :dedent: 4
+    :start-after: [START howto_operator_druid_submit]
+    :end-before: [END howto_operator_druid_submit]
+
+Reference
+"""""""""
+
+For more information, please refer to `Apache Druid Ingestion spec reference <https://druid.apache.org/docs/latest/ingestion/ingestion-spec.html>`_.