You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by GitBox <gi...@apache.org> on 2019/12/12 05:55:19 UTC

[GitHub] [airflow] baolsen commented on a change in pull request #6773: [AIRFLOW-6038] AWS DataSync example_dags added

baolsen commented on a change in pull request #6773: [AIRFLOW-6038] AWS DataSync example_dags added
URL: https://github.com/apache/airflow/pull/6773#discussion_r356969219
 
 

 ##########
 File path: airflow/providers/amazon/aws/example_dags/example_datasync_complex.py
 ##########
 @@ -0,0 +1,101 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+This is an example dag for using `AWSDataSyncOperator` in a more complex manner.
+
+- Try to get a TaskArn. If one exists, update it.
+- If no tasks exist, try to create a new DataSync Task.
+    - If source and destination locations don't exist for the new task, create them first
+- If many tasks exist, raise an Exception
+- After getting or creating a DataSync Task, run it
+
+This DAG relies on the following environment variables:
+
+* SOURCE_LOCATION_URI - Source location URI, usually on-premises SMB or NFS
+* DESTINATION_LOCATION_URI - Destination location URI, usually S3
+* CREATE_TASK_KWARGS - Passed to boto3.create_task(**kwargs)
+* CREATE_SOURCE_LOCATION_KWARGS - Passed to boto3.create_location(**kwargs)
+* CREATE_DESTINATION_LOCATION_KWARGS - Passed to boto3.create_location(**kwargs)
+* UPDATE_TASK_KWARGS - Passed to boto3.update_task(**kwargs)
+"""
+
+import json
+from os import getenv
+
+from airflow import models, utils
+from airflow.providers.amazon.aws.operators.datasync import AWSDataSyncOperator
+
+# [START howto_operator_datasync_complex_args]
+SOURCE_LOCATION_URI = getenv(
+    "SOURCE_LOCATION_URI", "smb://hostname/directory/")
+
+DESTINATION_LOCATION_URI = getenv(
+    "DESTINATION_LOCATION_URI", "s3://mybucket/prefix")
+
+default_create_task_kwargs = '{"Name": "Created by Airflow"}'
+CREATE_TASK_KWARGS = json.loads(
+    getenv("CREATE_TASK_KWARGS", default_create_task_kwargs)
+)
+
+default_create_source_location_kwargs = "{}"
+CREATE_SOURCE_LOCATION_KWARGS = json.loads(
+    getenv("CREATE_SOURCE_LOCATION_KWARGS",
+           default_create_source_location_kwargs)
+)
+
+bucket_access_role_arn = (
+    "arn:aws:iam::11112223344:role/r-11112223344-my-bucket-access-role"
+)
+default_destination_location_kwargs = """\
+{"S3BucketArn": "arn:aws:s3:::mybucket",
+    "S3Config": {"BucketAccessRoleArn": bucket_access_role_arn}
+}"""
+CREATE_DESTINATION_LOCATION_KWARGS = json.loads(
+    getenv("CREATE_DESTINATION_LOCATION_KWARGS",
+           default_destination_location_kwargs)
+)
+
+default_update_task_kwargs = '{"Name": "Updated by Airflow"}'
+UPDATE_TASK_KWARGS = json.loads(
+    getenv("UPDATE_TASK_KWARGS", default_update_task_kwargs)
+)
+
+default_args = {"start_date": utils.dates.days_ago(1)}
+# [END howto_operator_datasync_complex_args]
+
+with models.DAG(
+    "example_datasync_complex",
 
 Review comment:
   Agreed :) I'll change them to "example_1" and "example_2" to make it clearer.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services