Posted to commits@airflow.apache.org by el...@apache.org on 2023/09/08 16:27:38 UTC

[airflow] branch main updated: Fix dataplex system tests (#34214)

This is an automated email from the ASF dual-hosted git repository.

eladkal pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 6f9443f789 Fix dataplex system tests (#34214)
6f9443f789 is described below

commit 6f9443f789343546934626ca9d5ad283976e381a
Author: Beata Kossakowska <10...@users.noreply.github.com>
AuthorDate: Fri Sep 8 18:27:32 2023 +0200

    Fix dataplex system tests (#34214)
    
    Co-authored-by: Beata Kossakowska <bk...@google.com>
---
 .../google/cloud/dataplex/example_dataplex.py      | 29 ++++++++-------
 .../google/cloud/dataplex/resources/__init__.py    | 16 --------
 .../cloud/dataplex/resources/spark_example_pi.py   | 43 ----------------------
 3 files changed, 16 insertions(+), 72 deletions(-)

diff --git a/tests/system/providers/google/cloud/dataplex/example_dataplex.py b/tests/system/providers/google/cloud/dataplex/example_dataplex.py
index bc2b6462b8..11c9edb507 100644
--- a/tests/system/providers/google/cloud/dataplex/example_dataplex.py
+++ b/tests/system/providers/google/cloud/dataplex/example_dataplex.py
@@ -21,7 +21,6 @@ from __future__ import annotations
 
 import datetime
 import os
-from pathlib import Path
 
 from airflow import models
 from airflow.models.baseoperator import chain
@@ -33,9 +32,12 @@ from airflow.providers.google.cloud.operators.dataplex import (
     DataplexGetTaskOperator,
     DataplexListTasksOperator,
 )
-from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator, GCSDeleteBucketOperator
+from airflow.providers.google.cloud.operators.gcs import (
+    GCSCreateBucketOperator,
+    GCSDeleteBucketOperator,
+    GCSSynchronizeBucketsOperator,
+)
 from airflow.providers.google.cloud.sensors.dataplex import DataplexTaskStateSensor
-from airflow.providers.google.cloud.transfers.local_to_gcs import LocalFilesystemToGCSOperator
 from airflow.utils.trigger_rule import TriggerRule
 
 ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
@@ -46,13 +48,12 @@ DAG_ID = "example_dataplex"
 BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}"
 
 SPARK_FILE_NAME = "spark_example_pi.py"
-CURRENT_FOLDER = Path(__file__).parent
-FILE_LOCAL_PATH = str(Path(CURRENT_FOLDER) / "resources" / SPARK_FILE_NAME)
+RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
 
-LAKE_ID = f"test-lake-{ENV_ID}"
+LAKE_ID = f"test-lake-dataplex-{ENV_ID}"
 REGION = "us-central1"
 
-SERVICE_ACC = os.environ.get("GCP_DATAPLEX_SERVICE_ACC")
+SERVICE_ACC = f"{PROJECT_ID}@appspot.gserviceaccount.com"
 
 SPARK_FILE_FULL_PATH = f"gs://{BUCKET_NAME}/{SPARK_FILE_NAME}"
 DATAPLEX_TASK_ID = f"test-task-{ENV_ID}"
@@ -87,11 +88,13 @@ with models.DAG(
         task_id="create_bucket", bucket_name=BUCKET_NAME, project_id=PROJECT_ID
     )
 
-    upload_file = LocalFilesystemToGCSOperator(
-        task_id="upload_file",
-        src=FILE_LOCAL_PATH,
-        dst=SPARK_FILE_NAME,
-        bucket=BUCKET_NAME,
+    sync_bucket = GCSSynchronizeBucketsOperator(
+        task_id="sync_bucket",
+        source_bucket=RESOURCE_DATA_BUCKET,
+        source_object=SPARK_FILE_NAME,
+        destination_bucket=BUCKET_NAME,
+        destination_object=SPARK_FILE_NAME,
+        recursive=True,
     )
     # [START howto_dataplex_create_lake_operator]
     create_lake = DataplexCreateLakeOperator(
@@ -186,7 +189,7 @@ with models.DAG(
     chain(
         # TEST SETUP
         create_bucket,
-        upload_file,
+        sync_bucket,
         # TEST BODY
         create_lake,
         create_dataplex_task,
diff --git a/tests/system/providers/google/cloud/dataplex/resources/__init__.py b/tests/system/providers/google/cloud/dataplex/resources/__init__.py
deleted file mode 100644
index 13a83393a9..0000000000
--- a/tests/system/providers/google/cloud/dataplex/resources/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
diff --git a/tests/system/providers/google/cloud/dataplex/resources/spark_example_pi.py b/tests/system/providers/google/cloud/dataplex/resources/spark_example_pi.py
deleted file mode 100644
index be477d202a..0000000000
--- a/tests/system/providers/google/cloud/dataplex/resources/spark_example_pi.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from __future__ import annotations
-
-import random
-import sys
-from operator import add
-
-from pyspark.sql import SparkSession
-
-if __name__ == "__main__":
-    """
-    Usage: pi [partitions]
-    """
-    spark = SparkSession.builder.appName("PythonPi").getOrCreate()
-
-    partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2
-    n = 100000 * partitions
-
-    def f(_: int) -> float:
-        x = random.random() * 2 - 1
-        y = random.random() * 2 - 1
-        return 1 if x**2 + y**2 <= 1 else 0
-
-    count = spark.sparkContext.parallelize(range(1, n + 1), partitions).map(f).reduce(add)
-    print(f"Pi is roughly {4.0 * count / n:f}")
-
-    spark.stop()
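
For context, here is a minimal sketch of the staging flow the updated system test uses, assembled from the hunks above. It is illustrative only: the PROJECT_ID environment variable name, the DAG scheduling arguments, and the teardown step are assumptions, and the Dataplex lake/task operators from the full example are omitted.

    from __future__ import annotations

    import datetime
    import os

    from airflow import models
    from airflow.models.baseoperator import chain
    from airflow.providers.google.cloud.operators.gcs import (
        GCSCreateBucketOperator,
        GCSDeleteBucketOperator,
        GCSSynchronizeBucketsOperator,
    )
    from airflow.utils.trigger_rule import TriggerRule

    ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
    # Assumed env var name, following the usual Google system-test convention.
    PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT")
    DAG_ID = "example_dataplex"
    BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}"

    SPARK_FILE_NAME = "spark_example_pi.py"
    RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
    # The Dataplex task later reads the script from the per-run bucket.
    SPARK_FILE_FULL_PATH = f"gs://{BUCKET_NAME}/{SPARK_FILE_NAME}"

    with models.DAG(
        DAG_ID,
        start_date=datetime.datetime(2021, 1, 1),  # illustrative DAG args
        schedule="@once",
        catchup=False,
    ) as dag:
        create_bucket = GCSCreateBucketOperator(
            task_id="create_bucket", bucket_name=BUCKET_NAME, project_id=PROJECT_ID
        )

        # Copy the pyspark script from the shared resources bucket instead of
        # uploading a checked-in local file (the removed
        # LocalFilesystemToGCSOperator step and resources/ directory).
        sync_bucket = GCSSynchronizeBucketsOperator(
            task_id="sync_bucket",
            source_bucket=RESOURCE_DATA_BUCKET,
            source_object=SPARK_FILE_NAME,
            destination_bucket=BUCKET_NAME,
            destination_object=SPARK_FILE_NAME,
            recursive=True,
        )

        # Assumed teardown, mirroring the usual system-test cleanup pattern.
        delete_bucket = GCSDeleteBucketOperator(
            task_id="delete_bucket",
            bucket_name=BUCKET_NAME,
            trigger_rule=TriggerRule.ALL_DONE,
        )

        chain(create_bucket, sync_bucket, delete_bucket)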