You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by el...@apache.org on 2023/09/08 16:27:38 UTC
[airflow] branch main updated: Fix dataplex system tests (#34214)
This is an automated email from the ASF dual-hosted git repository.
eladkal pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 6f9443f789 Fix dataplex system tests (#34214)
6f9443f789 is described below
commit 6f9443f789343546934626ca9d5ad283976e381a
Author: Beata Kossakowska <10...@users.noreply.github.com>
AuthorDate: Fri Sep 8 18:27:32 2023 +0200
Fix dataplex system tests (#34214)
Co-authored-by: Beata Kossakowska <bk...@google.com>
---
.../google/cloud/dataplex/example_dataplex.py | 29 ++++++++-------
.../google/cloud/dataplex/resources/__init__.py | 16 --------
.../cloud/dataplex/resources/spark_example_pi.py | 43 ----------------------
3 files changed, 16 insertions(+), 72 deletions(-)
diff --git a/tests/system/providers/google/cloud/dataplex/example_dataplex.py b/tests/system/providers/google/cloud/dataplex/example_dataplex.py
index bc2b6462b8..11c9edb507 100644
--- a/tests/system/providers/google/cloud/dataplex/example_dataplex.py
+++ b/tests/system/providers/google/cloud/dataplex/example_dataplex.py
@@ -21,7 +21,6 @@ from __future__ import annotations
import datetime
import os
-from pathlib import Path
from airflow import models
from airflow.models.baseoperator import chain
@@ -33,9 +32,12 @@ from airflow.providers.google.cloud.operators.dataplex import (
DataplexGetTaskOperator,
DataplexListTasksOperator,
)
-from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator, GCSDeleteBucketOperator
+from airflow.providers.google.cloud.operators.gcs import (
+ GCSCreateBucketOperator,
+ GCSDeleteBucketOperator,
+ GCSSynchronizeBucketsOperator,
+)
from airflow.providers.google.cloud.sensors.dataplex import DataplexTaskStateSensor
-from airflow.providers.google.cloud.transfers.local_to_gcs import LocalFilesystemToGCSOperator
from airflow.utils.trigger_rule import TriggerRule
ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
@@ -46,13 +48,12 @@ DAG_ID = "example_dataplex"
BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}"
SPARK_FILE_NAME = "spark_example_pi.py"
-CURRENT_FOLDER = Path(__file__).parent
-FILE_LOCAL_PATH = str(Path(CURRENT_FOLDER) / "resources" / SPARK_FILE_NAME)
+RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"
-LAKE_ID = f"test-lake-{ENV_ID}"
+LAKE_ID = f"test-lake-dataplex-{ENV_ID}"
REGION = "us-central1"
-SERVICE_ACC = os.environ.get("GCP_DATAPLEX_SERVICE_ACC")
+SERVICE_ACC = f"{PROJECT_ID}@appspot.gserviceaccount.com"
SPARK_FILE_FULL_PATH = f"gs://{BUCKET_NAME}/{SPARK_FILE_NAME}"
DATAPLEX_TASK_ID = f"test-task-{ENV_ID}"
@@ -87,11 +88,13 @@ with models.DAG(
task_id="create_bucket", bucket_name=BUCKET_NAME, project_id=PROJECT_ID
)
- upload_file = LocalFilesystemToGCSOperator(
- task_id="upload_file",
- src=FILE_LOCAL_PATH,
- dst=SPARK_FILE_NAME,
- bucket=BUCKET_NAME,
+ sync_bucket = GCSSynchronizeBucketsOperator(
+ task_id="sync_bucket",
+ source_bucket=RESOURCE_DATA_BUCKET,
+ source_object=SPARK_FILE_NAME,
+ destination_bucket=BUCKET_NAME,
+ destination_object=SPARK_FILE_NAME,
+ recursive=True,
)
# [START howto_dataplex_create_lake_operator]
create_lake = DataplexCreateLakeOperator(
@@ -186,7 +189,7 @@ with models.DAG(
chain(
# TEST SETUP
create_bucket,
- upload_file,
+ sync_bucket,
# TEST BODY
create_lake,
create_dataplex_task,
diff --git a/tests/system/providers/google/cloud/dataplex/resources/__init__.py b/tests/system/providers/google/cloud/dataplex/resources/__init__.py
deleted file mode 100644
index 13a83393a9..0000000000
--- a/tests/system/providers/google/cloud/dataplex/resources/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
diff --git a/tests/system/providers/google/cloud/dataplex/resources/spark_example_pi.py b/tests/system/providers/google/cloud/dataplex/resources/spark_example_pi.py
deleted file mode 100644
index be477d202a..0000000000
--- a/tests/system/providers/google/cloud/dataplex/resources/spark_example_pi.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from __future__ import annotations
-
-import random
-import sys
-from operator import add
-
-from pyspark.sql import SparkSession
-
-if __name__ == "__main__":
- """
- Usage: pi [partitions]
- """
- spark = SparkSession.builder.appName("PythonPi").getOrCreate()
-
- partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2
- n = 100000 * partitions
-
- def f(_: int) -> float:
- x = random.random() * 2 - 1
- y = random.random() * 2 - 1
- return 1 if x**2 + y**2 <= 1 else 0
-
- count = spark.sparkContext.parallelize(range(1, n + 1), partitions).map(f).reduce(add)
- print(f"Pi is roughly {4.0 * count / n:f}")
-
- spark.stop()