You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by ep...@apache.org on 2022/07/30 06:23:53 UTC

[airflow] branch main updated: Add `uri_pattern` query param to Get `/datasets` endpoint (#25411)

This is an automated email from the ASF dual-hosted git repository.

ephraimanierobi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new d21947ef8c Add `uri_pattern` query param to Get `/datasets` endpoint (#25411)
d21947ef8c is described below

commit d21947ef8cfb7ea4700a6de4d0dee168e7b2f693
Author: Ephraim Anierobi <sp...@gmail.com>
AuthorDate: Sat Jul 30 07:23:42 2022 +0100

    Add `uri_pattern` query param to Get `/datasets` endpoint (#25411)
---
 .../api_connexion/endpoints/dataset_endpoint.py    |  9 +++++-
 airflow/api_connexion/openapi/v1.yaml              |  7 +++++
 airflow/www/static/js/types/api-generated.ts       |  2 ++
 .../endpoints/test_dataset_endpoint.py             | 32 ++++++++++++++++++++++
 4 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/airflow/api_connexion/endpoints/dataset_endpoint.py b/airflow/api_connexion/endpoints/dataset_endpoint.py
index b277f138d5..b6d4ac0ca5 100644
--- a/airflow/api_connexion/endpoints/dataset_endpoint.py
+++ b/airflow/api_connexion/endpoints/dataset_endpoint.py
@@ -57,13 +57,20 @@ def get_dataset(id: int, session: Session = NEW_SESSION) -> APIResponse:
 @format_parameters({'limit': check_limit})
 @provide_session
 def get_datasets(
-    *, limit: int, offset: int = 0, order_by: str = "id", session: Session = NEW_SESSION
+    *,
+    limit: int,
+    offset: int = 0,
+    uri_pattern: Optional[str] = None,
+    order_by: str = "id",
+    session: Session = NEW_SESSION,
 ) -> APIResponse:
     """Get datasets"""
     allowed_attrs = ['id', 'uri', 'created_at', 'updated_at']
 
     total_entries = session.query(func.count(Dataset.id)).scalar()
     query = session.query(Dataset)
+    if uri_pattern:
+        query = query.filter(Dataset.uri.ilike(f"%{uri_pattern}%"))
     query = apply_sorting(query, order_by, {}, allowed_attrs)
     datasets = (
         query.options(
diff --git a/airflow/api_connexion/openapi/v1.yaml b/airflow/api_connexion/openapi/v1.yaml
index a1e751066e..27b1c7a98a 100644
--- a/airflow/api_connexion/openapi/v1.yaml
+++ b/airflow/api_connexion/openapi/v1.yaml
@@ -1647,6 +1647,13 @@ paths:
         - $ref: '#/components/parameters/PageLimit'
         - $ref: '#/components/parameters/PageOffset'
         - $ref: '#/components/parameters/OrderBy'
+        - name: uri_pattern
+          in: query
+          schema:
+            type: string
+          required: false
+          description: |
+            If set, only return datasets with uris matching this pattern.
       responses:
         '200':
           description: Success.
diff --git a/airflow/www/static/js/types/api-generated.ts b/airflow/www/static/js/types/api-generated.ts
index 9b67414ed2..b0d7110d64 100644
--- a/airflow/www/static/js/types/api-generated.ts
+++ b/airflow/www/static/js/types/api-generated.ts
@@ -3569,6 +3569,8 @@ export interface operations {
          * *New in version 2.1.0*
          */
         order_by?: components["parameters"]["OrderBy"];
+        /** If set, only return datasets with uris matching this pattern. */
+        uri_pattern?: string;
       };
     };
     responses: {
diff --git a/tests/api_connexion/endpoints/test_dataset_endpoint.py b/tests/api_connexion/endpoints/test_dataset_endpoint.py
index 0207db090d..6e71592785 100644
--- a/tests/api_connexion/endpoints/test_dataset_endpoint.py
+++ b/tests/api_connexion/endpoints/test_dataset_endpoint.py
@@ -194,6 +194,38 @@ class TestGetDatasets(TestDatasetEndpoint):
 
         assert_401(response)
 
+    @parameterized.expand(
+        [
+            ("api/v1/datasets?uri_pattern=s3", {"s3://folder/key"}),
+            ("api/v1/datasets?uri_pattern=bucket", {"gcp://bucket/key", 'wasb://some_dataset_bucket_/key'}),
+            (
+                "api/v1/datasets?uri_pattern=dataset",
+                {"somescheme://dataset/key", "wasb://some_dataset_bucket_/key"},
+            ),
+            (
+                "api/v1/datasets?uri_pattern=",
+                {
+                    'gcp://bucket/key',
+                    's3://folder/key',
+                    'somescheme://dataset/key',
+                    "wasb://some_dataset_bucket_/key",
+                },
+            ),
+        ]
+    )
+    @provide_session
+    def test_filter_datasets_by_uri_pattern_works(self, url, expected_datasets, session):
+        dataset1 = Dataset("s3://folder/key")
+        dataset2 = Dataset("gcp://bucket/key")
+        dataset3 = Dataset("somescheme://dataset/key")
+        dataset4 = Dataset("wasb://some_dataset_bucket_/key")
+        session.add_all([dataset1, dataset2, dataset3, dataset4])
+        session.commit()
+        response = self.client.get(url, environ_overrides={'REMOTE_USER': "test"})
+        assert response.status_code == 200
+        dataset_urls = {dataset['uri'] for dataset in response.json['datasets']}
+        assert expected_datasets == dataset_urls
+
 
 class TestGetDatasetsEndpointPagination(TestDatasetEndpoint):
     @parameterized.expand(