You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by ep...@apache.org on 2022/07/30 06:23:53 UTC
[airflow] branch main updated: Add `uri_pattern` query param to Get `/datasets` endpoint (#25411)
This is an automated email from the ASF dual-hosted git repository.
ephraimanierobi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new d21947ef8c Add `uri_pattern` query param to Get `/datasets` endpoint (#25411)
d21947ef8c is described below
commit d21947ef8cfb7ea4700a6de4d0dee168e7b2f693
Author: Ephraim Anierobi <sp...@gmail.com>
AuthorDate: Sat Jul 30 07:23:42 2022 +0100
Add `uri_pattern` query param to Get `/datasets` endpoint (#25411)
---
.../api_connexion/endpoints/dataset_endpoint.py | 9 +++++-
airflow/api_connexion/openapi/v1.yaml | 7 +++++
airflow/www/static/js/types/api-generated.ts | 2 ++
.../endpoints/test_dataset_endpoint.py | 32 ++++++++++++++++++++++
4 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/airflow/api_connexion/endpoints/dataset_endpoint.py b/airflow/api_connexion/endpoints/dataset_endpoint.py
index b277f138d5..b6d4ac0ca5 100644
--- a/airflow/api_connexion/endpoints/dataset_endpoint.py
+++ b/airflow/api_connexion/endpoints/dataset_endpoint.py
@@ -57,13 +57,20 @@ def get_dataset(id: int, session: Session = NEW_SESSION) -> APIResponse:
@format_parameters({'limit': check_limit})
@provide_session
def get_datasets(
- *, limit: int, offset: int = 0, order_by: str = "id", session: Session = NEW_SESSION
+ *,
+ limit: int,
+ offset: int = 0,
+ uri_pattern: Optional[str] = None,
+ order_by: str = "id",
+ session: Session = NEW_SESSION,
) -> APIResponse:
"""Get datasets"""
allowed_attrs = ['id', 'uri', 'created_at', 'updated_at']
total_entries = session.query(func.count(Dataset.id)).scalar()
query = session.query(Dataset)
+ if uri_pattern:
+ query = query.filter(Dataset.uri.ilike(f"%{uri_pattern}%"))
query = apply_sorting(query, order_by, {}, allowed_attrs)
datasets = (
query.options(
diff --git a/airflow/api_connexion/openapi/v1.yaml b/airflow/api_connexion/openapi/v1.yaml
index a1e751066e..27b1c7a98a 100644
--- a/airflow/api_connexion/openapi/v1.yaml
+++ b/airflow/api_connexion/openapi/v1.yaml
@@ -1647,6 +1647,13 @@ paths:
- $ref: '#/components/parameters/PageLimit'
- $ref: '#/components/parameters/PageOffset'
- $ref: '#/components/parameters/OrderBy'
+ - name: uri_pattern
+ in: query
+ schema:
+ type: string
+ required: false
+ description: |
+ If set, only return datasets with uris matching this pattern.
responses:
'200':
description: Success.
diff --git a/airflow/www/static/js/types/api-generated.ts b/airflow/www/static/js/types/api-generated.ts
index 9b67414ed2..b0d7110d64 100644
--- a/airflow/www/static/js/types/api-generated.ts
+++ b/airflow/www/static/js/types/api-generated.ts
@@ -3569,6 +3569,8 @@ export interface operations {
* *New in version 2.1.0*
*/
order_by?: components["parameters"]["OrderBy"];
+ /** If set, only return datasets with uris matching this pattern. */
+ uri_pattern?: string;
};
};
responses: {
diff --git a/tests/api_connexion/endpoints/test_dataset_endpoint.py b/tests/api_connexion/endpoints/test_dataset_endpoint.py
index 0207db090d..6e71592785 100644
--- a/tests/api_connexion/endpoints/test_dataset_endpoint.py
+++ b/tests/api_connexion/endpoints/test_dataset_endpoint.py
@@ -194,6 +194,38 @@ class TestGetDatasets(TestDatasetEndpoint):
assert_401(response)
+ @parameterized.expand(
+ [
+ ("api/v1/datasets?uri_pattern=s3", {"s3://folder/key"}),
+ ("api/v1/datasets?uri_pattern=bucket", {"gcp://bucket/key", 'wasb://some_dataset_bucket_/key'}),
+ (
+ "api/v1/datasets?uri_pattern=dataset",
+ {"somescheme://dataset/key", "wasb://some_dataset_bucket_/key"},
+ ),
+ (
+ "api/v1/datasets?uri_pattern=",
+ {
+ 'gcp://bucket/key',
+ 's3://folder/key',
+ 'somescheme://dataset/key',
+ "wasb://some_dataset_bucket_/key",
+ },
+ ),
+ ]
+ )
+ @provide_session
+ def test_filter_datasets_by_uri_pattern_works(self, url, expected_datasets, session):
+ dataset1 = Dataset("s3://folder/key")
+ dataset2 = Dataset("gcp://bucket/key")
+ dataset3 = Dataset("somescheme://dataset/key")
+ dataset4 = Dataset("wasb://some_dataset_bucket_/key")
+ session.add_all([dataset1, dataset2, dataset3, dataset4])
+ session.commit()
+ response = self.client.get(url, environ_overrides={'REMOTE_USER': "test"})
+ assert response.status_code == 200
+ dataset_urls = {dataset['uri'] for dataset in response.json['datasets']}
+ assert expected_datasets == dataset_urls
+
class TestGetDatasetsEndpointPagination(TestDatasetEndpoint):
@parameterized.expand(