Posted to commits@airflow.apache.org by ep...@apache.org on 2022/10/18 13:10:18 UTC
[airflow] 02/41: Add last_updated_at and total_updates to datasets list view (#26358)
This is an automated email from the ASF dual-hosted git repository.
ephraimanierobi pushed a commit to branch v2-4-test
in repository https://gitbox.apache.org/repos/asf/airflow.git
commit 48ce0eaca286f80e1c7b01ee957b877cfa89f27f
Author: blag <bl...@users.noreply.github.com>
AuthorDate: Wed Oct 5 06:51:09 2022 -0700
Add last_updated_at and total_updates to datasets list view (#26358)
* wip
* add update info to datasets list in UI
* wip
* wip
* Cleanup
* Fixups and cleanups
* Add support for MySQL sorting by null first/last
* Add support for MSSQL sorting by null first/last (a NULL-ordering sketch follows this commit message)
* Remove unnecessary extras
Co-authored-by: Jed Cunningham <66...@users.noreply.github.com>
* Clear dataset-related objects before and after tests
Co-authored-by: Jed Cunningham <66...@users.noreply.github.com>
* Revert using single letters for datasets
* Remove self.default_time and its uses
* Improve fixture creation
* Remove comment
* Add __future__.annotations import
* Black formatting
* Sort nulls last, using normal SQL rules
* Fixup: code and tests
* Add missing import
* Expand tests to try to produce more dataset events than the correct count
* Fix aggregate counts for datasets with multiple producing tasks or multiple consuming DAGs
* Fix COUNT DISTINCT for MySQL and MSSQL
* change last update to a sortable column
* server-side sorting
* Entirely remove producing_task_count and consuming_dag_count from the query and response
* Remove some now-unnecessary outer joins
* Rename endpoint to the more accurate datasets_summary
* Rename dataset summary function to match endpoint
* fixup
Co-authored-by: Brent Bovenzi <br...@gmail.com>
Co-authored-by: Jed Cunningham <je...@apache.org>
Co-authored-by: Brent Bovenzi <br...@gmail.com>
Co-authored-by: Jed Cunningham <66...@users.noreply.github.com>
(cherry picked from commit d600cbd5fe9f0cd7101ff572190a8e5e64110e22)
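For readers unfamiliar with the NULL-ordering work above: PostgreSQL sorts NULLs as if they were larger than every other value, while MySQL, MSSQL, and SQLite sort them as smaller, so a dialect check is needed to keep the datasets list ordering consistent across backends. A minimal sketch of the idea with SQLAlchemy (the helper name and table are illustrative, not from this commit):

    from sqlalchemy import Column, DateTime, MetaData, Table

    example = Table("example", MetaData(), Column("updated_at", DateTime))

    def dialect_aware_order(column, *, descending: bool, dialect_name: str):
        clause = column.desc() if descending else column.asc()
        if dialect_name == "postgresql":
            # Pin PostgreSQL to the MySQL/MSSQL/SQLite defaults:
            # NULLs first on ascending sorts, last on descending sorts.
            clause = clause.nulls_last() if descending else clause.nulls_first()
        return clause

    order_clause = dialect_aware_order(
        example.c.updated_at, descending=True, dialect_name="postgresql"
    )

This mirrors the branching in the views.py hunk below, where only the postgresql dialect gets an explicit nulls_last()/nulls_first() modifier.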
---
airflow/www/static/js/api/useDataset.ts | 8 +-
airflow/www/static/js/api/useDatasets.ts | 6 +-
airflow/www/static/js/datasets/List.tsx | 47 +++-
airflow/www/static/js/types/index.ts | 6 +
airflow/www/templates/airflow/datasets.html | 3 +-
airflow/www/views.py | 76 ++++++-
tests/www/views/test_views_dataset.py | 332 ++++++++++++++++++++++++++++
7 files changed, 464 insertions(+), 14 deletions(-)
diff --git a/airflow/www/static/js/api/useDataset.ts b/airflow/www/static/js/api/useDataset.ts
index 900bdbc2d1..4633b47242 100644
--- a/airflow/www/static/js/api/useDataset.ts
+++ b/airflow/www/static/js/api/useDataset.ts
@@ -23,11 +23,15 @@ import { useQuery } from 'react-query';
import { getMetaValue } from 'src/utils';
import type { API } from 'src/types';
-export default function useDataset({ uri }: API.GetDatasetVariables) {
+interface Props {
+ uri: string;
+}
+
+export default function useDataset({ uri }: Props) {
return useQuery(
['dataset', uri],
() => {
- const datasetUrl = `${getMetaValue('datasets_api') || '/api/v1/datasets'}/${encodeURIComponent(uri)}`;
+ const datasetUrl = getMetaValue('dataset_api').replace('__URI__', encodeURIComponent(uri));
return axios.get<AxiosResponse, API.Dataset>(datasetUrl);
},
);
diff --git a/airflow/www/static/js/api/useDatasets.ts b/airflow/www/static/js/api/useDatasets.ts
index 34e5bd93ec..ae92ffacbd 100644
--- a/airflow/www/static/js/api/useDatasets.ts
+++ b/airflow/www/static/js/api/useDatasets.ts
@@ -21,10 +21,10 @@ import axios, { AxiosResponse } from 'axios';
import { useQuery } from 'react-query';
import { getMetaValue } from 'src/utils';
-import type { API } from 'src/types';
+import type { DatasetListItem } from 'src/types';
interface DatasetsData {
- datasets: API.Dataset[];
+ datasets: DatasetListItem[];
totalEntries: number;
}
@@ -41,7 +41,7 @@ export default function useDatasets({
const query = useQuery(
['datasets', limit, offset, order, uri],
() => {
- const datasetsUrl = getMetaValue('datasets_api') || '/api/v1/datasets';
+ const datasetsUrl = getMetaValue('datasets_api');
const orderParam = order ? { order_by: order } : {};
const uriParam = uri ? { uri_pattern: uri } : {};
return axios.get<AxiosResponse, DatasetsData>(
diff --git a/airflow/www/static/js/datasets/List.tsx b/airflow/www/static/js/datasets/List.tsx
index 8e38cbefbd..8ac1bb7622 100644
--- a/airflow/www/static/js/datasets/List.tsx
+++ b/airflow/www/static/js/datasets/List.tsx
@@ -25,33 +25,66 @@ import {
Text,
Link,
} from '@chakra-ui/react';
-import { snakeCase } from 'lodash';
import type { Row, SortingRule } from 'react-table';
import { useDatasets } from 'src/api';
-import { Table } from 'src/components/Table';
+import { Table, TimeCell } from 'src/components/Table';
import type { API } from 'src/types';
import { getMetaValue } from 'src/utils';
+import { snakeCase } from 'lodash';
interface Props {
onSelect: (datasetId: string) => void;
}
+interface CellProps {
+ cell: {
+ value: any;
+ row: {
+ original: Record<string, any>;
+ }
+ }
+}
+
+const DetailCell = ({ cell: { row } }: CellProps) => {
+ const { totalUpdates, uri } = row.original;
+ return (
+ <Box>
+ <Text>{uri}</Text>
+ <Text fontSize="sm" mt={2}>
+ Total Updates:
+ {' '}
+ {totalUpdates}
+ </Text>
+ </Box>
+ );
+};
+
const DatasetsList = ({ onSelect }: Props) => {
const limit = 25;
const [offset, setOffset] = useState(0);
- const [sortBy, setSortBy] = useState<SortingRule<object>[]>([]);
+ const [sortBy, setSortBy] = useState<SortingRule<object>[]>([{ id: 'lastDatasetUpdate', desc: true }]);
const sort = sortBy[0];
const order = sort ? `${sort.desc ? '-' : ''}${snakeCase(sort.id)}` : '';
- const { data: { datasets, totalEntries }, isLoading } = useDatasets({ limit, offset, order });
+ const { data: { datasets, totalEntries }, isLoading } = useDatasets({
+ limit,
+ offset,
+ order,
+ });
const columns = useMemo(
() => [
{
Header: 'URI',
accessor: 'uri',
+ Cell: DetailCell,
+ },
+ {
+ Header: 'Last Update',
+ accessor: 'lastDatasetUpdate',
+ Cell: TimeCell,
},
],
[],
@@ -61,6 +94,7 @@ const DatasetsList = ({ onSelect }: Props) => {
() => datasets,
[datasets],
);
+ const memoSort = useMemo(() => sortBy, [sortBy]);
const onDatasetSelect = (row: Row<API.Dataset>) => {
if (row.original.uri) onSelect(row.original.uri);
@@ -76,7 +110,7 @@ const DatasetsList = ({ onSelect }: Props) => {
</Heading>
</Flex>
{!datasets.length && !isLoading && (
- <Text>
+ <Text mb={4}>
Looks like you do not have any datasets yet. Check out the
{' '}
<Link color="blue" href={docsUrl} isExternal>docs</Link>
@@ -94,11 +128,12 @@ const DatasetsList = ({ onSelect }: Props) => {
setOffset,
totalEntries,
}}
- pageSize={limit}
manualSort={{
setSortBy,
sortBy,
+ initialSortBy: memoSort,
}}
+ pageSize={limit}
onRowClicked={onDatasetSelect}
/>
</Box>
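The order value the list sends to the backend is derived from the table's sort state: the column id is snake_cased and a leading '-' marks a descending sort, which is exactly the order_by grammar the new endpoint parses. A rough Python equivalent of that mapping (the helper is hypothetical; the UI uses lodash's snakeCase):

    import re

    def order_param(sort_id: str, desc: bool) -> str:
        # camelCase -> snake_case, with a '-' prefix for descending sorts
        snake = re.sub(r"(?<!^)(?=[A-Z])", "_", sort_id).lower()
        return ("-" if desc else "") + snake

    assert order_param("lastDatasetUpdate", True) == "-last_dataset_update"
    assert order_param("uri", False) == "uri"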
diff --git a/airflow/www/static/js/types/index.ts b/airflow/www/static/js/types/index.ts
index d1386b8ee1..bf0f7d9b9c 100644
--- a/airflow/www/static/js/types/index.ts
+++ b/airflow/www/static/js/types/index.ts
@@ -97,6 +97,11 @@ interface DepEdge {
v: string;
}
+interface DatasetListItem extends API.Dataset {
+ lastDatasetUpdate: string | null;
+ totalUpdates: number;
+}
+
export type {
Dag,
DagRun,
@@ -108,4 +113,5 @@ export type {
DepEdge,
API,
RunOrdering,
+ DatasetListItem,
};
diff --git a/airflow/www/templates/airflow/datasets.html b/airflow/www/templates/airflow/datasets.html
index 36c7a9b689..6b3555c616 100644
--- a/airflow/www/templates/airflow/datasets.html
+++ b/airflow/www/templates/airflow/datasets.html
@@ -22,7 +22,8 @@
{% block head_meta %}
{{ super() }}
- <meta name="datasets_api" content="{{ url_for('/api/v1.airflow_api_connexion_endpoints_dataset_endpoint_get_datasets') }}">
+ <meta name="datasets_api" content="{{ url_for('Airflow.datasets_summary') }}">
+ <meta name="dataset_api" content="{{ url_for('/api/v1.airflow_api_connexion_endpoints_dataset_endpoint_get_dataset', uri='__URI__') }}">
<meta name="dataset_events_api" content="{{ url_for('/api/v1.airflow_api_connexion_endpoints_dataset_endpoint_get_dataset_events') }}">
<meta name="grid_url" content="{{ url_for('Airflow.grid', dag_id='__DAG_ID__') }}">
<meta name="datasets_docs" content="{{ get_docs_url('concepts/datasets.html') }}">
diff --git a/airflow/www/views.py b/airflow/www/views.py
index b1d3c1209b..3fe94e0fd9 100644
--- a/airflow/www/views.py
+++ b/airflow/www/views.py
@@ -71,7 +71,7 @@ from pendulum.datetime import DateTime
from pendulum.parsing.exceptions import ParserError
from pygments import highlight, lexers
from pygments.formatters import HtmlFormatter
-from sqlalchemy import Date, and_, desc, func, inspect, union_all
+from sqlalchemy import Date, and_, desc, distinct, func, inspect, union_all
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, joinedload
from wtforms import SelectField, validators
@@ -108,7 +108,7 @@ from airflow.ti_deps.dep_context import DepContext
from airflow.ti_deps.dependencies_deps import RUNNING_DEPS, SCHEDULER_QUEUED_DEPS
from airflow.timetables.base import DataInterval, TimeRestriction
from airflow.timetables.interval import CronDataIntervalTimetable
-from airflow.utils import timezone, yaml
+from airflow.utils import json as utils_json, timezone, yaml
from airflow.utils.airflow_flask_app import get_airflow_app
from airflow.utils.dag_edges import dag_edges
from airflow.utils.dates import infer_time_unit, scale_time_units
@@ -3547,6 +3547,78 @@ class Airflow(AirflowBaseView):
{'Content-Type': 'application/json; charset=utf-8'},
)
+ @expose('/object/datasets_summary')
+ @auth.has_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_DATASET)])
+ def datasets_summary(self):
+ """Get a summary of datasets, including the datetime they were last updated and how many updates
+ they've ever had
+ """
+ allowed_attrs = ['uri', 'last_dataset_update']
+
+ limit = int(request.args.get("limit", 25))
+ offset = int(request.args.get("offset", 0))
+ order_by = request.args.get("order_by", "uri")
+ lstripped_orderby = order_by.lstrip('-')
+
+ if lstripped_orderby not in allowed_attrs:
+ return {
+ "detail": (
+ f"Ordering with '{lstripped_orderby}' is disallowed or the attribute does not "
+ "exist on the model"
+ )
+ }, 400
+
+ limit = 50 if limit > 50 else limit
+
+ with create_session() as session:
+ if lstripped_orderby == "uri":
+ if order_by[0] == "-":
+ order_by = (DatasetModel.uri.desc(),)
+ else:
+ order_by = (DatasetModel.uri.asc(),)
+ elif lstripped_orderby == "last_dataset_update":
+ if order_by[0] == "-":
+ order_by = (
+ func.max(DatasetEvent.timestamp).desc(),
+ DatasetModel.uri.asc(),
+ )
+ if session.bind.dialect.name == "postgresql":
+ order_by = (order_by[0].nulls_last(), *order_by[1:])
+ else:
+ order_by = (
+ func.max(DatasetEvent.timestamp).asc(),
+ DatasetModel.uri.desc(),
+ )
+ if session.bind.dialect.name == "postgresql":
+ order_by = (order_by[0].nulls_first(), *order_by[1:])
+
+ total_entries = session.query(func.count(DatasetModel.id)).scalar()
+
+ datasets = [
+ dict(dataset)
+ for dataset in session.query(
+ DatasetModel.id,
+ DatasetModel.uri,
+ func.max(DatasetEvent.timestamp).label("last_dataset_update"),
+ func.count(distinct(DatasetEvent.id)).label("total_updates"),
+ )
+ .outerjoin(DatasetEvent, DatasetEvent.dataset_id == DatasetModel.id)
+ .group_by(
+ DatasetModel.id,
+ DatasetModel.uri,
+ )
+ .order_by(*order_by)
+ .offset(offset)
+ .limit(limit)
+ .all()
+ ]
+ data = {"datasets": datasets, "total_entries": total_entries}
+
+ return (
+ htmlsafe_json_dumps(data, separators=(',', ':'), cls=utils_json.AirflowJsonEncoder),
+ {'Content-Type': 'application/json; charset=utf-8'},
+ )
+
@expose('/robots.txt')
@action_logging
def robots(self):
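Taken together, the endpoint accepts limit (clamped to 50), offset, and order_by (uri or last_dataset_update, with a '-' prefix for descending), and returns the matching datasets plus a total_entries count. A hedged usage sketch (the base URL is a placeholder, and a real webserver would also require a session or token):

    import requests

    resp = requests.get(
        "http://localhost:8080/object/datasets_summary",
        params={"order_by": "-last_dataset_update", "limit": 25, "offset": 0},
    )
    payload = resp.json()
    # payload == {"datasets": [{"id": ..., "uri": ...,
    #                           "last_dataset_update": ..., "total_updates": ...}],
    #             "total_entries": ...}

An unknown order_by attribute yields a 400 with a "detail" message, as exercised by the tests below.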
diff --git a/tests/www/views/test_views_dataset.py b/tests/www/views/test_views_dataset.py
new file mode 100644
index 0000000000..c67298876d
--- /dev/null
+++ b/tests/www/views/test_views_dataset.py
@@ -0,0 +1,332 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import pendulum
+import pytest
+from dateutil.tz import UTC
+
+from airflow import Dataset
+from airflow.models.dataset import DatasetEvent, DatasetModel
+from airflow.operators.empty import EmptyOperator
+from tests.test_utils.asserts import assert_queries_count
+from tests.test_utils.db import clear_db_datasets
+
+
+class TestDatasetEndpoint:
+ @pytest.fixture(autouse=True)
+ def cleanup(self):
+ clear_db_datasets()
+ yield
+ clear_db_datasets()
+
+
+class TestGetDatasets(TestDatasetEndpoint):
+ def test_should_respond_200(self, admin_client, session):
+ datasets = [
+ DatasetModel(
+ id=i,
+ uri=f"s3://bucket/key/{i}",
+ )
+ for i in [1, 2]
+ ]
+ session.add_all(datasets)
+ session.commit()
+ assert session.query(DatasetModel).count() == 2
+
+ with assert_queries_count(8):
+ response = admin_client.get("/object/datasets_summary")
+
+ assert response.status_code == 200
+ response_data = response.json
+ assert response_data == {
+ "datasets": [
+ {
+ "id": 1,
+ "uri": "s3://bucket/key/1",
+ "last_dataset_update": None,
+ "total_updates": 0,
+ },
+ {
+ "id": 2,
+ "uri": "s3://bucket/key/2",
+ "last_dataset_update": None,
+ "total_updates": 0,
+ },
+ ],
+ "total_entries": 2,
+ }
+
+ def test_order_by_raises_400_for_invalid_attr(self, admin_client, session):
+ datasets = [
+ DatasetModel(
+ uri=f"s3://bucket/key/{i}",
+ )
+ for i in [1, 2]
+ ]
+ session.add_all(datasets)
+ session.commit()
+ assert session.query(DatasetModel).count() == 2
+
+ response = admin_client.get("/object/datasets_summary?order_by=fake")
+
+ assert response.status_code == 400
+ msg = "Ordering with 'fake' is disallowed or the attribute does not exist on the model"
+ assert response.json['detail'] == msg
+
+ @pytest.mark.parametrize(
+ "order_by, ordered_dataset_ids",
+ [
+ ("uri", [1, 2, 3, 4]),
+ ("-uri", [4, 3, 2, 1]),
+ ("last_dataset_update", [4, 1, 3, 2]),
+ ("-last_dataset_update", [2, 3, 1, 4]),
+ ],
+ )
+ def test_order_by(self, admin_client, session, order_by, ordered_dataset_ids):
+ datasets = [
+ DatasetModel(
+ id=i,
+ uri=f"s3://bucket/key/{i}",
+ )
+ for i in range(1, len(ordered_dataset_ids) + 1)
+ ]
+ session.add_all(datasets)
+ dataset_events = [
+ DatasetEvent(
+ dataset_id=datasets[2].id,
+ timestamp=pendulum.today('UTC').add(days=-3),
+ ),
+ DatasetEvent(
+ dataset_id=datasets[1].id,
+ timestamp=pendulum.today('UTC').add(days=-2),
+ ),
+ DatasetEvent(
+ dataset_id=datasets[1].id,
+ timestamp=pendulum.today('UTC').add(days=-1),
+ ),
+ ]
+ session.add_all(dataset_events)
+ session.commit()
+ assert session.query(DatasetModel).count() == len(ordered_dataset_ids)
+
+ response = admin_client.get(f"/object/datasets_summary?order_by={order_by}")
+
+ assert response.status_code == 200
+ assert ordered_dataset_ids == [json_dict['id'] for json_dict in response.json['datasets']]
+ assert response.json['total_entries'] == len(ordered_dataset_ids)
+
+ @pytest.mark.need_serialized_dag
+ def test_correct_counts_update(self, admin_client, session, dag_maker, app, monkeypatch):
+ with monkeypatch.context() as m:
+ datasets = [Dataset(uri=f"s3://bucket/key/{i}") for i in [1, 2, 3, 4, 5]]
+
+ # DAG that produces dataset #1
+ with dag_maker(dag_id='upstream', schedule=None, serialized=True, session=session):
+ EmptyOperator(task_id='task1', outlets=[datasets[0]])
+
+ # DAG that consumes only datasets #1 and #2
+ with dag_maker(dag_id='downstream', schedule=datasets[:2], serialized=True, session=session):
+ EmptyOperator(task_id='task1')
+
+ # We create multiple dataset-producing and dataset-consuming DAGs because the query requires
+ # COUNT(DISTINCT ...) for total_updates, or else it returns a multiple of the correct number due
+ # to the outer joins with DagScheduleDatasetReference and TaskOutletDatasetReference
+ # Two independent DAGs that produce dataset #3
+ with dag_maker(dag_id='independent_producer_1', serialized=True, session=session):
+ EmptyOperator(task_id='task1', outlets=[datasets[2]])
+ with dag_maker(dag_id='independent_producer_2', serialized=True, session=session):
+ EmptyOperator(task_id='task1', outlets=[datasets[2]])
+ # Two independent DAGs that consume dataset #4
+ with dag_maker(
+ dag_id='independent_consumer_1',
+ schedule=[datasets[3]],
+ serialized=True,
+ session=session,
+ ):
+ EmptyOperator(task_id='task1')
+ with dag_maker(
+ dag_id='independent_consumer_2',
+ schedule=[datasets[3]],
+ serialized=True,
+ session=session,
+ ):
+ EmptyOperator(task_id='task1')
+
+ # Independent DAG that produces and consumes the same dataset, #5
+ with dag_maker(
+ dag_id='independent_producer_self_consumer',
+ schedule=[datasets[4]],
+ serialized=True,
+ session=session,
+ ):
+ EmptyOperator(task_id='task1', outlets=[datasets[4]])
+
+ m.setattr(app, 'dag_bag', dag_maker.dagbag)
+
+ ds1_id = session.query(DatasetModel.id).filter_by(uri=datasets[0].uri).scalar()
+ ds2_id = session.query(DatasetModel.id).filter_by(uri=datasets[1].uri).scalar()
+ ds3_id = session.query(DatasetModel.id).filter_by(uri=datasets[2].uri).scalar()
+ ds4_id = session.query(DatasetModel.id).filter_by(uri=datasets[3].uri).scalar()
+ ds5_id = session.query(DatasetModel.id).filter_by(uri=datasets[4].uri).scalar()
+
+ # dataset 1 events
+ session.add_all(
+ [
+ DatasetEvent(
+ dataset_id=ds1_id,
+ timestamp=pendulum.DateTime(2022, 8, 1, i, tzinfo=UTC),
+ )
+ for i in range(3)
+ ]
+ )
+ # dataset 3 events
+ session.add_all(
+ [
+ DatasetEvent(
+ dataset_id=ds3_id,
+ timestamp=pendulum.DateTime(2022, 8, 1, i, tzinfo=UTC),
+ )
+ for i in range(3)
+ ]
+ )
+ # dataset 4 events
+ session.add_all(
+ [
+ DatasetEvent(
+ dataset_id=ds4_id,
+ timestamp=pendulum.DateTime(2022, 8, 1, i, tzinfo=UTC),
+ )
+ for i in range(4)
+ ]
+ )
+ # dataset 5 events
+ session.add_all(
+ [
+ DatasetEvent(
+ dataset_id=ds5_id,
+ timestamp=pendulum.DateTime(2022, 8, 1, i, tzinfo=UTC),
+ )
+ for i in range(5)
+ ]
+ )
+ session.commit()
+
+ response = admin_client.get("/object/datasets_summary")
+
+ assert response.status_code == 200
+ response_data = response.json
+ assert response_data == {
+ "datasets": [
+ {
+ "id": ds1_id,
+ "uri": "s3://bucket/key/1",
+ "last_dataset_update": "2022-08-01T02:00:00+00:00",
+ "total_updates": 3,
+ },
+ {
+ "id": ds2_id,
+ "uri": "s3://bucket/key/2",
+ "last_dataset_update": None,
+ "total_updates": 0,
+ },
+ {
+ "id": ds3_id,
+ "uri": "s3://bucket/key/3",
+ "last_dataset_update": "2022-08-01T02:00:00+00:00",
+ "total_updates": 3,
+ },
+ {
+ "id": ds4_id,
+ "uri": "s3://bucket/key/4",
+ "last_dataset_update": "2022-08-01T03:00:00+00:00",
+ "total_updates": 4,
+ },
+ {
+ "id": ds5_id,
+ "uri": "s3://bucket/key/5",
+ "last_dataset_update": "2022-08-01T04:00:00+00:00",
+ "total_updates": 5,
+ },
+ ],
+ "total_entries": 5,
+ }
+
+
+class TestGetDatasetsEndpointPagination(TestDatasetEndpoint):
+ @pytest.mark.parametrize(
+ "url, expected_dataset_uris",
+ [
+ # Limit test data
+ ("/object/datasets_summary?limit=1", ["s3://bucket/key/1"]),
+ ("/object/datasets_summary?limit=5", [f"s3://bucket/key/{i}" for i in range(1, 6)]),
+ # Offset test data
+ ("/object/datasets_summary?offset=1", [f"s3://bucket/key/{i}" for i in range(2, 10)]),
+ ("/object/datasets_summary?offset=3", [f"s3://bucket/key/{i}" for i in range(4, 10)]),
+ # Limit and offset test data
+ ("/object/datasets_summary?offset=3&limit=3", [f"s3://bucket/key/{i}" for i in [4, 5, 6]]),
+ ],
+ )
+ def test_limit_and_offset(self, admin_client, session, url, expected_dataset_uris):
+ datasets = [
+ DatasetModel(
+ uri=f"s3://bucket/key/{i}",
+ extra={"foo": "bar"},
+ )
+ for i in range(1, 10)
+ ]
+ session.add_all(datasets)
+ session.commit()
+
+ response = admin_client.get(url)
+
+ assert response.status_code == 200
+ dataset_uris = [dataset["uri"] for dataset in response.json["datasets"]]
+ assert dataset_uris == expected_dataset_uris
+
+ def test_should_respect_page_size_limit_default(self, admin_client, session):
+ datasets = [
+ DatasetModel(
+ uri=f"s3://bucket/key/{i}",
+ extra={"foo": "bar"},
+ )
+ for i in range(1, 60)
+ ]
+ session.add_all(datasets)
+ session.commit()
+
+ response = admin_client.get("/object/datasets_summary")
+
+ assert response.status_code == 200
+ assert len(response.json['datasets']) == 25
+
+ def test_should_return_max_if_req_above(self, admin_client, session):
+ datasets = [
+ DatasetModel(
+ uri=f"s3://bucket/key/{i}",
+ extra={"foo": "bar"},
+ )
+ for i in range(1, 60)
+ ]
+ session.add_all(datasets)
+ session.commit()
+
+ response = admin_client.get("/object/datasets_summary?limit=180")
+
+ assert response.status_code == 200
+ assert len(response.json['datasets']) == 50
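A closing note on the COUNT(DISTINCT ...) fix that test_correct_counts_update guards against regressing: when the summary query still outer-joined the reference tables, every event row was repeated once per matching DagScheduleDatasetReference or TaskOutletDatasetReference row, so a plain COUNT returned a multiple of the true update total. A toy illustration with made-up rows:

    # One dataset with 3 events and 2 producing-task references.
    events = ["e1", "e2", "e3"]
    producer_refs = ["dag_a.task1", "dag_b.task1"]

    # An outer join repeats each event once per reference row.
    joined = [(e, p) for e in events for p in producer_refs]

    assert len(joined) == 6                    # COUNT(*): 3 * 2, wrong
    assert len({e for e, _ in joined}) == 3    # COUNT(DISTINCT event_id): right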