You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by bb...@apache.org on 2022/08/17 21:43:33 UTC
[airflow] branch main updated: get dataset by uri instead of if in rest api (#25770)
This is an automated email from the ASF dual-hosted git repository.
bbovenzi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 94d3e78025 get dataset by uri instead of if in rest api (#25770)
94d3e78025 is described below
commit 94d3e78025cca558dda26027adf4e446ad41943b
Author: Brent Bovenzi <br...@gmail.com>
AuthorDate: Wed Aug 17 22:43:22 2022 +0100
get dataset by uri instead of if in rest api (#25770)
* get dataset by uri in rest api
* encode uri in test
---
.../api_connexion/endpoints/dataset_endpoint.py | 7 +-
airflow/api_connexion/openapi/v1.yaml | 15 +-
airflow/www/static/js/api/useDataset.ts | 8 +-
airflow/www/static/js/api/useDatasetEvents.ts | 4 +-
airflow/www/static/js/components/Table/Cells.tsx | 4 +-
airflow/www/static/js/datasetUtils.js | 2 +-
airflow/www/static/js/datasets/DatasetEvents.tsx | 89 +++++++++++
airflow/www/static/js/datasets/Details.tsx | 170 +++++++--------------
airflow/www/static/js/datasets/List.tsx | 2 +-
airflow/www/static/js/datasets/index.tsx | 14 +-
airflow/www/static/js/types/api-generated.ts | 18 +--
.../endpoints/test_dataset_endpoint.py | 17 ++-
12 files changed, 192 insertions(+), 158 deletions(-)
diff --git a/airflow/api_connexion/endpoints/dataset_endpoint.py b/airflow/api_connexion/endpoints/dataset_endpoint.py
index cd9ad2a7e9..deb4f8a4bd 100644
--- a/airflow/api_connexion/endpoints/dataset_endpoint.py
+++ b/airflow/api_connexion/endpoints/dataset_endpoint.py
@@ -38,17 +38,18 @@ from airflow.utils.session import NEW_SESSION, provide_session
@security.requires_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_DATASET)])
@provide_session
-def get_dataset(id: int, session: Session = NEW_SESSION) -> APIResponse:
+def get_dataset(uri: str, session: Session = NEW_SESSION) -> APIResponse:
"""Get a Dataset"""
dataset = (
session.query(DatasetModel)
+ .filter(DatasetModel.uri == uri)
.options(joinedload(DatasetModel.consuming_dags), joinedload(DatasetModel.producing_tasks))
- .get(id)
+ .one_or_none()
)
if not dataset:
raise NotFound(
"Dataset not found",
- detail=f"The Dataset with id: `{id}` was not found",
+ detail=f"The Dataset with uri: `{uri}` was not found",
)
return dataset_schema.dump(dataset)
diff --git a/airflow/api_connexion/openapi/v1.yaml b/airflow/api_connexion/openapi/v1.yaml
index 1056d63a74..e5bb89a4fe 100644
--- a/airflow/api_connexion/openapi/v1.yaml
+++ b/airflow/api_connexion/openapi/v1.yaml
@@ -1667,12 +1667,12 @@ paths:
'403':
$ref: '#/components/responses/PermissionDenied'
- /datasets/{id}:
+ /datasets/{uri}:
parameters:
- - $ref: '#/components/parameters/DatasetID'
+ - $ref: '#/components/parameters/DatasetURI'
get:
summary: Get a dataset
- description: Get a dataset by id.
+ description: Get a dataset by uri.
x-openapi-router-controller: airflow.api_connexion.endpoints.dataset_endpoint
operationId: get_dataset
tags: [Dataset]
@@ -4313,13 +4313,14 @@ components:
required: true
description: The import error ID.
- DatasetID:
+ DatasetURI:
in: path
- name: id
+ name: uri
schema:
- type: integer
+ type: string
+ format: path
required: true
- description: The Dataset ID
+ description: The encoded Dataset URI
PoolName:
in: path
diff --git a/airflow/www/static/js/api/useDataset.ts b/airflow/www/static/js/api/useDataset.ts
index 9149f1c159..6f213f5ca4 100644
--- a/airflow/www/static/js/api/useDataset.ts
+++ b/airflow/www/static/js/api/useDataset.ts
@@ -24,14 +24,14 @@ import { getMetaValue } from 'src/utils';
import type { API } from 'src/types';
interface Props {
- datasetId: string;
+ datasetUri: string;
}
-export default function useDataset({ datasetId }: Props) {
+export default function useDataset({ datasetUri }: Props) {
return useQuery(
- ['dataset', datasetId],
+ ['dataset', datasetUri],
() => {
- const datasetUrl = `${getMetaValue('datasets_api') || '/api/v1/datasets'}/${datasetId}`;
+ const datasetUrl = `${getMetaValue('datasets_api') || '/api/v1/datasets'}/${encodeURIComponent(datasetUri)}`;
return axios.get<AxiosResponse, API.Dataset>(datasetUrl);
},
);
diff --git a/airflow/www/static/js/api/useDatasetEvents.ts b/airflow/www/static/js/api/useDatasetEvents.ts
index 7df7c91a1c..9412bceea2 100644
--- a/airflow/www/static/js/api/useDatasetEvents.ts
+++ b/airflow/www/static/js/api/useDatasetEvents.ts
@@ -29,7 +29,7 @@ interface DatasetEventsData {
}
interface Props {
- datasetId?: string;
+ datasetId?: number;
dagId?: string;
taskId?: string;
runId?: string;
@@ -52,7 +52,7 @@ export default function useDatasetEvents({
if (limit) params.set('limit', limit.toString());
if (offset) params.set('offset', offset.toString());
if (order) params.set('order_by', order);
- if (datasetId) params.set('dataset_id', datasetId);
+ if (datasetId) params.set('dataset_id', datasetId.toString());
if (dagId) params.set('source_dag_id', dagId);
if (runId) params.set('source_run_id', runId);
if (taskId) params.set('source_task_id', taskId);
diff --git a/airflow/www/static/js/components/Table/Cells.tsx b/airflow/www/static/js/components/Table/Cells.tsx
index b111a8b6dd..92d7080164 100644
--- a/airflow/www/static/js/components/Table/Cells.tsx
+++ b/airflow/www/static/js/components/Table/Cells.tsx
@@ -36,12 +36,12 @@ interface CellProps {
export const TimeCell = ({ cell: { value } }: CellProps) => <Time dateTime={value} />;
-export const DatasetLink = ({ cell: { value, row } }: CellProps) => {
+export const DatasetLink = ({ cell: { value } }: CellProps) => {
const datasetsUrl = getMetaValue('datasets_url');
return (
<Link
color="blue.600"
- href={`${datasetsUrl}?dataset_id=${row.original.datasetId}`}
+ href={`${datasetsUrl}?dataset_uri=${encodeURIComponent(value)}`}
>
{value}
</Link>
diff --git a/airflow/www/static/js/datasetUtils.js b/airflow/www/static/js/datasetUtils.js
index 82e9771ae4..63353738d4 100644
--- a/airflow/www/static/js/datasetUtils.js
+++ b/airflow/www/static/js/datasetUtils.js
@@ -33,7 +33,7 @@ export function openDatasetModal(dagId, summary = '', nextDatasets = [], error =
const uriCell = document.createElement('td');
const datasetLink = document.createElement('a');
- datasetLink.href = `${datasetsUrl}?dataset_id=${d.id}`;
+ datasetLink.href = `${datasetsUrl}?dataset_uri=${encodeURIComponent(d.id)}`;
datasetLink.innerText = d.uri;
uriCell.append(datasetLink);
diff --git a/airflow/www/static/js/datasets/DatasetEvents.tsx b/airflow/www/static/js/datasets/DatasetEvents.tsx
new file mode 100644
index 0000000000..384aebd67e
--- /dev/null
+++ b/airflow/www/static/js/datasets/DatasetEvents.tsx
@@ -0,0 +1,89 @@
+/*!
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import React, { useMemo, useState } from 'react';
+import { snakeCase } from 'lodash';
+import type { SortingRule } from 'react-table';
+
+import { useDatasetEvents } from 'src/api';
+import {
+ Table, TimeCell, TaskInstanceLink,
+} from 'src/components/Table';
+
+const Events = ({
+ datasetId,
+}: { datasetId: number }) => {
+ const limit = 25;
+ const [offset, setOffset] = useState(0);
+ const [sortBy, setSortBy] = useState<SortingRule<object>[]>([{ id: 'timestamp', desc: true }]);
+
+ const sort = sortBy[0];
+ const order = sort ? `${sort.desc ? '-' : ''}${snakeCase(sort.id)}` : '';
+
+ const {
+ data: { datasetEvents, totalEntries },
+ isLoading: isEventsLoading,
+ } = useDatasetEvents({
+ datasetId, limit, offset, order,
+ });
+
+ const columns = useMemo(
+ () => [
+ {
+ Header: 'Source Task Instance',
+ accessor: 'sourceTaskId',
+ Cell: TaskInstanceLink,
+ },
+ {
+ Header: 'When',
+ accessor: 'timestamp',
+ Cell: TimeCell,
+ },
+ ],
+ [],
+ );
+
+ const data = useMemo(
+ () => datasetEvents,
+ [datasetEvents],
+ );
+
+ const memoSort = useMemo(() => sortBy, [sortBy]);
+
+ return (
+ <Table
+ data={data}
+ columns={columns}
+ manualPagination={{
+ offset,
+ setOffset,
+ totalEntries,
+ }}
+ manualSort={{
+ setSortBy,
+ sortBy,
+ initialSortBy: memoSort,
+ }}
+ pageSize={limit}
+ isLoading={isEventsLoading}
+ />
+ );
+};
+
+export default Events;
diff --git a/airflow/www/static/js/datasets/Details.tsx b/airflow/www/static/js/datasets/Details.tsx
index d871fe317f..e4815b4fcd 100644
--- a/airflow/www/static/js/datasets/Details.tsx
+++ b/airflow/www/static/js/datasets/Details.tsx
@@ -17,146 +17,82 @@
* under the License.
*/
-import React, { useMemo, useState } from 'react';
+import React from 'react';
import {
Box, Heading, Flex, Text, Spinner, Button, Link,
} from '@chakra-ui/react';
-import { snakeCase } from 'lodash';
-import type { SortingRule } from 'react-table';
-import { useDatasetEvents, useDataset } from 'src/api';
-import {
- Table, TimeCell, TaskInstanceLink,
-} from 'src/components/Table';
+import { useDataset } from 'src/api';
import { ClipboardButton } from 'src/components/Clipboard';
-import type { API } from 'src/types';
import InfoTooltip from 'src/components/InfoTooltip';
import { getMetaValue } from 'src/utils';
+import Events from './DatasetEvents';
interface Props {
- datasetId: string;
+ datasetUri: string;
onBack: () => void;
}
const gridUrl = getMetaValue('grid_url');
-const Details = ({
- dataset: {
- uri,
- producingTasks,
- consumingDags,
- },
-}: { dataset: API.Dataset }) => (
- <Box>
- <Heading my={2} fontWeight="normal">
- Dataset:
- {' '}
- {uri}
- <ClipboardButton value={uri} iconOnly ml={2} />
- </Heading>
- {producingTasks && !!producingTasks.length && (
- <Box mb={2}>
- <Flex alignItems="center">
- <Heading size="md" fontWeight="normal">Producing Tasks</Heading>
- <InfoTooltip label="Tasks that will update this dataset." size={14} />
- </Flex>
- {producingTasks.map(({ dagId, taskId }) => (
- <Link
- key={`${dagId}.${taskId}`}
- color="blue.600"
- href={dagId ? gridUrl?.replace('__DAG_ID__', dagId) : ''}
- display="block"
- >
- {`${dagId}.${taskId}`}
- </Link>
- ))}
- </Box>
- )}
- {consumingDags && !!consumingDags.length && (
- <Box>
- <Flex alignItems="center">
- <Heading size="md" fontWeight="normal">Consuming DAGs</Heading>
- <InfoTooltip label="DAGs that depend on this dataset updating to trigger a run." size={14} />
- </Flex>
- {consumingDags.map(({ dagId }) => (
- <Link
- key={dagId}
- color="blue.600"
- href={dagId ? gridUrl?.replace('__DAG_ID__', dagId) : ''}
- display="block"
- >
- {dagId}
- </Link>
- ))}
- </Box>
- )}
- </Box>
-);
-
-const DatasetDetails = ({ datasetId, onBack }: Props) => {
- const limit = 25;
- const [offset, setOffset] = useState(0);
- const [sortBy, setSortBy] = useState<SortingRule<object>[]>([{ id: 'timestamp', desc: true }]);
-
- const sort = sortBy[0];
- const order = sort ? `${sort.desc ? '-' : ''}${snakeCase(sort.id)}` : '';
-
- const { data: dataset, isLoading } = useDataset({ datasetId });
- const {
- data: { datasetEvents, totalEntries },
- isLoading: isEventsLoading,
- } = useDatasetEvents({
- datasetId, limit, offset, order,
- });
-
- const columns = useMemo(
- () => [
- {
- Header: 'Source Task Instance',
- accessor: 'sourceTaskId',
- Cell: TaskInstanceLink,
- },
- {
- Header: 'When',
- accessor: 'timestamp',
- Cell: TimeCell,
- },
- ],
- [],
- );
-
- const data = useMemo(
- () => datasetEvents,
- [datasetEvents],
- );
-
- const memoSort = useMemo(() => sortBy, [sortBy]);
+const DatasetDetails = ({ datasetUri, onBack }: Props) => {
+ const { data: dataset, isLoading } = useDataset({ datasetUri });
return (
<Box mt={[6, 3]} maxWidth="1500px">
<Button onClick={onBack}>See all datasets</Button>
{isLoading && <Spinner display="block" />}
- {!!dataset && (<Details dataset={dataset} />)}
+ <Box>
+ <Heading my={2} fontWeight="normal">
+ Dataset:
+ {' '}
+ {datasetUri}
+ <ClipboardButton value={datasetUri} iconOnly ml={2} />
+ </Heading>
+ {dataset?.producingTasks && !!dataset.producingTasks.length && (
+ <Box mb={2}>
+ <Flex alignItems="center">
+ <Heading size="md" fontWeight="normal">Producing Tasks</Heading>
+ <InfoTooltip label="Tasks that will update this dataset." size={14} />
+ </Flex>
+ {dataset.producingTasks.map(({ dagId, taskId }) => (
+ <Link
+ key={`${dagId}.${taskId}`}
+ color="blue.600"
+ href={dagId ? gridUrl?.replace('__DAG_ID__', dagId) : ''}
+ display="block"
+ >
+ {`${dagId}.${taskId}`}
+ </Link>
+ ))}
+ </Box>
+ )}
+ {dataset?.consumingDags && !!dataset.consumingDags.length && (
+ <Box>
+ <Flex alignItems="center">
+ <Heading size="md" fontWeight="normal">Consuming DAGs</Heading>
+ <InfoTooltip label="DAGs that depend on this dataset updating to trigger a run." size={14} />
+ </Flex>
+ {dataset.consumingDags.map(({ dagId }) => (
+ <Link
+ key={dagId}
+ color="blue.600"
+ href={dagId ? gridUrl?.replace('__DAG_ID__', dagId) : ''}
+ display="block"
+ >
+ {dagId}
+ </Link>
+ ))}
+ </Box>
+ )}
+ </Box>
<Box>
<Heading size="lg" mt={3} mb={2} fontWeight="normal">History</Heading>
<Text>Whenever a DAG has updated this dataset.</Text>
</Box>
- <Table
- data={data}
- columns={columns}
- manualPagination={{
- offset,
- setOffset,
- totalEntries,
- }}
- manualSort={{
- setSortBy,
- sortBy,
- initialSortBy: memoSort,
- }}
- pageSize={limit}
- isLoading={isEventsLoading}
- />
+ {dataset?.id && (
+ <Events datasetId={dataset.id} />
+ )}
</Box>
);
};
diff --git a/airflow/www/static/js/datasets/List.tsx b/airflow/www/static/js/datasets/List.tsx
index 4765baa02d..580c68b5c7 100644
--- a/airflow/www/static/js/datasets/List.tsx
+++ b/airflow/www/static/js/datasets/List.tsx
@@ -92,7 +92,7 @@ const DatasetsList = ({ onSelect }: Props) => {
);
const onDatasetSelect = (row: Row<API.Dataset>) => {
- if (row.original.id) onSelect(row.original.id.toString());
+ if (row.original.uri) onSelect(row.original.uri);
};
const docsUrl = getMetaValue('datasets_docs');
diff --git a/airflow/www/static/js/datasets/index.tsx b/airflow/www/static/js/datasets/index.tsx
index 3b26b442b8..deb2dcb9ab 100644
--- a/airflow/www/static/js/datasets/index.tsx
+++ b/airflow/www/static/js/datasets/index.tsx
@@ -39,24 +39,24 @@ const cache = createCache({
});
const mainElement = document.getElementById('react-container');
-const DATASET_ID = 'dataset_id';
+const DATASET_URI = 'dataset_uri';
const Datasets = () => {
const [searchParams, setSearchParams] = useSearchParams();
const onBack = () => {
- searchParams.delete(DATASET_ID);
+ searchParams.delete(DATASET_URI);
setSearchParams(searchParams);
};
- const onSelect = (datasetId: string) => {
- searchParams.set(DATASET_ID, datasetId);
+ const onSelect = (datasetUri: string) => {
+ searchParams.set(DATASET_URI, datasetUri);
setSearchParams(searchParams);
};
- const datasetId = searchParams.get(DATASET_ID);
- return (datasetId
- ? <DatasetDetails datasetId={datasetId} onBack={onBack} />
+ const datasetUri = decodeURIComponent(searchParams.get(DATASET_URI) || '');
+ return (datasetUri
+ ? <DatasetDetails datasetUri={datasetUri} onBack={onBack} />
: <DatasetsList onSelect={onSelect} />
);
};
diff --git a/airflow/www/static/js/types/api-generated.ts b/airflow/www/static/js/types/api-generated.ts
index 8df89497f7..19e5f4f94e 100644
--- a/airflow/www/static/js/types/api-generated.ts
+++ b/airflow/www/static/js/types/api-generated.ts
@@ -491,13 +491,13 @@ export interface paths {
"/datasets": {
get: operations["get_datasets"];
};
- "/datasets/{id}": {
- /** Get a dataset by id. */
+ "/datasets/{uri}": {
+ /** Get a dataset by uri. */
get: operations["get_dataset"];
parameters: {
path: {
- /** The Dataset ID */
- id: components["parameters"]["DatasetID"];
+ /** The encoded Dataset URI */
+ uri: components["parameters"]["DatasetURI"];
};
};
};
@@ -2027,8 +2027,8 @@ export interface components {
EventLogID: number;
/** @description The import error ID. */
ImportErrorID: number;
- /** @description The Dataset ID */
- DatasetID: number;
+ /** @description The encoded Dataset URI */
+ DatasetURI: string;
/** @description The pool name. */
PoolName: string;
/** @description The variable Key. */
@@ -3619,12 +3619,12 @@ export interface operations {
403: components["responses"]["PermissionDenied"];
};
};
- /** Get a dataset by id. */
+ /** Get a dataset by uri. */
get_dataset: {
parameters: {
path: {
- /** The Dataset ID */
- id: components["parameters"]["DatasetID"];
+ /** The encoded Dataset URI */
+ uri: components["parameters"]["DatasetURI"];
};
};
responses: {
diff --git a/tests/api_connexion/endpoints/test_dataset_endpoint.py b/tests/api_connexion/endpoints/test_dataset_endpoint.py
index b940b91d56..6bcb876f7b 100644
--- a/tests/api_connexion/endpoints/test_dataset_endpoint.py
+++ b/tests/api_connexion/endpoints/test_dataset_endpoint.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+import urllib
+
import pytest
from parameterized import parameterized
@@ -80,8 +82,10 @@ class TestGetDatasetEndpoint(TestDatasetEndpoint):
assert session.query(DatasetModel).count() == 1
with assert_queries_count(5):
- response = self.client.get("/api/v1/datasets/1", environ_overrides={'REMOTE_USER': "test"})
-
+ response = self.client.get(
+ f"/api/v1/datasets/{urllib.parse.quote('s3://bucket/key', safe='')}",
+ environ_overrides={'REMOTE_USER': "test"},
+ )
assert response.status_code == 200
assert response.json == {
"id": 1,
@@ -94,10 +98,13 @@ class TestGetDatasetEndpoint(TestDatasetEndpoint):
}
def test_should_respond_404(self):
- response = self.client.get("/api/v1/datasets/1", environ_overrides={'REMOTE_USER': "test"})
+ response = self.client.get(
+ f"/api/v1/datasets/{urllib.parse.quote('s3://bucket/key', safe='')}",
+ environ_overrides={'REMOTE_USER': "test"},
+ )
assert response.status_code == 404
assert {
- 'detail': "The Dataset with id: `1` was not found",
+ 'detail': "The Dataset with uri: `s3://bucket/key` was not found",
'status': 404,
'title': 'Dataset not found',
'type': EXCEPTIONS_LINK_MAP[404],
@@ -105,7 +112,7 @@ class TestGetDatasetEndpoint(TestDatasetEndpoint):
def test_should_raises_401_unauthenticated(self, session):
self._create_dataset(session)
- response = self.client.get("/api/v1/datasets/1")
+ response = self.client.get(f"/api/v1/datasets/{urllib.parse.quote('s3://bucket/key', safe='')}")
assert_401(response)