Posted to commits@airflow.apache.org by bb...@apache.org on 2022/08/17 21:43:33 UTC

[airflow] branch main updated: get dataset by uri instead of id in rest api (#25770)

This is an automated email from the ASF dual-hosted git repository.

bbovenzi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 94d3e78025 get dataset by uri instead of id in rest api (#25770)
94d3e78025 is described below

commit 94d3e78025cca558dda26027adf4e446ad41943b
Author: Brent Bovenzi <br...@gmail.com>
AuthorDate: Wed Aug 17 22:43:22 2022 +0100

    get dataset by uri instead of id in rest api (#25770)
    
    * get dataset by uri in rest api
    
    * encode uri in test
---
 .../api_connexion/endpoints/dataset_endpoint.py    |   7 +-
 airflow/api_connexion/openapi/v1.yaml              |  15 +-
 airflow/www/static/js/api/useDataset.ts            |   8 +-
 airflow/www/static/js/api/useDatasetEvents.ts      |   4 +-
 airflow/www/static/js/components/Table/Cells.tsx   |   4 +-
 airflow/www/static/js/datasetUtils.js              |   2 +-
 airflow/www/static/js/datasets/DatasetEvents.tsx   |  89 +++++++++++
 airflow/www/static/js/datasets/Details.tsx         | 170 +++++++--------------
 airflow/www/static/js/datasets/List.tsx            |   2 +-
 airflow/www/static/js/datasets/index.tsx           |  14 +-
 airflow/www/static/js/types/api-generated.ts       |  18 +--
 .../endpoints/test_dataset_endpoint.py             |  17 ++-
 12 files changed, 192 insertions(+), 158 deletions(-)
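
For reference, a minimal sketch (not part of this commit) of how a client would now fetch a
dataset through the REST API by its URL-encoded URI. The webserver address and the
admin:admin credentials below are assumptions for illustration only:

    import base64
    import urllib.parse
    import urllib.request

    uri = "s3://bucket/key"
    # Encode everything, including '/' and ':', so the URI travels as a single path segment.
    encoded = urllib.parse.quote(uri, safe="")   # 's3%3A%2F%2Fbucket%2Fkey'

    request = urllib.request.Request(f"http://localhost:8080/api/v1/datasets/{encoded}")
    token = base64.b64encode(b"admin:admin").decode()
    request.add_header("Authorization", f"Basic {token}")

    with urllib.request.urlopen(request) as response:
        print(response.read().decode())          # JSON document describing the dataset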

diff --git a/airflow/api_connexion/endpoints/dataset_endpoint.py b/airflow/api_connexion/endpoints/dataset_endpoint.py
index cd9ad2a7e9..deb4f8a4bd 100644
--- a/airflow/api_connexion/endpoints/dataset_endpoint.py
+++ b/airflow/api_connexion/endpoints/dataset_endpoint.py
@@ -38,17 +38,18 @@ from airflow.utils.session import NEW_SESSION, provide_session
 
 @security.requires_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_DATASET)])
 @provide_session
-def get_dataset(id: int, session: Session = NEW_SESSION) -> APIResponse:
+def get_dataset(uri: str, session: Session = NEW_SESSION) -> APIResponse:
     """Get a Dataset"""
     dataset = (
         session.query(DatasetModel)
+        .filter(DatasetModel.uri == uri)
         .options(joinedload(DatasetModel.consuming_dags), joinedload(DatasetModel.producing_tasks))
-        .get(id)
+        .one_or_none()
     )
     if not dataset:
         raise NotFound(
             "Dataset not found",
-            detail=f"The Dataset with id: `{id}` was not found",
+            detail=f"The Dataset with uri: `{uri}` was not found",
         )
     return dataset_schema.dump(dataset)
 
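For context, a self-contained sketch of the lookup pattern the endpoint now uses: filter on the
natural key (uri) and call one_or_none() instead of Query.get(pk). The toy model, engine, and
values below are assumptions for illustration, not Airflow's DatasetModel:

    from sqlalchemy import Column, Integer, String, create_engine
    from sqlalchemy.orm import declarative_base, sessionmaker

    Base = declarative_base()

    class Dataset(Base):
        __tablename__ = "dataset"
        id = Column(Integer, primary_key=True)
        uri = Column(String, unique=True, nullable=False)

    engine = create_engine("sqlite://")
    Base.metadata.create_all(engine)
    session = sessionmaker(bind=engine)()
    session.add(Dataset(uri="s3://bucket/key"))
    session.commit()

    # one_or_none() returns the matching row or None, so a missing uri can be mapped to a 404.
    dataset = session.query(Dataset).filter(Dataset.uri == "s3://bucket/key").one_or_none()
    print(dataset.id if dataset else "not found")
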
diff --git a/airflow/api_connexion/openapi/v1.yaml b/airflow/api_connexion/openapi/v1.yaml
index 1056d63a74..e5bb89a4fe 100644
--- a/airflow/api_connexion/openapi/v1.yaml
+++ b/airflow/api_connexion/openapi/v1.yaml
@@ -1667,12 +1667,12 @@ paths:
         '403':
           $ref: '#/components/responses/PermissionDenied'
 
-  /datasets/{id}:
+  /datasets/{uri}:
     parameters:
-      - $ref: '#/components/parameters/DatasetID'
+      - $ref: '#/components/parameters/DatasetURI'
     get:
       summary: Get a dataset
-      description: Get a dataset by id.
+      description: Get a dataset by uri.
       x-openapi-router-controller: airflow.api_connexion.endpoints.dataset_endpoint
       operationId: get_dataset
       tags: [Dataset]
@@ -4313,13 +4313,14 @@ components:
       required: true
       description: The import error ID.
 
-    DatasetID:
+    DatasetURI:
       in: path
-      name: id
+      name: uri
       schema:
-        type: integer
+        type: string
+        format: path
       required: true
-      description: The Dataset ID
+      description: The encoded Dataset URI
 
     PoolName:
       in: path
diff --git a/airflow/www/static/js/api/useDataset.ts b/airflow/www/static/js/api/useDataset.ts
index 9149f1c159..6f213f5ca4 100644
--- a/airflow/www/static/js/api/useDataset.ts
+++ b/airflow/www/static/js/api/useDataset.ts
@@ -24,14 +24,14 @@ import { getMetaValue } from 'src/utils';
 import type { API } from 'src/types';
 
 interface Props {
-  datasetId: string;
+  datasetUri: string;
 }
 
-export default function useDataset({ datasetId }: Props) {
+export default function useDataset({ datasetUri }: Props) {
   return useQuery(
-    ['dataset', datasetId],
+    ['dataset', datasetUri],
     () => {
-      const datasetUrl = `${getMetaValue('datasets_api') || '/api/v1/datasets'}/${datasetId}`;
+      const datasetUrl = `${getMetaValue('datasets_api') || '/api/v1/datasets'}/${encodeURIComponent(datasetUri)}`;
       return axios.get<AxiosResponse, API.Dataset>(datasetUrl);
     },
   );
diff --git a/airflow/www/static/js/api/useDatasetEvents.ts b/airflow/www/static/js/api/useDatasetEvents.ts
index 7df7c91a1c..9412bceea2 100644
--- a/airflow/www/static/js/api/useDatasetEvents.ts
+++ b/airflow/www/static/js/api/useDatasetEvents.ts
@@ -29,7 +29,7 @@ interface DatasetEventsData {
 }
 
 interface Props {
-  datasetId?: string;
+  datasetId?: number;
   dagId?: string;
   taskId?: string;
   runId?: string;
@@ -52,7 +52,7 @@ export default function useDatasetEvents({
       if (limit) params.set('limit', limit.toString());
       if (offset) params.set('offset', offset.toString());
       if (order) params.set('order_by', order);
-      if (datasetId) params.set('dataset_id', datasetId);
+      if (datasetId) params.set('dataset_id', datasetId.toString());
       if (dagId) params.set('source_dag_id', dagId);
       if (runId) params.set('source_run_id', runId);
       if (taskId) params.set('source_task_id', taskId);
diff --git a/airflow/www/static/js/components/Table/Cells.tsx b/airflow/www/static/js/components/Table/Cells.tsx
index b111a8b6dd..92d7080164 100644
--- a/airflow/www/static/js/components/Table/Cells.tsx
+++ b/airflow/www/static/js/components/Table/Cells.tsx
@@ -36,12 +36,12 @@ interface CellProps {
 
 export const TimeCell = ({ cell: { value } }: CellProps) => <Time dateTime={value} />;
 
-export const DatasetLink = ({ cell: { value, row } }: CellProps) => {
+export const DatasetLink = ({ cell: { value } }: CellProps) => {
   const datasetsUrl = getMetaValue('datasets_url');
   return (
     <Link
       color="blue.600"
-      href={`${datasetsUrl}?dataset_id=${row.original.datasetId}`}
+      href={`${datasetsUrl}?dataset_uri=${encodeURIComponent(value)}`}
     >
       {value}
     </Link>
diff --git a/airflow/www/static/js/datasetUtils.js b/airflow/www/static/js/datasetUtils.js
index 82e9771ae4..63353738d4 100644
--- a/airflow/www/static/js/datasetUtils.js
+++ b/airflow/www/static/js/datasetUtils.js
@@ -33,7 +33,7 @@ export function openDatasetModal(dagId, summary = '', nextDatasets = [], error =
 
     const uriCell = document.createElement('td');
     const datasetLink = document.createElement('a');
-    datasetLink.href = `${datasetsUrl}?dataset_id=${d.id}`;
+    datasetLink.href = `${datasetsUrl}?dataset_uri=${encodeURIComponent(d.id)}`;
     datasetLink.innerText = d.uri;
     uriCell.append(datasetLink);
 
diff --git a/airflow/www/static/js/datasets/DatasetEvents.tsx b/airflow/www/static/js/datasets/DatasetEvents.tsx
new file mode 100644
index 0000000000..384aebd67e
--- /dev/null
+++ b/airflow/www/static/js/datasets/DatasetEvents.tsx
@@ -0,0 +1,89 @@
+/*!
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import React, { useMemo, useState } from 'react';
+import { snakeCase } from 'lodash';
+import type { SortingRule } from 'react-table';
+
+import { useDatasetEvents } from 'src/api';
+import {
+  Table, TimeCell, TaskInstanceLink,
+} from 'src/components/Table';
+
+const Events = ({
+  datasetId,
+}: { datasetId: number }) => {
+  const limit = 25;
+  const [offset, setOffset] = useState(0);
+  const [sortBy, setSortBy] = useState<SortingRule<object>[]>([{ id: 'timestamp', desc: true }]);
+
+  const sort = sortBy[0];
+  const order = sort ? `${sort.desc ? '-' : ''}${snakeCase(sort.id)}` : '';
+
+  const {
+    data: { datasetEvents, totalEntries },
+    isLoading: isEventsLoading,
+  } = useDatasetEvents({
+    datasetId, limit, offset, order,
+  });
+
+  const columns = useMemo(
+    () => [
+      {
+        Header: 'Source Task Instance',
+        accessor: 'sourceTaskId',
+        Cell: TaskInstanceLink,
+      },
+      {
+        Header: 'When',
+        accessor: 'timestamp',
+        Cell: TimeCell,
+      },
+    ],
+    [],
+  );
+
+  const data = useMemo(
+    () => datasetEvents,
+    [datasetEvents],
+  );
+
+  const memoSort = useMemo(() => sortBy, [sortBy]);
+
+  return (
+    <Table
+      data={data}
+      columns={columns}
+      manualPagination={{
+        offset,
+        setOffset,
+        totalEntries,
+      }}
+      manualSort={{
+        setSortBy,
+        sortBy,
+        initialSortBy: memoSort,
+      }}
+      pageSize={limit}
+      isLoading={isEventsLoading}
+    />
+  );
+};
+
+export default Events;
diff --git a/airflow/www/static/js/datasets/Details.tsx b/airflow/www/static/js/datasets/Details.tsx
index d871fe317f..e4815b4fcd 100644
--- a/airflow/www/static/js/datasets/Details.tsx
+++ b/airflow/www/static/js/datasets/Details.tsx
@@ -17,146 +17,82 @@
  * under the License.
  */
 
-import React, { useMemo, useState } from 'react';
+import React from 'react';
 import {
   Box, Heading, Flex, Text, Spinner, Button, Link,
 } from '@chakra-ui/react';
-import { snakeCase } from 'lodash';
-import type { SortingRule } from 'react-table';
 
-import { useDatasetEvents, useDataset } from 'src/api';
-import {
-  Table, TimeCell, TaskInstanceLink,
-} from 'src/components/Table';
+import { useDataset } from 'src/api';
 import { ClipboardButton } from 'src/components/Clipboard';
-import type { API } from 'src/types';
 import InfoTooltip from 'src/components/InfoTooltip';
 import { getMetaValue } from 'src/utils';
+import Events from './DatasetEvents';
 
 interface Props {
-  datasetId: string;
+  datasetUri: string;
   onBack: () => void;
 }
 
 const gridUrl = getMetaValue('grid_url');
 
-const Details = ({
-  dataset: {
-    uri,
-    producingTasks,
-    consumingDags,
-  },
-}: { dataset: API.Dataset }) => (
-  <Box>
-    <Heading my={2} fontWeight="normal">
-      Dataset:
-      {' '}
-      {uri}
-      <ClipboardButton value={uri} iconOnly ml={2} />
-    </Heading>
-    {producingTasks && !!producingTasks.length && (
-    <Box mb={2}>
-      <Flex alignItems="center">
-        <Heading size="md" fontWeight="normal">Producing Tasks</Heading>
-        <InfoTooltip label="Tasks that will update this dataset." size={14} />
-      </Flex>
-      {producingTasks.map(({ dagId, taskId }) => (
-        <Link
-          key={`${dagId}.${taskId}`}
-          color="blue.600"
-          href={dagId ? gridUrl?.replace('__DAG_ID__', dagId) : ''}
-          display="block"
-        >
-          {`${dagId}.${taskId}`}
-        </Link>
-      ))}
-    </Box>
-    )}
-    {consumingDags && !!consumingDags.length && (
-    <Box>
-      <Flex alignItems="center">
-        <Heading size="md" fontWeight="normal">Consuming DAGs</Heading>
-        <InfoTooltip label="DAGs that depend on this dataset updating to trigger a run." size={14} />
-      </Flex>
-      {consumingDags.map(({ dagId }) => (
-        <Link
-          key={dagId}
-          color="blue.600"
-          href={dagId ? gridUrl?.replace('__DAG_ID__', dagId) : ''}
-          display="block"
-        >
-          {dagId}
-        </Link>
-      ))}
-    </Box>
-    )}
-  </Box>
-);
-
-const DatasetDetails = ({ datasetId, onBack }: Props) => {
-  const limit = 25;
-  const [offset, setOffset] = useState(0);
-  const [sortBy, setSortBy] = useState<SortingRule<object>[]>([{ id: 'timestamp', desc: true }]);
-
-  const sort = sortBy[0];
-  const order = sort ? `${sort.desc ? '-' : ''}${snakeCase(sort.id)}` : '';
-
-  const { data: dataset, isLoading } = useDataset({ datasetId });
-  const {
-    data: { datasetEvents, totalEntries },
-    isLoading: isEventsLoading,
-  } = useDatasetEvents({
-    datasetId, limit, offset, order,
-  });
-
-  const columns = useMemo(
-    () => [
-      {
-        Header: 'Source Task Instance',
-        accessor: 'sourceTaskId',
-        Cell: TaskInstanceLink,
-      },
-      {
-        Header: 'When',
-        accessor: 'timestamp',
-        Cell: TimeCell,
-      },
-    ],
-    [],
-  );
-
-  const data = useMemo(
-    () => datasetEvents,
-    [datasetEvents],
-  );
-
-  const memoSort = useMemo(() => sortBy, [sortBy]);
+const DatasetDetails = ({ datasetUri, onBack }: Props) => {
+  const { data: dataset, isLoading } = useDataset({ datasetUri });
 
   return (
     <Box mt={[6, 3]} maxWidth="1500px">
       <Button onClick={onBack}>See all datasets</Button>
       {isLoading && <Spinner display="block" />}
-      {!!dataset && (<Details dataset={dataset} />)}
+      <Box>
+        <Heading my={2} fontWeight="normal">
+          Dataset:
+          {' '}
+          {datasetUri}
+          <ClipboardButton value={datasetUri} iconOnly ml={2} />
+        </Heading>
+        {dataset?.producingTasks && !!dataset.producingTasks.length && (
+        <Box mb={2}>
+          <Flex alignItems="center">
+            <Heading size="md" fontWeight="normal">Producing Tasks</Heading>
+            <InfoTooltip label="Tasks that will update this dataset." size={14} />
+          </Flex>
+          {dataset.producingTasks.map(({ dagId, taskId }) => (
+            <Link
+              key={`${dagId}.${taskId}`}
+              color="blue.600"
+              href={dagId ? gridUrl?.replace('__DAG_ID__', dagId) : ''}
+              display="block"
+            >
+              {`${dagId}.${taskId}`}
+            </Link>
+          ))}
+        </Box>
+        )}
+        {dataset?.consumingDags && !!dataset.consumingDags.length && (
+        <Box>
+          <Flex alignItems="center">
+            <Heading size="md" fontWeight="normal">Consuming DAGs</Heading>
+            <InfoTooltip label="DAGs that depend on this dataset updating to trigger a run." size={14} />
+          </Flex>
+          {dataset.consumingDags.map(({ dagId }) => (
+            <Link
+              key={dagId}
+              color="blue.600"
+              href={dagId ? gridUrl?.replace('__DAG_ID__', dagId) : ''}
+              display="block"
+            >
+              {dagId}
+            </Link>
+          ))}
+        </Box>
+        )}
+      </Box>
       <Box>
         <Heading size="lg" mt={3} mb={2} fontWeight="normal">History</Heading>
         <Text>Whenever a DAG has updated this dataset.</Text>
       </Box>
-      <Table
-        data={data}
-        columns={columns}
-        manualPagination={{
-          offset,
-          setOffset,
-          totalEntries,
-        }}
-        manualSort={{
-          setSortBy,
-          sortBy,
-          initialSortBy: memoSort,
-        }}
-        pageSize={limit}
-        isLoading={isEventsLoading}
-      />
+      {dataset?.id && (
+        <Events datasetId={dataset.id} />
+      )}
     </Box>
   );
 };
diff --git a/airflow/www/static/js/datasets/List.tsx b/airflow/www/static/js/datasets/List.tsx
index 4765baa02d..580c68b5c7 100644
--- a/airflow/www/static/js/datasets/List.tsx
+++ b/airflow/www/static/js/datasets/List.tsx
@@ -92,7 +92,7 @@ const DatasetsList = ({ onSelect }: Props) => {
   );
 
   const onDatasetSelect = (row: Row<API.Dataset>) => {
-    if (row.original.id) onSelect(row.original.id.toString());
+    if (row.original.uri) onSelect(row.original.uri);
   };
 
   const docsUrl = getMetaValue('datasets_docs');
diff --git a/airflow/www/static/js/datasets/index.tsx b/airflow/www/static/js/datasets/index.tsx
index 3b26b442b8..deb2dcb9ab 100644
--- a/airflow/www/static/js/datasets/index.tsx
+++ b/airflow/www/static/js/datasets/index.tsx
@@ -39,24 +39,24 @@ const cache = createCache({
 });
 const mainElement = document.getElementById('react-container');
 
-const DATASET_ID = 'dataset_id';
+const DATASET_URI = 'dataset_uri';
 
 const Datasets = () => {
   const [searchParams, setSearchParams] = useSearchParams();
 
   const onBack = () => {
-    searchParams.delete(DATASET_ID);
+    searchParams.delete(DATASET_URI);
     setSearchParams(searchParams);
   };
 
-  const onSelect = (datasetId: string) => {
-    searchParams.set(DATASET_ID, datasetId);
+  const onSelect = (datasetUri: string) => {
+    searchParams.set(DATASET_URI, datasetUri);
     setSearchParams(searchParams);
   };
 
-  const datasetId = searchParams.get(DATASET_ID);
-  return (datasetId
-    ? <DatasetDetails datasetId={datasetId} onBack={onBack} />
+  const datasetUri = decodeURIComponent(searchParams.get(DATASET_URI) || '');
+  return (datasetUri
+    ? <DatasetDetails datasetUri={datasetUri} onBack={onBack} />
     : <DatasetsList onSelect={onSelect} />
   );
 };
diff --git a/airflow/www/static/js/types/api-generated.ts b/airflow/www/static/js/types/api-generated.ts
index 8df89497f7..19e5f4f94e 100644
--- a/airflow/www/static/js/types/api-generated.ts
+++ b/airflow/www/static/js/types/api-generated.ts
@@ -491,13 +491,13 @@ export interface paths {
   "/datasets": {
     get: operations["get_datasets"];
   };
-  "/datasets/{id}": {
-    /** Get a dataset by id. */
+  "/datasets/{uri}": {
+    /** Get a dataset by uri. */
     get: operations["get_dataset"];
     parameters: {
       path: {
-        /** The Dataset ID */
-        id: components["parameters"]["DatasetID"];
+        /** The encoded Dataset URI */
+        uri: components["parameters"]["DatasetURI"];
       };
     };
   };
@@ -2027,8 +2027,8 @@ export interface components {
     EventLogID: number;
     /** @description The import error ID. */
     ImportErrorID: number;
-    /** @description The Dataset ID */
-    DatasetID: number;
+    /** @description The encoded Dataset URI */
+    DatasetURI: string;
     /** @description The pool name. */
     PoolName: string;
     /** @description The variable Key. */
@@ -3619,12 +3619,12 @@ export interface operations {
       403: components["responses"]["PermissionDenied"];
     };
   };
-  /** Get a dataset by id. */
+  /** Get a dataset by uri. */
   get_dataset: {
     parameters: {
       path: {
-        /** The Dataset ID */
-        id: components["parameters"]["DatasetID"];
+        /** The encoded Dataset URI */
+        uri: components["parameters"]["DatasetURI"];
       };
     };
     responses: {
diff --git a/tests/api_connexion/endpoints/test_dataset_endpoint.py b/tests/api_connexion/endpoints/test_dataset_endpoint.py
index b940b91d56..6bcb876f7b 100644
--- a/tests/api_connexion/endpoints/test_dataset_endpoint.py
+++ b/tests/api_connexion/endpoints/test_dataset_endpoint.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import urllib
+
 import pytest
 from parameterized import parameterized
 
@@ -80,8 +82,10 @@ class TestGetDatasetEndpoint(TestDatasetEndpoint):
         assert session.query(DatasetModel).count() == 1
 
         with assert_queries_count(5):
-            response = self.client.get("/api/v1/datasets/1", environ_overrides={'REMOTE_USER': "test"})
-
+            response = self.client.get(
+                f"/api/v1/datasets/{urllib.parse.quote('s3://bucket/key', safe='')}",
+                environ_overrides={'REMOTE_USER': "test"},
+            )
         assert response.status_code == 200
         assert response.json == {
             "id": 1,
@@ -94,10 +98,13 @@ class TestGetDatasetEndpoint(TestDatasetEndpoint):
         }
 
     def test_should_respond_404(self):
-        response = self.client.get("/api/v1/datasets/1", environ_overrides={'REMOTE_USER': "test"})
+        response = self.client.get(
+            f"/api/v1/datasets/{urllib.parse.quote('s3://bucket/key', safe='')}",
+            environ_overrides={'REMOTE_USER': "test"},
+        )
         assert response.status_code == 404
         assert {
-            'detail': "The Dataset with id: `1` was not found",
+            'detail': "The Dataset with uri: `s3://bucket/key` was not found",
             'status': 404,
             'title': 'Dataset not found',
             'type': EXCEPTIONS_LINK_MAP[404],
@@ -105,7 +112,7 @@ class TestGetDatasetEndpoint(TestDatasetEndpoint):
 
     def test_should_raises_401_unauthenticated(self, session):
         self._create_dataset(session)
-        response = self.client.get("/api/v1/datasets/1")
+        response = self.client.get(f"/api/v1/datasets/{urllib.parse.quote('s3://bucket/key', safe='')}")
         assert_401(response)