You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by el...@apache.org on 2023/02/18 19:29:42 UTC

[airflow] branch main updated: `GoogleDriveHook`: Add folder_id param to upload_file (#29477)

This is an automated email from the ASF dual-hosted git repository.

eladkal pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new f37772adfd `GoogleDriveHook`: Add folder_id param to upload_file (#29477)
f37772adfd is described below

commit f37772adfdfdee8763147e0563897e4d5d5657c8
Author: Shahar Epstein <60...@users.noreply.github.com>
AuthorDate: Sat Feb 18 21:29:35 2023 +0200

    `GoogleDriveHook`: Add folder_id param to upload_file (#29477)
    
    * add folder_id param to upload_file
    ---------
    
    Co-authored-by: Lucas Fernando Nunes <lu...@gmail.com>
    Co-authored-by: eladkal <45...@users.noreply.github.com>
    Co-authored-by: John Bampton <jb...@users.noreply.github.com>
---
 airflow/providers/google/suite/hooks/drive.py      | 24 +++++++++++++++-----
 .../google/suite/transfers/local_to_drive.py       |  4 ++++
 tests/providers/google/suite/hooks/test_drive.py   | 26 ++++++++++++++++------
 .../google/suite/transfers/test_local_to_drive.py  |  8 ++++++-
 4 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/airflow/providers/google/suite/hooks/drive.py b/airflow/providers/google/suite/hooks/drive.py
index abfae6201b..777cbc3e77 100644
--- a/airflow/providers/google/suite/hooks/drive.py
+++ b/airflow/providers/google/suite/hooks/drive.py
@@ -72,9 +72,9 @@ class GoogleDriveHook(GoogleBaseHook):
             self._conn = build("drive", self.api_version, http=http_authorized, cache_discovery=False)
         return self._conn
 
-    def _ensure_folders_exists(self, path: str) -> str:
+    def _ensure_folders_exists(self, path: str, folder_id: str) -> str:
         service = self.get_conn()
-        current_parent = "root"
+        current_parent = folder_id
         folders = path.split("/")
         depth = 0
         # First tries to enter directories
@@ -88,7 +88,13 @@ class GoogleDriveHook(GoogleBaseHook):
             ]
             result = (
                 service.files()
-                .list(q=" and ".join(conditions), spaces="drive", fields="files(id, name)")
+                .list(
+                    q=" and ".join(conditions),
+                    spaces="drive",
+                    fields="files(id, name)",
+                    includeItemsFromAllDrives=True,
+                    supportsAllDrives=True,
+                )
                 .execute(num_retries=self.num_retries)
             )
             files = result.get("files", [])
@@ -110,7 +116,11 @@ class GoogleDriveHook(GoogleBaseHook):
                 }
                 file = (
                     service.files()
-                    .create(body=file_metadata, fields="id")
+                    .create(
+                        body=file_metadata,
+                        fields="id",
+                        supportsAllDrives=True,
+                    )
                     .execute(num_retries=self.num_retries)
                 )
                 self.log.info("Created %s directory", current_folder)
@@ -202,6 +212,7 @@ class GoogleDriveHook(GoogleBaseHook):
         remote_location: str,
         chunk_size: int = 100 * 1024 * 1024,
         resumable: bool = False,
+        folder_id: str = "root",
     ) -> str:
         """
         Uploads a file that is available locally to a Google Drive service.
@@ -215,14 +226,15 @@ class GoogleDriveHook(GoogleBaseHook):
             or to -1.
         :param resumable: True if this is a resumable upload. False means upload
             in a single request.
+        :param folder_id: The base/root folder id for remote_location (part of the drive URL of a folder).
         :return: File ID
         """
         service = self.get_conn()
         directory_path, _, file_name = remote_location.rpartition("/")
         if directory_path:
-            parent = self._ensure_folders_exists(directory_path)
+            parent = self._ensure_folders_exists(path=directory_path, folder_id=folder_id)
         else:
-            parent = "root"
+            parent = folder_id
 
         file_metadata = {"name": file_name, "parents": [parent]}
         media = MediaFileUpload(local_location, chunksize=chunk_size, resumable=resumable)
diff --git a/airflow/providers/google/suite/transfers/local_to_drive.py b/airflow/providers/google/suite/transfers/local_to_drive.py
index 228def2c60..14b6f2da4b 100644
--- a/airflow/providers/google/suite/transfers/local_to_drive.py
+++ b/airflow/providers/google/suite/transfers/local_to_drive.py
@@ -63,6 +63,7 @@ class LocalFilesystemToGoogleDriveOperator(BaseOperator):
         If set as a sequence, the identities from the list must grant
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account
+    :param folder_id: The base/root folder id for each local path in the Drive folder
     :return: Remote file ids after upload
     """
 
@@ -82,6 +83,7 @@ class LocalFilesystemToGoogleDriveOperator(BaseOperator):
         resumable: bool = False,
         delegate_to: str | None = None,
         impersonation_chain: str | Sequence[str] | None = None,
+        folder_id: str = "root",
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -94,6 +96,7 @@ class LocalFilesystemToGoogleDriveOperator(BaseOperator):
         self.resumable = resumable
         self.delegate_to = delegate_to
         self.impersonation_chain = impersonation_chain
+        self.folder_id = folder_id
 
     def execute(self, context: Context) -> list[str]:
         hook = GoogleDriveHook(
@@ -113,6 +116,7 @@ class LocalFilesystemToGoogleDriveOperator(BaseOperator):
                     remote_location=str(Path(self.drive_folder) / Path(local_path).name),
                     chunk_size=self.chunk_size,
                     resumable=self.resumable,
+                    folder_id=self.folder_id,
                 )
 
                 remote_file_ids.append(remote_file_id)
diff --git a/tests/providers/google/suite/hooks/test_drive.py b/tests/providers/google/suite/hooks/test_drive.py
index 7670b270ec..d132307c02 100644
--- a/tests/providers/google/suite/hooks/test_drive.py
+++ b/tests/providers/google/suite/hooks/test_drive.py
@@ -53,19 +53,21 @@ class TestGoogleDriveHook:
             {"id": "ID_4"},
         ]
 
-        result_value = self.gdrive_hook._ensure_folders_exists("AAA/BBB/CCC/DDD")
+        result_value = self.gdrive_hook._ensure_folders_exists(path="AAA/BBB/CCC/DDD", folder_id="root")
 
         mock_get_conn.assert_has_calls(
             [
                 mock.call()
                 .files()
                 .list(
+                    fields="files(id, name)",
+                    includeItemsFromAllDrives=True,
                     q=(
                         "trashed=false and mimeType='application/vnd.google-apps.folder' "
                         "and name='AAA' and 'root' in parents"
                     ),
                     spaces="drive",
-                    fields="files(id, name)",
+                    supportsAllDrives=True,
                 ),
                 mock.call()
                 .files()
@@ -76,6 +78,7 @@ class TestGoogleDriveHook:
                         "parents": ["root"],
                     },
                     fields="id",
+                    supportsAllDrives=True,
                 ),
                 mock.call()
                 .files()
@@ -86,6 +89,7 @@ class TestGoogleDriveHook:
                         "parents": ["ID_1"],
                     },
                     fields="id",
+                    supportsAllDrives=True,
                 ),
                 mock.call()
                 .files()
@@ -96,6 +100,7 @@ class TestGoogleDriveHook:
                         "parents": ["ID_2"],
                     },
                     fields="id",
+                    supportsAllDrives=True,
                 ),
                 mock.call()
                 .files()
@@ -106,6 +111,7 @@ class TestGoogleDriveHook:
                         "parents": ["ID_3"],
                     },
                     fields="id",
+                    supportsAllDrives=True,
                 ),
             ],
             any_order=True,
@@ -125,7 +131,7 @@ class TestGoogleDriveHook:
             {"id": "ID_4"},
         ]
 
-        result_value = self.gdrive_hook._ensure_folders_exists("AAA/BBB/CCC/DDD")
+        result_value = self.gdrive_hook._ensure_folders_exists(path="AAA/BBB/CCC/DDD", folder_id="root")
 
         mock_get_conn.assert_has_calls(
             [
@@ -133,12 +139,14 @@ class TestGoogleDriveHook:
                     mock.call()
                     .files()
                     .list(
+                        fields="files(id, name)",
+                        includeItemsFromAllDrives=True,
                         q=(
                             "trashed=false and mimeType='application/vnd.google-apps.folder' "
                             f"and name='{d}' and '{key}' in parents"
                         ),
                         spaces="drive",
-                        fields="files(id, name)",
+                        supportsAllDrives=True,
                     )
                     for d, key in [("AAA", "root"), ("BBB", "ID_1"), ("CCC", "ID_2")]
                 ],
@@ -151,6 +159,7 @@ class TestGoogleDriveHook:
                         "parents": ["ID_2"],
                     },
                     fields="id",
+                    supportsAllDrives=True,
                 ),
                 mock.call()
                 .files()
@@ -161,6 +170,7 @@ class TestGoogleDriveHook:
                         "parents": ["ID_3"],
                     },
                     fields="id",
+                    supportsAllDrives=True,
                 ),
             ],
             any_order=True,
@@ -177,7 +187,7 @@ class TestGoogleDriveHook:
             {"files": [{"id": "ID_4"}]},
         ]
 
-        result_value = self.gdrive_hook._ensure_folders_exists("AAA/BBB/CCC/DDD")
+        result_value = self.gdrive_hook._ensure_folders_exists(path="AAA/BBB/CCC/DDD", folder_id="root")
 
         mock_get_conn.assert_has_calls(
             [
@@ -185,12 +195,14 @@ class TestGoogleDriveHook:
                     mock.call()
                     .files()
                     .list(
+                        fields="files(id, name)",
+                        includeItemsFromAllDrives=True,
                         q=(
                             "trashed=false and mimeType='application/vnd.google-apps.folder' "
                             f"and name='{d}' and '{key}' in parents"
                         ),
                         spaces="drive",
-                        fields="files(id, name)",
+                        supportsAllDrives=True,
                     )
                     for d, key in [("AAA", "root"), ("BBB", "ID_1"), ("CCC", "ID_2"), ("DDD", "ID_3")]
                 ],
@@ -327,7 +339,7 @@ class TestGoogleDriveHook:
 
         return_value = self.gdrive_hook.upload_file("local_path", "AA/BB/CC/remote_path")
 
-        mock_ensure_folders_exists.assert_called_once_with("AA/BB/CC")
+        mock_ensure_folders_exists.assert_called_once_with(path="AA/BB/CC", folder_id="root")
         mock_get_conn.assert_has_calls(
             [
                 mock.call()
diff --git a/tests/providers/google/suite/transfers/test_local_to_drive.py b/tests/providers/google/suite/transfers/test_local_to_drive.py
index 703e66ec23..affc4f1f7e 100644
--- a/tests/providers/google/suite/transfers/test_local_to_drive.py
+++ b/tests/providers/google/suite/transfers/test_local_to_drive.py
@@ -33,7 +33,11 @@ class TestLocalFilesystemToGoogleDriveOperator:
         context = {}
         mock_hook.return_value.upload_file.return_value = REMOTE_FILE_IDS
         op = LocalFilesystemToGoogleDriveOperator(
-            task_id="test_task", local_paths=LOCAL_PATHS, drive_folder=DRIVE_FOLDER, gcp_conn_id=GCP_CONN_ID
+            task_id="test_task",
+            local_paths=LOCAL_PATHS,
+            drive_folder=DRIVE_FOLDER,
+            gcp_conn_id=GCP_CONN_ID,
+            folder_id="some_folder_id",
         )
         op.execute(context)
 
@@ -43,12 +47,14 @@ class TestLocalFilesystemToGoogleDriveOperator:
                 remote_location="test_folder/test1",
                 chunk_size=100 * 1024 * 1024,
                 resumable=False,
+                folder_id="some_folder_id",
             ),
             mock.call(
                 local_location="test2",
                 remote_location="test_folder/test2",
                 chunk_size=100 * 1024 * 1024,
                 resumable=False,
+                folder_id="some_folder_id",
             ),
         ]
         mock_hook.return_value.upload_file.assert_has_calls(calls)