You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/02/22 13:46:04 UTC

[GitHub] [arrow] pitrou commented on a change in pull request #12475: ARROW-14893: [C++] Allow creating GCS filesystem from URI

pitrou commented on a change in pull request #12475:
URL: https://github.com/apache/arrow/pull/12475#discussion_r811953577



##########
File path: cpp/src/arrow/filesystem/gcsfs.cc
##########
@@ -610,21 +619,26 @@ class GcsFileSystem::Impl {
 
 bool GcsOptions::Equals(const GcsOptions& other) const {
   return credentials == other.credentials &&
-         endpoint_override == other.endpoint_override && scheme == other.scheme;
+         endpoint_override == other.endpoint_override && scheme == other.scheme &&
+         default_bucket_location == other.default_bucket_location;
 }
 
 GcsOptions GcsOptions::Defaults() {
   return GcsOptions{
       std::make_shared<GcsCredentials>(google::cloud::MakeGoogleDefaultCredentials()),
       {},
-      "https"};
+      "https",
+      /*default_bucket_location=*/{},
+      /*default_metadata=*/nullptr};

Review comment:
       Instead of spelling all default parameter values explicitly, it would probably more readable and maintainable to set the non-default attributes individually e.g.;
   ```c++
   GcsOptions options{};
   options.credentials = std::make_shared<GcsCredentials>(google::cloud::MakeInsecureCredentials());
   options.schema = "http";
   return options;
   ```
   

##########
File path: cpp/src/arrow/filesystem/gcsfs.h
##########
@@ -34,6 +35,13 @@ struct ARROW_EXPORT GcsOptions {
 
   std::string endpoint_override;
   std::string scheme;
+  // \brief Location to use for creating buckets.

Review comment:
       What kind of value can a location have? Is it a "region" like in S3?

##########
File path: cpp/src/arrow/filesystem/gcsfs.h
##########
@@ -89,6 +97,11 @@ struct ARROW_EXPORT GcsOptions {
   ///
   /// [aip/4112]: https://google.aip.dev/auth/4112
   static GcsOptions FromServiceAccountCredentials(const std::string& json_object);
+
+  // Initialize from URIs such as (gs://bucket/object).

Review comment:
       ```suggestion
     /// Initialize from URIs such as "gs://bucket/object".
   ```

##########
File path: cpp/src/arrow/filesystem/gcsfs.cc
##########
@@ -642,14 +658,73 @@ GcsOptions GcsOptions::FromImpersonatedServiceAccount(
                         google::cloud::MakeImpersonateServiceAccountCredentials(
                             base_credentials.credentials, target_service_account)),
                     {},
-                    "https"};
+                    "https",
+                    /*default_bucket_location=*/{},
+                    /*default_metadata=*/nullptr};
 }
 
 GcsOptions GcsOptions::FromServiceAccountCredentials(const std::string& json_object) {
   return GcsOptions{std::make_shared<GcsCredentials>(
                         google::cloud::MakeServiceAccountCredentials(json_object)),
                     {},
-                    "https"};
+                    "https",
+                    /*default_bucket_location=*/{},
+                    /*default_metadata=*/nullptr};
+}
+
+Result<GcsOptions> GcsOptions::FromUri(const arrow::internal::Uri& uri,
+                                       std::string* out_path) {
+  const auto bucket = uri.host();
+  auto path = uri.path();
+  if (bucket.empty()) {
+    if (!path.empty()) {
+      return Status::Invalid("Missing bucket name in GCS URI");
+    }
+  } else {
+    if (path.empty()) {
+      path = bucket;
+    } else {
+      if (path[0] != '/') {
+        return Status::Invalid("GCS URI should be absolute, not relative");
+      }
+      path = bucket + path;
+    }
+  }
+  if (out_path != nullptr) {
+    *out_path = std::string(internal::RemoveTrailingSlash(path));
+  }
+
+  std::unordered_map<std::string, std::string> options_map;
+  ARROW_ASSIGN_OR_RAISE(const auto options_items, uri.query_items());
+  for (const auto& kv : options_items) {
+    options_map.emplace(kv.first, kv.second);
+  }
+
+  if (!uri.password().empty() || !uri.username().empty()) {
+    return Status::Invalid("GCS does not accept username or password.");

Review comment:
       Is it because the GCS C++ APIs don't allow doing this, or we don't want to wire them (for security perhaps)?

##########
File path: cpp/src/arrow/filesystem/gcsfs.h
##########
@@ -34,6 +35,13 @@ struct ARROW_EXPORT GcsOptions {
 
   std::string endpoint_override;
   std::string scheme;
+  // \brief Location to use for creating buckets.

Review comment:
       ```suggestion
     /// \brief Location to use for creating buckets.
   ```

##########
File path: cpp/src/arrow/filesystem/filesystem.cc
##########
@@ -686,6 +689,16 @@ Result<std::shared_ptr<FileSystem>> FileSystemFromUriReal(const Uri& uri,
     }
     return std::make_shared<LocalFileSystem>(options, io_context);
   }
+  if (scheme == "gs") {

Review comment:
       Can we also accept "gcs" for compatibility with https://gcsfs.readthedocs.io/en/latest/#integration ?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org