You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/01/02 14:42:24 UTC
[arrow-rs] branch master updated: Derive Clone for ObjectStore builders and Make URL Parsing Stricter (#3419) (#3424)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 1889e33da Derive Clone for ObjectStore builders and Make URL Parsing Stricter (#3419) (#3424)
1889e33da is described below
commit 1889e33da31218ee2c58ad874036b17b699538b9
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Mon Jan 2 14:42:19 2023 +0000
Derive Clone for ObjectStore builders and Make URL Parsing Stricter (#3419) (#3424)
* Derive Clone for ObjectStore builders (#3419)
Make URL parsing more strict
* Review feedback
---
object_store/src/aws/mod.rs | 112 ++++++++++++++----------
object_store/src/azure/mod.rs | 196 ++++++++++++++++++++++--------------------
object_store/src/gcp/mod.rs | 95 ++++++++++----------
object_store/src/http/mod.rs | 21 +++--
4 files changed, 228 insertions(+), 196 deletions(-)
diff --git a/object_store/src/aws/mod.rs b/object_store/src/aws/mod.rs
index 0fcfbaf9c..786ccd20f 100644
--- a/object_store/src/aws/mod.rs
+++ b/object_store/src/aws/mod.rs
@@ -36,6 +36,7 @@ use bytes::Bytes;
use chrono::{DateTime, Utc};
use futures::stream::BoxStream;
use futures::TryStreamExt;
+use itertools::Itertools;
use snafu::{OptionExt, ResultExt, Snafu};
use std::collections::BTreeSet;
use std::ops::Range;
@@ -129,6 +130,9 @@ enum Error {
scheme
))]
UnknownUrlScheme { scheme: String },
+
+ #[snafu(display("URL did not match any known pattern for scheme: {}", url))]
+ UrlNotRecognised { url: String },
}
impl From<Error> for super::Error {
@@ -358,7 +362,7 @@ impl CloudMultiPartUploadImpl for S3MultiPartUpload {
/// .with_secret_access_key(SECRET_KEY)
/// .build();
/// ```
-#[derive(Debug, Default)]
+#[derive(Debug, Default, Clone)]
pub struct AmazonS3Builder {
access_key_id: Option<String>,
secret_access_key: Option<String>,
@@ -366,13 +370,13 @@ pub struct AmazonS3Builder {
bucket_name: Option<String>,
endpoint: Option<String>,
token: Option<String>,
+ url: Option<String>,
retry_config: RetryConfig,
imdsv1_fallback: bool,
virtual_hosted_style_request: bool,
metadata_endpoint: Option<String>,
profile: Option<String>,
client_options: ClientOptions,
- url_parse_error: Option<Error>,
}
impl AmazonS3Builder {
@@ -453,9 +457,7 @@ impl AmazonS3Builder {
/// - `https://s3.<bucket>.amazonaws.com`
/// - `https://<bucket>.s3.<region>.amazonaws.com`
///
- /// Please note that this is a best effort implementation, and will not fail for malformed URLs,
- /// but rather warn and ignore the passed url. The url also has no effect on how the
- /// storage is accessed - e.g. which driver or protocol is used for reading from the location.
+ /// Note: Settings derived from the URL will override any others set on this builder
///
/// # Example
/// ```
@@ -465,44 +467,39 @@ impl AmazonS3Builder {
/// .with_url("s3://bucket/path")
/// .build();
/// ```
- pub fn with_url(mut self, url: impl AsRef<str>) -> Self {
- let maybe_parsed = Url::parse(url.as_ref());
- match maybe_parsed {
- Ok(parsed) => match parsed.scheme() {
- "s3" | "s3a" => {
- self.bucket_name = parsed.host_str().map(|host| host.to_owned());
- }
- "https" => {
- if let Some(host) = parsed.host_str() {
- let parts = host.splitn(4, '.').collect::<Vec<&str>>();
- if parts.len() == 4 && parts[0] == "s3" && parts[2] == "amazonaws"
- {
- self.bucket_name = Some(parts[1].to_string());
- }
- if parts.len() == 4
- && parts[1] == "s3"
- && parts[3] == "amazonaws.com"
- {
- self.bucket_name = Some(parts[0].to_string());
- self.region = Some(parts[2].to_string());
- self.virtual_hosted_style_request = true;
- }
- }
+ pub fn with_url(mut self, url: impl Into<String>) -> Self {
+ self.url = Some(url.into());
+ self
+ }
+
+ /// Sets properties on this builder based on a URL
+ ///
+ /// This is a separate member function to allow fallible computation to
+ /// be deferred until [`Self::build`] which in turn allows deriving [`Clone`]
+ fn parse_url(&mut self, url: &str) -> Result<()> {
+ let parsed = Url::parse(url).context(UnableToParseUrlSnafu { url })?;
+ let host = parsed.host_str().context(UrlNotRecognisedSnafu { url })?;
+ let validate = |s: &str| match s.contains('.') {
+ true => Err(UrlNotRecognisedSnafu { url }.build()),
+ false => Ok(s.to_string()),
+ };
+
+ match parsed.scheme() {
+ "s3" | "s3a" => self.bucket_name = Some(validate(host)?),
+ "https" => match host.splitn(4, '.').collect_tuple() {
+ Some(("s3", bucket, "amazonaws", "com")) => {
+ self.bucket_name = Some(bucket.to_string());
}
- other => {
- self.url_parse_error = Some(Error::UnknownUrlScheme {
- scheme: other.into(),
- });
+ Some((bucket, "s3", region, "amazonaws.com")) => {
+ self.bucket_name = Some(bucket.to_string());
+ self.region = Some(region.to_string());
+ self.virtual_hosted_style_request = true;
}
+ _ => return Err(UrlNotRecognisedSnafu { url }.build().into()),
},
- Err(err) => {
- self.url_parse_error = Some(Error::UnableToParseUrl {
- source: err,
- url: url.as_ref().into(),
- });
- }
+ scheme => return Err(UnknownUrlSchemeSnafu { scheme }.build().into()),
};
- self
+ Ok(())
}
/// Set the AWS Access Key (required)
@@ -641,9 +638,9 @@ impl AmazonS3Builder {
/// Create a [`AmazonS3`] instance from the provided values,
/// consuming `self`.
- pub fn build(self) -> Result<AmazonS3> {
- if let Some(err) = self.url_parse_error {
- return Err(err.into());
+ pub fn build(mut self) -> Result<AmazonS3> {
+ if let Some(url) = self.url.take() {
+ self.parse_url(&url)?;
}
let bucket = self.bucket_name.context(MissingBucketNameSnafu)?;
@@ -1022,15 +1019,36 @@ mod tests {
#[test]
fn s3_test_urls() {
- let builder = AmazonS3Builder::new().with_url("s3://bucket/path");
+ let mut builder = AmazonS3Builder::new();
+ builder.parse_url("s3://bucket/path").unwrap();
assert_eq!(builder.bucket_name, Some("bucket".to_string()));
- let builder = AmazonS3Builder::new().with_url("https://s3.bucket.amazonaws.com");
+ let mut builder = AmazonS3Builder::new();
+ builder
+ .parse_url("https://s3.bucket.amazonaws.com")
+ .unwrap();
assert_eq!(builder.bucket_name, Some("bucket".to_string()));
- let builder =
- AmazonS3Builder::new().with_url("https://bucket.s3.region.amazonaws.com");
+ let mut builder = AmazonS3Builder::new();
+ builder
+ .parse_url("https://bucket.s3.region.amazonaws.com")
+ .unwrap();
assert_eq!(builder.bucket_name, Some("bucket".to_string()));
- assert_eq!(builder.region, Some("region".to_string()))
+ assert_eq!(builder.region, Some("region".to_string()));
+ assert!(builder.virtual_hosted_style_request);
+
+ let err_cases = [
+ "mailto://bucket/path",
+ "s3://bucket.mydomain/path",
+ "https://s3.bucket.mydomain.com",
+ "https://s3.bucket.foo.amazonaws.com",
+ "https://bucket.mydomain.region.amazonaws.com",
+ "https://bucket.s3.region.bar.amazonaws.com",
+ "https://bucket.foo.s3.amazonaws.com",
+ ];
+ let mut builder = AmazonS3Builder::new();
+ for case in err_cases {
+ builder.parse_url(case).unwrap_err();
+ }
}
}
diff --git a/object_store/src/azure/mod.rs b/object_store/src/azure/mod.rs
index 4224ae633..7cf369de3 100644
--- a/object_store/src/azure/mod.rs
+++ b/object_store/src/azure/mod.rs
@@ -37,7 +37,7 @@ use async_trait::async_trait;
use bytes::Bytes;
use chrono::{TimeZone, Utc};
use futures::{stream::BoxStream, StreamExt, TryStreamExt};
-use snafu::{ResultExt, Snafu};
+use snafu::{OptionExt, ResultExt, Snafu};
use std::collections::BTreeSet;
use std::fmt::{Debug, Formatter};
use std::io;
@@ -121,6 +121,9 @@ enum Error {
scheme
))]
UnknownUrlScheme { scheme: String },
+
+ #[snafu(display("URL did not match any known pattern for scheme: {}", url))]
+ UrlNotRecognised { url: String },
}
impl From<Error> for super::Error {
@@ -354,7 +357,7 @@ impl CloudMultiPartUploadImpl for AzureMultiPartUpload {
/// .with_container_name(BUCKET_NAME)
/// .build();
/// ```
-#[derive(Default)]
+#[derive(Default, Clone)]
pub struct MicrosoftAzureBuilder {
account_name: Option<String>,
access_key: Option<String>,
@@ -365,10 +368,10 @@ pub struct MicrosoftAzureBuilder {
tenant_id: Option<String>,
sas_query_pairs: Option<Vec<(String, String)>>,
authority_host: Option<String>,
+ url: Option<String>,
use_emulator: bool,
retry_config: RetryConfig,
client_options: ClientOptions,
- url_parse_error: Option<Error>,
}
impl Debug for MicrosoftAzureBuilder {
@@ -444,9 +447,7 @@ impl MicrosoftAzureBuilder {
/// - `https://<account>.dfs.core.windows.net`
/// - `https://<account>.blob.core.windows.net`
///
- /// Please note that this is a best effort implementation, and will not fail for malformed URLs,
- /// but rather warn and ignore the passed url. The url also has no effect on how the
- /// storage is accessed - e.g. which driver or protocol is used for reading from the location.
+ /// Note: Settings derived from the URL will override any others set on this builder
///
/// # Example
/// ```
@@ -456,52 +457,48 @@ impl MicrosoftAzureBuilder {
/// .with_url("abfss://file_system@account.dfs.core.windows.net/")
/// .build();
/// ```
- pub fn with_url(mut self, url: impl AsRef<str>) -> Self {
- let maybe_parsed = Url::parse(url.as_ref());
- match maybe_parsed {
- Ok(parsed) => match parsed.scheme() {
- "az" | "adl" | "azure" => {
- self.container_name = parsed.host_str().map(|host| host.to_owned());
- }
- "abfs" | "abfss" => {
- // abfs(s) might refer to the fsspec convention abfs://<container>/<path>
- // or the convention for the hadoop driver abfs[s]://<file_system>@<account_name>.dfs.core.windows.net/<path>
- if parsed.username().is_empty() {
- self.container_name =
- parsed.host_str().map(|host| host.to_owned());
- } else if let Some(host) = parsed.host_str() {
- let parts = host.splitn(2, '.').collect::<Vec<&str>>();
- if parts.len() == 2 && parts[1] == "dfs.core.windows.net" {
- self.container_name = Some(parsed.username().to_owned());
- self.account_name = Some(parts[0].to_string());
- }
- }
- }
- "https" => {
- if let Some(host) = parsed.host_str() {
- let parts = host.splitn(2, '.').collect::<Vec<&str>>();
- if parts.len() == 2
- && (parts[1] == "dfs.core.windows.net"
- || parts[1] == "blob.core.windows.net")
- {
- self.account_name = Some(parts[0].to_string());
- }
- }
+ pub fn with_url(mut self, url: impl Into<String>) -> Self {
+ self.url = Some(url.into());
+ self
+ }
+
+ /// Sets properties on this builder based on a URL
+ ///
+ /// This is a separate member function to allow fallible computation to
+ /// be deferred until [`Self::build`] which in turn allows deriving [`Clone`]
+ fn parse_url(&mut self, url: &str) -> Result<()> {
+ let parsed = Url::parse(url).context(UnableToParseUrlSnafu { url })?;
+ let host = parsed.host_str().context(UrlNotRecognisedSnafu { url })?;
+
+ let validate = |s: &str| match s.contains('.') {
+ true => Err(UrlNotRecognisedSnafu { url }.build()),
+ false => Ok(s.to_string()),
+ };
+
+ match parsed.scheme() {
+ "az" | "adl" | "azure" => self.container_name = Some(validate(host)?),
+ "abfs" | "abfss" => {
+ // abfs(s) might refer to the fsspec convention abfs://<container>/<path>
+ // or the convention for the hadoop driver abfs[s]://<file_system>@<account_name>.dfs.core.windows.net/<path>
+ if parsed.username().is_empty() {
+ self.container_name = Some(validate(host)?);
+ } else if let Some(a) = host.strip_suffix(".dfs.core.windows.net") {
+ self.container_name = Some(validate(parsed.username())?);
+ self.account_name = Some(validate(a)?);
+ } else {
+ return Err(UrlNotRecognisedSnafu { url }.build().into());
}
- other => {
- self.url_parse_error = Some(Error::UnknownUrlScheme {
- scheme: other.into(),
- });
+ }
+ "https" => match host.split_once('.') {
+ Some((a, "dfs.core.windows.net"))
+ | Some((a, "blob.core.windows.net")) => {
+ self.account_name = Some(validate(a)?);
}
+ _ => return Err(UrlNotRecognisedSnafu { url }.build().into()),
},
- Err(err) => {
- self.url_parse_error = Some(Error::UnableToParseUrl {
- source: err,
- url: url.as_ref().into(),
- });
- }
- };
- self
+ scheme => return Err(UnknownUrlSchemeSnafu { scheme }.build().into()),
+ }
+ Ok(())
}
/// Set the Azure Account (required)
@@ -595,63 +592,49 @@ impl MicrosoftAzureBuilder {
/// Configure a connection to container with given name on Microsoft Azure
/// Blob store.
- pub fn build(self) -> Result<MicrosoftAzure> {
- let Self {
- account_name,
- access_key,
- container_name,
- bearer_token,
- client_id,
- client_secret,
- tenant_id,
- sas_query_pairs,
- use_emulator,
- retry_config,
- authority_host,
- mut client_options,
- url_parse_error,
- } = self;
-
- if let Some(err) = url_parse_error {
- return Err(err.into());
+ pub fn build(mut self) -> Result<MicrosoftAzure> {
+ if let Some(url) = self.url.take() {
+ self.parse_url(&url)?;
}
- let container = container_name.ok_or(Error::MissingContainerName {})?;
+ let container = self.container_name.ok_or(Error::MissingContainerName {})?;
- let (is_emulator, storage_url, auth, account) = if use_emulator {
- let account_name =
- account_name.unwrap_or_else(|| EMULATOR_ACCOUNT.to_string());
+ let (is_emulator, storage_url, auth, account) = if self.use_emulator {
+ let account_name = self
+ .account_name
+ .unwrap_or_else(|| EMULATOR_ACCOUNT.to_string());
// Allow overriding defaults. Values taken from
// from https://docs.rs/azure_storage/0.2.0/src/azure_storage/core/clients/storage_account_client.rs.html#129-141
let url = url_from_env("AZURITE_BLOB_STORAGE_URL", "http://127.0.0.1:10000")?;
- let account_key =
- access_key.unwrap_or_else(|| EMULATOR_ACCOUNT_KEY.to_string());
+ let account_key = self
+ .access_key
+ .unwrap_or_else(|| EMULATOR_ACCOUNT_KEY.to_string());
let credential = credential::CredentialProvider::AccessKey(account_key);
- client_options = client_options.with_allow_http(true);
+ self.client_options = self.client_options.with_allow_http(true);
(true, url, credential, account_name)
} else {
- let account_name = account_name.ok_or(Error::MissingAccount {})?;
+ let account_name = self.account_name.ok_or(Error::MissingAccount {})?;
let account_url = format!("https://{}.blob.core.windows.net", &account_name);
let url = Url::parse(&account_url)
.context(UnableToParseUrlSnafu { url: account_url })?;
- let credential = if let Some(bearer_token) = bearer_token {
+ let credential = if let Some(bearer_token) = self.bearer_token {
Ok(credential::CredentialProvider::AccessKey(bearer_token))
- } else if let Some(access_key) = access_key {
+ } else if let Some(access_key) = self.access_key {
Ok(credential::CredentialProvider::AccessKey(access_key))
} else if let (Some(client_id), Some(client_secret), Some(tenant_id)) =
- (client_id, client_secret, tenant_id)
+ (self.client_id, self.client_secret, self.tenant_id)
{
let client_credential = credential::ClientSecretOAuthProvider::new(
client_id,
client_secret,
tenant_id,
- authority_host,
+ self.authority_host,
);
Ok(credential::CredentialProvider::ClientSecret(
client_credential,
))
- } else if let Some(query_pairs) = sas_query_pairs {
+ } else if let Some(query_pairs) = self.sas_query_pairs {
Ok(credential::CredentialProvider::SASToken(query_pairs))
} else {
Err(Error::MissingCredentials {})
@@ -661,12 +644,12 @@ impl MicrosoftAzureBuilder {
let config = client::AzureConfig {
account,
- retry_config,
- service: storage_url,
+ is_emulator,
container,
+ retry_config: self.retry_config,
+ client_options: self.client_options,
+ service: storage_url,
credentials: auth,
- is_emulator,
- client_options,
};
let client = Arc::new(client::AzureClient::new(config)?);
@@ -804,26 +787,49 @@ mod tests {
#[test]
fn azure_blob_test_urls() {
- let builder = MicrosoftAzureBuilder::new()
- .with_url("abfss://file_system@account.dfs.core.windows.net/");
+ let mut builder = MicrosoftAzureBuilder::new();
+ builder
+ .parse_url("abfss://file_system@account.dfs.core.windows.net/")
+ .unwrap();
assert_eq!(builder.account_name, Some("account".to_string()));
assert_eq!(builder.container_name, Some("file_system".to_string()));
- let builder = MicrosoftAzureBuilder::new().with_url("abfs://container/path");
+ let mut builder = MicrosoftAzureBuilder::new();
+ builder.parse_url("abfs://container/path").unwrap();
assert_eq!(builder.container_name, Some("container".to_string()));
- let builder = MicrosoftAzureBuilder::new().with_url("az://container");
+ let mut builder = MicrosoftAzureBuilder::new();
+ builder.parse_url("az://container").unwrap();
assert_eq!(builder.container_name, Some("container".to_string()));
- let builder = MicrosoftAzureBuilder::new().with_url("az://container/path");
+ let mut builder = MicrosoftAzureBuilder::new();
+ builder.parse_url("az://container/path").unwrap();
assert_eq!(builder.container_name, Some("container".to_string()));
- let builder = MicrosoftAzureBuilder::new()
- .with_url("https://account.dfs.core.windows.net/");
+ let mut builder = MicrosoftAzureBuilder::new();
+ builder
+ .parse_url("https://account.dfs.core.windows.net/")
+ .unwrap();
assert_eq!(builder.account_name, Some("account".to_string()));
- let builder = MicrosoftAzureBuilder::new()
- .with_url("https://account.blob.core.windows.net/");
- assert_eq!(builder.account_name, Some("account".to_string()))
+ let mut builder = MicrosoftAzureBuilder::new();
+ builder
+ .parse_url("https://account.blob.core.windows.net/")
+ .unwrap();
+ assert_eq!(builder.account_name, Some("account".to_string()));
+
+ let err_cases = [
+ "mailto://account.blob.core.windows.net/",
+ "az://blob.mydomain/",
+ "abfs://container.foo/path",
+ "abfss://file_system@account.foo.dfs.core.windows.net/",
+ "abfss://file_system.bar@account.dfs.core.windows.net/",
+ "https://blob.mydomain/",
+ "https://blob.foo.dfs.core.windows.net/",
+ ];
+ let mut builder = MicrosoftAzureBuilder::new();
+ for case in err_cases {
+ builder.parse_url(case).unwrap_err();
+ }
}
}
diff --git a/object_store/src/gcp/mod.rs b/object_store/src/gcp/mod.rs
index c1424d971..f2638748f 100644
--- a/object_store/src/gcp/mod.rs
+++ b/object_store/src/gcp/mod.rs
@@ -42,7 +42,7 @@ use futures::{stream::BoxStream, StreamExt, TryStreamExt};
use percent_encoding::{percent_encode, NON_ALPHANUMERIC};
use reqwest::header::RANGE;
use reqwest::{header, Client, Method, Response, StatusCode};
-use snafu::{ResultExt, Snafu};
+use snafu::{OptionExt, ResultExt, Snafu};
use tokio::io::AsyncWrite;
use url::Url;
@@ -142,6 +142,9 @@ enum Error {
scheme
))]
UnknownUrlScheme { scheme: String },
+
+ #[snafu(display("URL did not match any known pattern for scheme: {}", url))]
+ UrlNotRecognised { url: String },
}
impl From<Error> for super::Error {
@@ -784,13 +787,13 @@ fn reader_credentials_file(
/// .with_bucket_name(BUCKET_NAME)
/// .build();
/// ```
-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub struct GoogleCloudStorageBuilder {
bucket_name: Option<String>,
+ url: Option<String>,
service_account_path: Option<String>,
retry_config: RetryConfig,
client_options: ClientOptions,
- url_parse_error: Option<Error>,
}
impl Default for GoogleCloudStorageBuilder {
@@ -800,7 +803,7 @@ impl Default for GoogleCloudStorageBuilder {
service_account_path: None,
retry_config: Default::default(),
client_options: ClientOptions::new().with_allow_http(true),
- url_parse_error: None,
+ url: None,
}
}
}
@@ -845,9 +848,7 @@ impl GoogleCloudStorageBuilder {
///
/// - `gs://<bucket>/<path>`
///
- /// Please note that this is a best effort implementation, and will not fail for malformed URLs,
- /// but rather warn and ignore the passed url. The url also has no effect on how the
- /// storage is accessed - e.g. which driver or protocol is used for reading from the location.
+ /// Note: Settings derived from the URL will override any others set on this builder
///
/// # Example
/// ```
@@ -857,29 +858,31 @@ impl GoogleCloudStorageBuilder {
/// .with_url("gs://bucket/path")
/// .build();
/// ```
- pub fn with_url(mut self, url: impl AsRef<str>) -> Self {
- let maybe_parsed = Url::parse(url.as_ref());
- match maybe_parsed {
- Ok(parsed) => match parsed.scheme() {
- "gs" => {
- self.bucket_name = parsed.host_str().map(|host| host.to_owned());
- }
- other => {
- self.url_parse_error = Some(Error::UnknownUrlScheme {
- scheme: other.into(),
- });
- }
- },
- Err(err) => {
- self.url_parse_error = Some(Error::UnableToParseUrl {
- source: err,
- url: url.as_ref().into(),
- });
- }
- };
+ pub fn with_url(mut self, url: impl Into<String>) -> Self {
+ self.url = Some(url.into());
self
}
+ /// Sets properties on this builder based on a URL
+ ///
+ /// This is a separate member function to allow fallible computation to
+ /// be deferred until [`Self::build`] which in turn allows deriving [`Clone`]
+ fn parse_url(&mut self, url: &str) -> Result<()> {
+ let parsed = Url::parse(url).context(UnableToParseUrlSnafu { url })?;
+ let host = parsed.host_str().context(UrlNotRecognisedSnafu { url })?;
+
+ let validate = |s: &str| match s.contains('.') {
+ true => Err(UrlNotRecognisedSnafu { url }.build()),
+ false => Ok(s.to_string()),
+ };
+
+ match parsed.scheme() {
+ "gs" => self.bucket_name = Some(validate(host)?),
+ scheme => return Err(UnknownUrlSchemeSnafu { scheme }.build().into()),
+ }
+ Ok(())
+ }
+
/// Set the bucket name (required)
pub fn with_bucket_name(mut self, bucket_name: impl Into<String>) -> Self {
self.bucket_name = Some(bucket_name.into());
@@ -927,24 +930,17 @@ impl GoogleCloudStorageBuilder {
/// Configure a connection to Google Cloud Storage, returning a
/// new [`GoogleCloudStorage`] and consuming `self`
- pub fn build(self) -> Result<GoogleCloudStorage> {
- let Self {
- bucket_name,
- service_account_path,
- retry_config,
- client_options,
- url_parse_error,
- } = self;
-
- if let Some(err) = url_parse_error {
- return Err(err.into());
+ pub fn build(mut self) -> Result<GoogleCloudStorage> {
+ if let Some(url) = self.url.take() {
+ self.parse_url(&url)?;
}
- let bucket_name = bucket_name.ok_or(Error::MissingBucketName {})?;
- let service_account_path =
- service_account_path.ok_or(Error::MissingServiceAccountPath)?;
+ let bucket_name = self.bucket_name.ok_or(Error::MissingBucketName {})?;
+ let service_account_path = self
+ .service_account_path
+ .ok_or(Error::MissingServiceAccountPath)?;
- let client = client_options.client()?;
+ let client = self.client_options.client()?;
let credentials = reader_credentials_file(service_account_path)?;
// TODO: https://cloud.google.com/storage/docs/authentication#oauth-scopes
@@ -977,8 +973,8 @@ impl GoogleCloudStorageBuilder {
token_cache: Default::default(),
bucket_name,
bucket_name_encoded: encoded_bucket_name,
- retry_config,
- client_options,
+ retry_config: self.retry_config,
+ client_options: self.client_options,
max_list_results: None,
}),
})
@@ -1199,7 +1195,14 @@ mod test {
#[test]
fn gcs_test_urls() {
- let builder = GoogleCloudStorageBuilder::new().with_url("gs://bucket/path");
- assert_eq!(builder.bucket_name, Some("bucket".to_string()))
+ let mut builder = GoogleCloudStorageBuilder::new();
+ builder.parse_url("gs://bucket/path").unwrap();
+ assert_eq!(builder.bucket_name, Some("bucket".to_string()));
+
+ let err_cases = ["mailto://bucket/path", "gs://bucket.mydomain/path"];
+ let mut builder = GoogleCloudStorageBuilder::new();
+ for case in err_cases {
+ builder.parse_url(case).unwrap_err();
+ }
}
}
diff --git a/object_store/src/http/mod.rs b/object_store/src/http/mod.rs
index 25997d892..f05e70024 100644
--- a/object_store/src/http/mod.rs
+++ b/object_store/src/http/mod.rs
@@ -55,8 +55,11 @@ enum Error {
#[snafu(display("Must specify a URL"))]
MissingUrl,
- #[snafu(display("Invalid URL: {}", source))]
- InvalidUrl { source: reqwest::Error },
+ #[snafu(display("Unable parse source url. Url: {}, Error: {}", url, source))]
+ UnableToParseUrl {
+ source: url::ParseError,
+ url: String,
+ },
#[snafu(display("Object is a directory"))]
IsDirectory,
@@ -210,9 +213,9 @@ impl ObjectStore for HttpStore {
}
/// Configure a connection to a generic HTTP server
-#[derive(Debug, Default)]
+#[derive(Debug, Default, Clone)]
pub struct HttpBuilder {
- url: Option<Result<Url>>,
+ url: Option<String>,
client_options: ClientOptions,
retry_config: RetryConfig,
}
@@ -224,8 +227,8 @@ impl HttpBuilder {
}
/// Set the URL
- pub fn with_url(mut self, url: impl reqwest::IntoUrl) -> Self {
- self.url = Some(url.into_url().context(InvalidUrlSnafu).map_err(Into::into));
+ pub fn with_url(mut self, url: impl Into<String>) -> Self {
+ self.url = Some(url.into());
self
}
@@ -243,9 +246,11 @@ impl HttpBuilder {
/// Build an [`HttpStore`] with the configured options
pub fn build(self) -> Result<HttpStore> {
- let url = self.url.context(MissingUrlSnafu)??;
+ let url = self.url.context(MissingUrlSnafu)?;
+ let parsed = Url::parse(&url).context(UnableToParseUrlSnafu { url })?;
+
Ok(HttpStore {
- client: Client::new(url, self.client_options, self.retry_config)?,
+ client: Client::new(parsed, self.client_options, self.retry_config)?,
})
}
}