You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/08/07 17:53:51 UTC

[arrow-rs] branch master updated: Fix Copy from percent-encoded path (#2353) (#2354)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new a2de36335 Fix Copy from percent-encoded path (#2353) (#2354)
a2de36335 is described below

commit a2de36335c6fccb1e365c9032222e0d553ee5a55
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Sun Aug 7 18:53:46 2022 +0100

    Fix Copy from percent-encoded path (#2353) (#2354)
---
 object_store/src/aws.rs   | 20 +++++++++++++++++++-
 object_store/src/azure.rs | 17 +++++++++--------
 object_store/src/lib.rs   |  7 +++++++
 3 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/object_store/src/aws.rs b/object_store/src/aws.rs
index d59f48bce..86766b052 100644
--- a/object_store/src/aws.rs
+++ b/object_store/src/aws.rs
@@ -48,6 +48,7 @@ use futures::{
     Future, Stream, StreamExt, TryStreamExt,
 };
 use hyper::client::Builder as HyperBuilder;
+use percent_encoding::{percent_encode, AsciiSet, NON_ALPHANUMERIC};
 use rusoto_core::ByteStream;
 use rusoto_credential::{InstanceMetadataProvider, StaticProvider};
 use rusoto_s3::S3;
@@ -62,6 +63,17 @@ use tokio::io::AsyncWrite;
 use tokio::sync::{OwnedSemaphorePermit, Semaphore};
 use tracing::{debug, warn};
 
+// Do not URI-encode any of the unreserved characters that RFC 3986 defines:
+// A-Z, a-z, 0-9, hyphen ( - ), underscore ( _ ), period ( . ), and tilde ( ~ ).
+const STRICT_ENCODE_SET: AsciiSet = NON_ALPHANUMERIC
+    .remove(b'-')
+    .remove(b'.')
+    .remove(b'_')
+    .remove(b'~');
+
+/// Same as `STRICT_ENCODE_SET`, but also leaves `/` unencoded so path separators are preserved
+const STRICT_PATH_ENCODE_SET: AsciiSet = STRICT_ENCODE_SET.remove(b'/');
+
 /// The maximum number of times a request will be retried in the case of an AWS server error
 pub const MAX_NUM_RETRIES: u32 = 3;
 
@@ -541,9 +553,15 @@ impl ObjectStore for AmazonS3 {
         let to = to.as_ref();
         let bucket_name = self.bucket_name.clone();
 
+        let copy_source = format!(
+            "{}/{}",
+            &bucket_name,
+            percent_encode(from.as_ref(), &STRICT_PATH_ENCODE_SET)
+        );
+
         let request_factory = move || rusoto_s3::CopyObjectRequest {
             bucket: bucket_name.clone(),
-            copy_source: format!("{}/{}", &bucket_name, from),
+            copy_source,
             key: to.to_string(),
             ..Default::default()
         };
diff --git a/object_store/src/azure.rs b/object_store/src/azure.rs
index 0d5f2fa7d..cee874b95 100644
--- a/object_store/src/azure.rs
+++ b/object_store/src/azure.rs
@@ -470,14 +470,15 @@ impl ObjectStore for MicrosoftAzure {
 
 impl MicrosoftAzure {
     /// helper function to create a source url for copy function
-    fn get_copy_from_url(&self, from: &Path) -> Result<reqwest::Url> {
-        Ok(reqwest::Url::parse(&format!(
-            "{}/{}/{}",
-            &self.blob_base_url, self.container_name, from
-        ))
-        .context(UnableToParseUrlSnafu {
-            container: &self.container_name,
-        })?)
+    fn get_copy_from_url(&self, from: &Path) -> Result<Url> {
+        let mut url =
+            Url::parse(&format!("{}/{}", &self.blob_base_url, self.container_name))
+                .context(UnableToParseUrlSnafu {
+                    container: &self.container_name,
+                })?;
+
+        url.path_segments_mut().unwrap().extend(from.parts());
+        Ok(url)
     }
 
     async fn list_impl(
diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs
index 08634e2fe..b60a29575 100644
--- a/object_store/src/lib.rs
+++ b/object_store/src/lib.rs
@@ -635,7 +635,14 @@ mod tests {
 
         assert_eq!(files, vec![emoji_file.clone()]);
 
+        let dst = Path::from("foo.parquet");
+        storage.copy(&emoji_file, &dst).await.unwrap();
+        let mut files = flatten_list_stream(storage, None).await.unwrap();
+        files.sort_unstable();
+        assert_eq!(files, vec![emoji_file.clone(), dst.clone()]);
+
         storage.delete(&emoji_file).await.unwrap();
+        storage.delete(&dst).await.unwrap();
         let files = flatten_list_stream(storage, Some(&emoji_prefix))
             .await
             .unwrap();