You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/05/18 08:05:48 UTC

[arrow-rs] branch master updated: Expose AwsAuthorizer (#4237)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new f56690369 Expose AwsAuthorizer (#4237)
f56690369 is described below

commit f56690369b7fb7cecd5c57bc274f6560f37de5ca
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Thu May 18 09:05:41 2023 +0100

    Expose AwsAuthorizer (#4237)
    
    * Expose AWSAuthorizer
    
    * Review feedback
---
 object_store/src/aws/client.rs     |   3 +-
 object_store/src/aws/credential.rs | 128 ++++++++++++++++++++++++-------------
 object_store/src/aws/mod.rs        |   3 +-
 3 files changed, 86 insertions(+), 48 deletions(-)

diff --git a/object_store/src/aws/client.rs b/object_store/src/aws/client.rs
index 8ce743b31..2c45050fa 100644
--- a/object_store/src/aws/client.rs
+++ b/object_store/src/aws/client.rs
@@ -238,7 +238,7 @@ impl S3Client {
                 &self.config.region,
                 "s3",
                 self.config.sign_payload,
-                payload_sha256,
+                payload_sha256.as_deref(),
             )
             .send_retry(&self.config.retry_config)
             .await
@@ -315,7 +315,6 @@ impl S3Client {
 
         let mut query = Vec::with_capacity(4);
 
-        // Note: the order of these matters to ensure the generated URL is canonical
         if let Some(token) = token {
             query.push(("continuation-token", token))
         }
diff --git a/object_store/src/aws/credential.rs b/object_store/src/aws/credential.rs
index 47d681c63..909dde072 100644
--- a/object_store/src/aws/credential.rs
+++ b/object_store/src/aws/credential.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::aws::{STORE, STRICT_ENCODE_SET};
+use crate::aws::{STORE, STRICT_ENCODE_SET, STRICT_PATH_ENCODE_SET};
 use crate::client::retry::RetryExt;
 use crate::client::token::{TemporaryToken, TokenCache};
 use crate::client::TokenProvider;
@@ -39,7 +39,8 @@ type StdError = Box<dyn std::error::Error + Send + Sync>;
 /// SHA256 hash of empty string
 static EMPTY_SHA256_HASH: &str =
     "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
-static UNSIGNED_PAYLOAD_LITERAL: &str = "UNSIGNED-PAYLOAD";
+static UNSIGNED_PAYLOAD: &str = "UNSIGNED-PAYLOAD";
+static STREAMING_PAYLOAD: &str = "STREAMING-AWS4-HMAC-SHA256-PAYLOAD";
 
 /// A set of AWS security credentials
 #[derive(Debug, Eq, PartialEq)]
@@ -72,8 +73,12 @@ impl AwsCredential {
     }
 }
 
-struct RequestSigner<'a> {
-    date: DateTime<Utc>,
+/// Authorize a [`Request`] with an [`AwsCredential`] using [AWS SigV4]
+///
+/// [AWS SigV4]: https://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
+#[derive(Debug)]
+pub struct AwsAuthorizer<'a> {
+    date: Option<DateTime<Utc>>,
     credential: &'a AwsCredential,
     service: &'a str,
     region: &'a str,
@@ -85,39 +90,78 @@ const HASH_HEADER: &str = "x-amz-content-sha256";
 const TOKEN_HEADER: &str = "x-amz-security-token";
 const AUTH_HEADER: &str = "authorization";
 
-impl<'a> RequestSigner<'a> {
-    fn sign(&self, request: &mut Request, pre_calculated_digest: Option<Vec<u8>>) {
+impl<'a> AwsAuthorizer<'a> {
+    /// Create a new [`AwsAuthorizer`]
+    pub fn new(credential: &'a AwsCredential, service: &'a str, region: &'a str) -> Self {
+        Self {
+            credential,
+            service,
+            region,
+            date: None,
+            sign_payload: true,
+        }
+    }
+
+    /// Controls whether this [`AwsAuthorizer`] will attempt to sign the request payload.
+    /// The default is `true`.
+    pub fn with_sign_payload(mut self, signed: bool) -> Self {
+        self.sign_payload = signed;
+        self
+    }
+
+    /// Authorize `request` with an optional pre-calculated SHA256 digest by attaching
+    /// the relevant [AWS SigV4] headers
+    ///
+    /// # Payload Signature
+    ///
+    /// AWS SigV4 requests must contain the `x-amz-content-sha256` header, which is set as follows:
+    ///
+    /// * If not configured to sign payloads, it is set to `UNSIGNED-PAYLOAD`
+    /// * If a `pre_calculated_digest` is provided, it is set to the hex encoding of it
+    /// * If it is a streaming request, it is set to `STREAMING-AWS4-HMAC-SHA256-PAYLOAD`
+    /// * Otherwise it is set to the hex encoded SHA256 of the request body
+    ///
+    /// [AWS SigV4]: https://docs.aws.amazon.com/IAM/latest/UserGuide/create-signed-request.html
+    pub fn authorize(&self, request: &mut Request, pre_calculated_digest: Option<&[u8]>) {
         if let Some(ref token) = self.credential.token {
             let token_val = HeaderValue::from_str(token).unwrap();
             request.headers_mut().insert(TOKEN_HEADER, token_val);
         }
 
-        let host_val = HeaderValue::from_str(
-            &request.url()[url::Position::BeforeHost..url::Position::AfterPort],
-        )
-        .unwrap();
+        let host = &request.url()[url::Position::BeforeHost..url::Position::AfterPort];
+        let host_val = HeaderValue::from_str(host).unwrap();
         request.headers_mut().insert("host", host_val);
 
-        let date_str = self.date.format("%Y%m%dT%H%M%SZ").to_string();
+        let date = self.date.unwrap_or_else(Utc::now);
+        let date_str = date.format("%Y%m%dT%H%M%SZ").to_string();
         let date_val = HeaderValue::from_str(&date_str).unwrap();
         request.headers_mut().insert(DATE_HEADER, date_val);
 
-        let digest = if self.sign_payload {
-            if let Some(digest) = pre_calculated_digest {
-                hex_encode(&digest)
-            } else {
-                match request.body() {
+        let digest = match self.sign_payload {
+            false => UNSIGNED_PAYLOAD.to_string(),
+            true => match pre_calculated_digest {
+                Some(digest) => hex_encode(digest),
+                None => match request.body() {
                     None => EMPTY_SHA256_HASH.to_string(),
-                    Some(body) => hex_digest(body.as_bytes().unwrap()),
-                }
-            }
-        } else {
-            UNSIGNED_PAYLOAD_LITERAL.to_string()
+                    Some(body) => match body.as_bytes() {
+                        Some(bytes) => hex_digest(bytes),
+                        None => STREAMING_PAYLOAD.to_string(),
+                    },
+                },
+            },
         };
 
         let header_digest = HeaderValue::from_str(&digest).unwrap();
         request.headers_mut().insert(HASH_HEADER, header_digest);
 
+        // Each path segment must be URI-encoded twice (except for Amazon S3 which only gets URI-encoded once).
+        // see https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
+        let canonical_uri = match self.service {
+            "s3" => request.url().path().to_string(),
+            _ => utf8_percent_encode(request.url().path(), &STRICT_PATH_ENCODE_SET)
+                .to_string(),
+        };
+
         let (signed_headers, canonical_headers) = canonicalize_headers(request.headers());
         let canonical_query = canonicalize_query(request.url());
 
@@ -125,7 +169,7 @@ impl<'a> RequestSigner<'a> {
         let canonical_request = format!(
             "{}\n{}\n{}\n{}\n{}\n{}",
             request.method().as_str(),
-            request.url().path(), // S3 doesn't percent encode this like other services
+            canonical_uri,
             canonical_query,
             canonical_headers,
             signed_headers,
@@ -135,14 +179,14 @@ impl<'a> RequestSigner<'a> {
         let hashed_canonical_request = hex_digest(canonical_request.as_bytes());
         let scope = format!(
             "{}/{}/{}/aws4_request",
-            self.date.format("%Y%m%d"),
+            date.format("%Y%m%d"),
             self.region,
             self.service
         );
 
         let string_to_sign = format!(
             "AWS4-HMAC-SHA256\n{}\n{}\n{}",
-            self.date.format("%Y%m%dT%H%M%SZ"),
+            date.format("%Y%m%dT%H%M%SZ"),
             scope,
             hashed_canonical_request
         );
@@ -150,7 +194,7 @@ impl<'a> RequestSigner<'a> {
         // sign the string
         let signature =
             self.credential
-                .sign(&string_to_sign, self.date, self.region, self.service);
+                .sign(&string_to_sign, date, self.region, self.service);
 
         // build the actual auth header
         let authorisation = format!(
@@ -171,7 +215,7 @@ pub trait CredentialExt {
         region: &str,
         service: &str,
         sign_payload: bool,
-        payload_sha256: Option<Vec<u8>>,
+        payload_sha256: Option<&[u8]>,
     ) -> Self;
 }
 
@@ -182,21 +226,15 @@ impl CredentialExt for RequestBuilder {
         region: &str,
         service: &str,
         sign_payload: bool,
-        payload_sha256: Option<Vec<u8>>,
+        payload_sha256: Option<&[u8]>,
     ) -> Self {
         let (client, request) = self.build_split();
         let mut request = request.expect("request valid");
 
-        let date = Utc::now();
-        let signer = RequestSigner {
-            date,
-            credential,
-            service,
-            region,
-            sign_payload,
-        };
+        AwsAuthorizer::new(credential, service, region)
+            .with_sign_payload(sign_payload)
+            .authorize(&mut request, payload_sha256);
 
-        signer.sign(&mut request, payload_sha256);
         Self::from_parts(client, request)
     }
 }
@@ -539,15 +577,15 @@ mod tests {
             .build()
             .unwrap();
 
-        let signer = RequestSigner {
-            date,
+        let signer = AwsAuthorizer {
+            date: Some(date),
             credential: &credential,
             service: "ec2",
             region: "us-east-1",
             sign_payload: true,
         };
 
-        signer.sign(&mut request, None);
+        signer.authorize(&mut request, None);
         assert_eq!(request.headers().get(AUTH_HEADER).unwrap(), "AWS4-HMAC-SHA256 Credential=AKIAIOSFODNN7EXAMPLE/20220806/us-east-1/ec2/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=a3c787a7ed37f7fdfbfd2d7056a3d7c9d85e6d52a2bfbec73793c0be6e7862d4")
     }
 
@@ -577,15 +615,15 @@ mod tests {
             .build()
             .unwrap();
 
-        let signer = RequestSigner {
-            date,
+        let authorizer = AwsAuthorizer {
+            date: Some(date),
             credential: &credential,
             service: "ec2",
             region: "us-east-1",
             sign_payload: false,
         };
 
-        signer.sign(&mut request, None);
+        authorizer.authorize(&mut request, None);
         assert_eq!(request.headers().get(AUTH_HEADER).unwrap(), "AWS4-HMAC-SHA256 Credential=AKIAIOSFODNN7EXAMPLE/20220806/us-east-1/ec2/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=653c3d8ea261fd826207df58bc2bb69fbb5003e9eb3c0ef06e4a51f2a81d8699")
     }
 
@@ -614,15 +652,15 @@ mod tests {
             .build()
             .unwrap();
 
-        let signer = RequestSigner {
-            date,
+        let authorizer = AwsAuthorizer {
+            date: Some(date),
             credential: &credential,
             service: "s3",
             region: "us-east-1",
             sign_payload: true,
         };
 
-        signer.sign(&mut request, None);
+        authorizer.authorize(&mut request, None);
         assert_eq!(request.headers().get(AUTH_HEADER).unwrap(), "AWS4-HMAC-SHA256 Credential=H20ABqCkLZID4rLe/20220809/us-east-1/s3/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=9ebf2f92872066c99ac94e573b4e1b80f4dbb8a32b1e8e23178318746e7d1b4d")
     }
 
diff --git a/object_store/src/aws/mod.rs b/object_store/src/aws/mod.rs
index a7f43d153..e71124fba 100644
--- a/object_store/src/aws/mod.rs
+++ b/object_store/src/aws/mod.rs
@@ -82,7 +82,7 @@ const STORE: &str = "S3";
 
 /// [`CredentialProvider`] for [`AmazonS3`]
 pub type AwsCredentialProvider = Arc<dyn CredentialProvider<Credential = AwsCredential>>;
-pub use credential::AwsCredential;
+pub use credential::{AwsAuthorizer, AwsCredential};
 
 /// Default metadata endpoint
 static METADATA_ENDPOINT: &str = "http://169.254.169.254";
@@ -160,6 +160,7 @@ impl From<Error> for super::Error {
 }
 
 /// Get the bucket region using the [HeadBucket API]. This will fail if the bucket does not exist.
+///
 /// [HeadBucket API]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_HeadBucket.html
 pub async fn resolve_bucket_region(
     bucket: &str,