You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/05/18 08:05:48 UTC
[arrow-rs] branch master updated: Expose AwsAuthorizer (#4237)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new f56690369 Expose AwsAuthorizer (#4237)
f56690369 is described below
commit f56690369b7fb7cecd5c57bc274f6560f37de5ca
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Thu May 18 09:05:41 2023 +0100
Expose AwsAuthorizer (#4237)
* Expose AWSAuthorizer
* Review feedback
---
object_store/src/aws/client.rs | 3 +-
object_store/src/aws/credential.rs | 128 ++++++++++++++++++++++++-------------
object_store/src/aws/mod.rs | 3 +-
3 files changed, 86 insertions(+), 48 deletions(-)
diff --git a/object_store/src/aws/client.rs b/object_store/src/aws/client.rs
index 8ce743b31..2c45050fa 100644
--- a/object_store/src/aws/client.rs
+++ b/object_store/src/aws/client.rs
@@ -238,7 +238,7 @@ impl S3Client {
&self.config.region,
"s3",
self.config.sign_payload,
- payload_sha256,
+ payload_sha256.as_deref(),
)
.send_retry(&self.config.retry_config)
.await
@@ -315,7 +315,6 @@ impl S3Client {
let mut query = Vec::with_capacity(4);
- // Note: the order of these matters to ensure the generated URL is canonical
if let Some(token) = token {
query.push(("continuation-token", token))
}
diff --git a/object_store/src/aws/credential.rs b/object_store/src/aws/credential.rs
index 47d681c63..909dde072 100644
--- a/object_store/src/aws/credential.rs
+++ b/object_store/src/aws/credential.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use crate::aws::{STORE, STRICT_ENCODE_SET};
+use crate::aws::{STORE, STRICT_ENCODE_SET, STRICT_PATH_ENCODE_SET};
use crate::client::retry::RetryExt;
use crate::client::token::{TemporaryToken, TokenCache};
use crate::client::TokenProvider;
@@ -39,7 +39,8 @@ type StdError = Box<dyn std::error::Error + Send + Sync>;
/// SHA256 hash of empty string
static EMPTY_SHA256_HASH: &str =
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
-static UNSIGNED_PAYLOAD_LITERAL: &str = "UNSIGNED-PAYLOAD";
+static UNSIGNED_PAYLOAD: &str = "UNSIGNED-PAYLOAD";
+static STREAMING_PAYLOAD: &str = "STREAMING-AWS4-HMAC-SHA256-PAYLOAD";
/// A set of AWS security credentials
#[derive(Debug, Eq, PartialEq)]
@@ -72,8 +73,12 @@ impl AwsCredential {
}
}
-struct RequestSigner<'a> {
- date: DateTime<Utc>,
+/// Authorize a [`Request`] with an [`AwsCredential`] using [AWS SigV4]
+///
+/// [AWS SigV4]: https://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
+#[derive(Debug)]
+pub struct AwsAuthorizer<'a> {
+ date: Option<DateTime<Utc>>,
credential: &'a AwsCredential,
service: &'a str,
region: &'a str,
@@ -85,39 +90,78 @@ const HASH_HEADER: &str = "x-amz-content-sha256";
const TOKEN_HEADER: &str = "x-amz-security-token";
const AUTH_HEADER: &str = "authorization";
-impl<'a> RequestSigner<'a> {
- fn sign(&self, request: &mut Request, pre_calculated_digest: Option<Vec<u8>>) {
+impl<'a> AwsAuthorizer<'a> {
+ /// Create a new [`AwsAuthorizer`]
+ pub fn new(credential: &'a AwsCredential, service: &'a str, region: &'a str) -> Self {
+ Self {
+ credential,
+ service,
+ region,
+ date: None,
+ sign_payload: true,
+ }
+ }
+
+ /// Controls whether this [`AwsAuthorizer`] will attempt to sign the request payload,
+ /// the default is `true`
+ pub fn with_sign_payload(mut self, signed: bool) -> Self {
+ self.sign_payload = signed;
+ self
+ }
+
+ /// Authorize `request` with an optional pre-calculated SHA256 digest by attaching
+ /// the relevant [AWS SigV4] headers
+ ///
+ /// # Payload Signature
+ ///
+ /// AWS SigV4 requests must contain the `x-amz-content-sha256` header, it is set as follows:
+ ///
+ /// * If not configured to sign payloads, it is set to `UNSIGNED-PAYLOAD`
+ /// * If a `pre_calculated_digest` is provided, it is set to the hex encoding of it
+ /// * If it is a streaming request, it is set to `STREAMING-AWS4-HMAC-SHA256-PAYLOAD`
+ /// * Otherwise it is set to the hex encoded SHA256 of the request body
+ ///
+ /// [AWS SigV4]: https://docs.aws.amazon.com/IAM/latest/UserGuide/create-signed-request.html
+ pub fn authorize(&self, request: &mut Request, pre_calculated_digest: Option<&[u8]>) {
if let Some(ref token) = self.credential.token {
let token_val = HeaderValue::from_str(token).unwrap();
request.headers_mut().insert(TOKEN_HEADER, token_val);
}
- let host_val = HeaderValue::from_str(
- &request.url()[url::Position::BeforeHost..url::Position::AfterPort],
- )
- .unwrap();
+ let host = &request.url()[url::Position::BeforeHost..url::Position::AfterPort];
+ let host_val = HeaderValue::from_str(host).unwrap();
request.headers_mut().insert("host", host_val);
- let date_str = self.date.format("%Y%m%dT%H%M%SZ").to_string();
+ let date = self.date.unwrap_or_else(Utc::now);
+ let date_str = date.format("%Y%m%dT%H%M%SZ").to_string();
let date_val = HeaderValue::from_str(&date_str).unwrap();
request.headers_mut().insert(DATE_HEADER, date_val);
- let digest = if self.sign_payload {
- if let Some(digest) = pre_calculated_digest {
- hex_encode(&digest)
- } else {
- match request.body() {
+ let digest = match self.sign_payload {
+ false => UNSIGNED_PAYLOAD.to_string(),
+ true => match pre_calculated_digest {
+ Some(digest) => hex_encode(digest),
+ None => match request.body() {
None => EMPTY_SHA256_HASH.to_string(),
- Some(body) => hex_digest(body.as_bytes().unwrap()),
- }
- }
- } else {
- UNSIGNED_PAYLOAD_LITERAL.to_string()
+ Some(body) => match body.as_bytes() {
+ Some(bytes) => hex_digest(bytes),
+ None => STREAMING_PAYLOAD.to_string(),
+ },
+ },
+ },
};
let header_digest = HeaderValue::from_str(&digest).unwrap();
request.headers_mut().insert(HASH_HEADER, header_digest);
+ // Each path segment must be URI-encoded twice (except for Amazon S3 which only gets URI-encoded once).
+ // see https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
+ let canonical_uri = match self.service {
+ "s3" => request.url().path().to_string(),
+ _ => utf8_percent_encode(request.url().path(), &STRICT_PATH_ENCODE_SET)
+ .to_string(),
+ };
+
let (signed_headers, canonical_headers) = canonicalize_headers(request.headers());
let canonical_query = canonicalize_query(request.url());
@@ -125,7 +169,7 @@ impl<'a> RequestSigner<'a> {
let canonical_request = format!(
"{}\n{}\n{}\n{}\n{}\n{}",
request.method().as_str(),
- request.url().path(), // S3 doesn't percent encode this like other services
+ canonical_uri,
canonical_query,
canonical_headers,
signed_headers,
@@ -135,14 +179,14 @@ impl<'a> RequestSigner<'a> {
let hashed_canonical_request = hex_digest(canonical_request.as_bytes());
let scope = format!(
"{}/{}/{}/aws4_request",
- self.date.format("%Y%m%d"),
+ date.format("%Y%m%d"),
self.region,
self.service
);
let string_to_sign = format!(
"AWS4-HMAC-SHA256\n{}\n{}\n{}",
- self.date.format("%Y%m%dT%H%M%SZ"),
+ date.format("%Y%m%dT%H%M%SZ"),
scope,
hashed_canonical_request
);
@@ -150,7 +194,7 @@ impl<'a> RequestSigner<'a> {
// sign the string
let signature =
self.credential
- .sign(&string_to_sign, self.date, self.region, self.service);
+ .sign(&string_to_sign, date, self.region, self.service);
// build the actual auth header
let authorisation = format!(
@@ -171,7 +215,7 @@ pub trait CredentialExt {
region: &str,
service: &str,
sign_payload: bool,
- payload_sha256: Option<Vec<u8>>,
+ payload_sha256: Option<&[u8]>,
) -> Self;
}
@@ -182,21 +226,15 @@ impl CredentialExt for RequestBuilder {
region: &str,
service: &str,
sign_payload: bool,
- payload_sha256: Option<Vec<u8>>,
+ payload_sha256: Option<&[u8]>,
) -> Self {
let (client, request) = self.build_split();
let mut request = request.expect("request valid");
- let date = Utc::now();
- let signer = RequestSigner {
- date,
- credential,
- service,
- region,
- sign_payload,
- };
+ AwsAuthorizer::new(credential, service, region)
+ .with_sign_payload(sign_payload)
+ .authorize(&mut request, payload_sha256);
- signer.sign(&mut request, payload_sha256);
Self::from_parts(client, request)
}
}
@@ -539,15 +577,15 @@ mod tests {
.build()
.unwrap();
- let signer = RequestSigner {
- date,
+ let signer = AwsAuthorizer {
+ date: Some(date),
credential: &credential,
service: "ec2",
region: "us-east-1",
sign_payload: true,
};
- signer.sign(&mut request, None);
+ signer.authorize(&mut request, None);
assert_eq!(request.headers().get(AUTH_HEADER).unwrap(), "AWS4-HMAC-SHA256 Credential=AKIAIOSFODNN7EXAMPLE/20220806/us-east-1/ec2/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=a3c787a7ed37f7fdfbfd2d7056a3d7c9d85e6d52a2bfbec73793c0be6e7862d4")
}
@@ -577,15 +615,15 @@ mod tests {
.build()
.unwrap();
- let signer = RequestSigner {
- date,
+ let authorizer = AwsAuthorizer {
+ date: Some(date),
credential: &credential,
service: "ec2",
region: "us-east-1",
sign_payload: false,
};
- signer.sign(&mut request, None);
+ authorizer.authorize(&mut request, None);
assert_eq!(request.headers().get(AUTH_HEADER).unwrap(), "AWS4-HMAC-SHA256 Credential=AKIAIOSFODNN7EXAMPLE/20220806/us-east-1/ec2/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=653c3d8ea261fd826207df58bc2bb69fbb5003e9eb3c0ef06e4a51f2a81d8699")
}
@@ -614,15 +652,15 @@ mod tests {
.build()
.unwrap();
- let signer = RequestSigner {
- date,
+ let authorizer = AwsAuthorizer {
+ date: Some(date),
credential: &credential,
service: "s3",
region: "us-east-1",
sign_payload: true,
};
- signer.sign(&mut request, None);
+ authorizer.authorize(&mut request, None);
assert_eq!(request.headers().get(AUTH_HEADER).unwrap(), "AWS4-HMAC-SHA256 Credential=H20ABqCkLZID4rLe/20220809/us-east-1/s3/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=9ebf2f92872066c99ac94e573b4e1b80f4dbb8a32b1e8e23178318746e7d1b4d")
}
diff --git a/object_store/src/aws/mod.rs b/object_store/src/aws/mod.rs
index a7f43d153..e71124fba 100644
--- a/object_store/src/aws/mod.rs
+++ b/object_store/src/aws/mod.rs
@@ -82,7 +82,7 @@ const STORE: &str = "S3";
/// [`CredentialProvider`] for [`AmazonS3`]
pub type AwsCredentialProvider = Arc<dyn CredentialProvider<Credential = AwsCredential>>;
-pub use credential::AwsCredential;
+pub use credential::{AwsAuthorizer, AwsCredential};
/// Default metadata endpoint
static METADATA_ENDPOINT: &str = "http://169.254.169.254";
@@ -160,6 +160,7 @@ impl From<Error> for super::Error {
}
/// Get the bucket region using the [HeadBucket API]. This will fail if the bucket does not exist.
+///
/// [HeadBucket API]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_HeadBucket.html
pub async fn resolve_bucket_region(
bucket: &str,