You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2020/08/17 13:46:51 UTC
[arrow] branch master updated: ARROW-9517: [C++/Python] Add support
for temporary credentials to S3Options
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 1018a4f ARROW-9517: [C++/Python] Add support for temporary credentials to S3Options
1018a4f is described below
commit 1018a4f50fb3cd9b5f622a25ad0c69b1389c9a96
Author: Matt Corley <ma...@23andme.com>
AuthorDate: Mon Aug 17 15:46:11 2020 +0200
ARROW-9517: [C++/Python] Add support for temporary credentials to S3Options
### Background
AWS provides a mechanism for using temporary credentials to access AWS resources. When accessing AWS resources with a set of temporary credentials, [users must provide a session token](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_use-resources.html) in addition to the usual access key id and secret access key.
This PR adds support for providing a session token when initializing an S3FileSystem.
Additionally, this PR adds support for auto-refreshing temporary credentials via STS AssumeRole: instead of passing explicit credentials, users supply the ARN of a role to assume, and an `STSAssumeRoleCredentialsProvider` will be created to handle fetching temporary credentials by assuming this role.
### Changes
#### C++
- updated `S3Options.FromAccessKey` and `S3Options.ConfigureAccessKey` to accept an optional `session_token` argument (defaulting to empty string, in accordance with the convention of [Aws::Auth::AWSCredentials](https://sdk.amazonaws.com/cpp/api/0.12.9/d4/d27/class_aws_1_1_auth_1_1_a_w_s_credentials.html) as implemented in the AWS C++ SDK.)
- updated `S3FileSystem` implementation to initialize S3Client with CredentialsProvider instead of directly passing AwsCredentials (enabling auto-refreshing of AwsCredentials from provider)
- added `S3Options.GetSessionToken` method
- added `S3Options.FromAssumeRole` and `S3Options.ConfigureAssumeRoleCredentials` to support auto-refreshing temporary credentials via STS AssumeRole.
#### Python
- updated cdef `CS3Options` to reflect updates to C++ library
- added optional `session_token` argument to `S3FileSystem.__init__`.
### Testing
- updated `test_s3_options` python unittest to include a mock session_token when initializing S3FileSystem.
- updated `test_s3_options` python unittest to include relevant scenarios for providing a `role_arn` when initializing S3FileSystem.
- updated `s3fs_test.cc` c++ tests to cover changes to FromAccessKey as well as the newly added FromAssumeRole.
- successfully ran ctest suite and pytest suite with `--enable-s3` option.
Closes #7803 from corleyma/dev/ARROW-9517-add-s3-option-session-token
Authored-by: Matt Corley <ma...@23andme.com>
Signed-off-by: Antoine Pitrou <an...@python.org>
---
cpp/cmake_modules/ThirdpartyToolchain.cmake | 27 +++++++--
cpp/src/arrow/filesystem/s3fs.cc | 48 ++++++++++++---
cpp/src/arrow/filesystem/s3fs.h | 37 ++++++++++--
cpp/src/arrow/filesystem/s3fs_test.cc | 52 ++++++++++++++++-
python/manylinux1/scripts/build_aws_sdk.sh | 2 +-
python/manylinux201x/scripts/build_aws_sdk.sh | 2 +-
python/pyarrow/_s3fs.pyx | 84 +++++++++++++++++++++++----
python/pyarrow/includes/libarrow_fs.pxd | 17 +++++-
python/pyarrow/tests/test_fs.py | 17 +++++-
9 files changed, 251 insertions(+), 35 deletions(-)
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index f983d58..6e50804 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -2632,7 +2632,7 @@ macro(build_awssdk)
set(AWSSDK_CMAKE_ARGS
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_LIBDIR=lib
- -DBUILD_ONLY=s3;core;config
+ -DBUILD_ONLY=s3;core;config;identity-management;sts
-DENABLE_UNITY_BUILD=on
-DENABLE_TESTING=off
"-DCMAKE_C_FLAGS=${EP_C_FLAGS}"
@@ -2646,7 +2646,16 @@ macro(build_awssdk)
AWSSDK_S3_SHARED_LIB
"${AWSSDK_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}aws-cpp-sdk-s3${CMAKE_SHARED_LIBRARY_SUFFIX}"
)
- set(AWSSDK_SHARED_LIBS "${AWSSDK_CORE_SHARED_LIB}" "${AWSSDK_S3_SHARED_LIB}")
+ set(
+ AWSSDK_IAM_SHARED_LIB
+ "${AWSSDK_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}aws-cpp-sdk-identity-management${CMAKE_SHARED_LIBRARY_SUFFIX}"
+ )
+ set(
+ AWSSDK_STS_SHARED_LIB
+ "${AWSSDK_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}aws-cpp-sdk-sts${CMAKE_SHARED_LIBRARY_SUFFIX}"
+ )
+ set(AWSSDK_SHARED_LIBS "${AWSSDK_CORE_SHARED_LIB}" "${AWSSDK_S3_SHARED_LIB}"
+ "${AWSSDK_IAM_SHARED_LIB}" "${AWSSDK_STS_SHARED_LIB}")
externalproject_add(awssdk_ep
${EP_LOG_OPTIONS}
@@ -2668,14 +2677,24 @@ if(ARROW_S3)
# Need to customize the find_package() call, so cannot call resolve_dependency()
if(AWSSDK_SOURCE STREQUAL "AUTO")
- find_package(AWSSDK COMPONENTS config s3 transfer)
+ find_package(AWSSDK
+ COMPONENTS config
+ s3
+ transfer
+ identity-management
+ sts)
if(NOT AWSSDK_FOUND)
build_awssdk()
endif()
elseif(AWSSDK_SOURCE STREQUAL "BUNDLED")
build_awssdk()
elseif(AWSSDK_SOURCE STREQUAL "SYSTEM")
- find_package(AWSSDK REQUIRED COMPONENTS config s3 transfer)
+ find_package(AWSSDK REQUIRED
+ COMPONENTS config
+ s3
+ transfer
+ identity-management
+ sts)
endif()
include_directories(SYSTEM ${AWSSDK_INCLUDE_DIR})
diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index 77cc642..cc58890 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -41,6 +41,7 @@
#include <aws/core/client/RetryStrategy.h>
#include <aws/core/utils/logging/ConsoleLogSystem.h>
#include <aws/core/utils/stream/PreallocatedStreamBuf.h>
+#include <aws/identity-management/auth/STSAssumeRoleCredentialsProvider.h>
#include <aws/s3/S3Client.h>
#include <aws/s3/model/AbortMultipartUploadRequest.h>
#include <aws/s3/model/CompleteMultipartUploadRequest.h>
@@ -170,9 +171,19 @@ void S3Options::ConfigureAnonymousCredentials() {
}
void S3Options::ConfigureAccessKey(const std::string& access_key,
- const std::string& secret_key) {
+ const std::string& secret_key,
+ const std::string& session_token) {
credentials_provider = std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(
- ToAwsString(access_key), ToAwsString(secret_key));
+ ToAwsString(access_key), ToAwsString(secret_key), ToAwsString(session_token));
+}
+
+void S3Options::ConfigureAssumeRoleCredentials(
+ const std::string& role_arn, const std::string& session_name,
+ const std::string& external_id, int load_frequency,
+ const std::shared_ptr<Aws::STS::STSClient>& stsClient) {
+ credentials_provider = std::make_shared<Aws::Auth::STSAssumeRoleCredentialsProvider>(
+ ToAwsString(role_arn), ToAwsString(session_name), ToAwsString(external_id),
+ load_frequency, stsClient);
}
std::string S3Options::GetAccessKey() const {
@@ -185,6 +196,11 @@ std::string S3Options::GetSecretKey() const {
return std::string(FromAwsString(credentials.GetAWSSecretKey()));
}
+std::string S3Options::GetSessionToken() const {
+ auto credentials = credentials_provider->GetAWSCredentials();
+ return std::string(FromAwsString(credentials.GetSessionToken()));
+}
+
S3Options S3Options::Defaults() {
S3Options options;
options.ConfigureDefaultCredentials();
@@ -198,9 +214,24 @@ S3Options S3Options::Anonymous() {
}
S3Options S3Options::FromAccessKey(const std::string& access_key,
- const std::string& secret_key) {
+ const std::string& secret_key,
+ const std::string& session_token) {
+ S3Options options;
+ options.ConfigureAccessKey(access_key, secret_key, session_token);
+ return options;
+}
+
+S3Options S3Options::FromAssumeRole(
+ const std::string& role_arn, const std::string& session_name,
+ const std::string& external_id, int load_frequency,
+ const std::shared_ptr<Aws::STS::STSClient>& stsClient) {
S3Options options;
- options.ConfigureAccessKey(access_key, secret_key);
+ options.role_arn = role_arn;
+ options.session_name = session_name;
+ options.external_id = external_id;
+ options.load_frequency = load_frequency;
+ options.ConfigureAssumeRoleCredentials(role_arn, session_name, external_id,
+ load_frequency, stsClient);
return options;
}
@@ -267,7 +298,8 @@ bool S3Options::Equals(const S3Options& other) const {
return (region == other.region && endpoint_override == other.endpoint_override &&
scheme == other.scheme && background_writes == other.background_writes &&
GetAccessKey() == other.GetAccessKey() &&
- GetSecretKey() == other.GetSecretKey());
+ GetSecretKey() == other.GetSecretKey() &&
+ GetSessionToken() == other.GetSessionToken());
}
namespace {
@@ -861,7 +893,7 @@ class S3FileSystem::Impl {
public:
S3Options options_;
Aws::Client::ClientConfiguration client_config_;
- Aws::Auth::AWSCredentials credentials_;
+ std::shared_ptr<Aws::Auth::AWSCredentialsProvider> credentials_provider_;
std::unique_ptr<Aws::S3::S3Client> client_;
const int32_t kListObjectsMaxKeys = 1000;
@@ -873,7 +905,7 @@ class S3FileSystem::Impl {
explicit Impl(S3Options options) : options_(std::move(options)) {}
Status Init() {
- credentials_ = options_.credentials_provider->GetAWSCredentials();
+ credentials_provider_ = options_.credentials_provider;
client_config_.region = ToAwsString(options_.region);
client_config_.endpointOverride = ToAwsString(options_.endpoint_override);
if (options_.scheme == "http") {
@@ -893,7 +925,7 @@ class S3FileSystem::Impl {
bool use_virtual_addressing = options_.endpoint_override.empty();
client_.reset(
- new Aws::S3::S3Client(credentials_, client_config_,
+ new Aws::S3::S3Client(credentials_provider_, client_config_,
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
use_virtual_addressing));
return Status::OK();
diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h
index 6bf2993..aa39c30 100644
--- a/cpp/src/arrow/filesystem/s3fs.h
+++ b/cpp/src/arrow/filesystem/s3fs.h
@@ -29,8 +29,12 @@ namespace Aws {
namespace Auth {
class AWSCredentialsProvider;
+class STSAssumeRoleCredentialsProvider;
} // namespace Auth
+namespace STS {
+class STSClient;
+}
} // namespace Aws
namespace arrow {
@@ -49,6 +53,15 @@ struct ARROW_EXPORT S3Options {
/// S3 connection transport, default "https"
std::string scheme = "https";
+ /// ARN of role to assume
+ std::string role_arn;
+ /// Optional identifier for an assumed role session.
+ std::string session_name;
+ /// Optional external identifier to pass to STS when assuming a role
+ std::string external_id;
+ /// Frequency (in seconds) to refresh temporary credentials from assumed role
+ int load_frequency;
+
/// AWS credentials provider
std::shared_ptr<Aws::Auth::AWSCredentialsProvider> credentials_provider;
@@ -62,10 +75,18 @@ struct ARROW_EXPORT S3Options {
void ConfigureAnonymousCredentials();
/// Configure with explicit access and secret key.
- void ConfigureAccessKey(const std::string& access_key, const std::string& secret_key);
+ void ConfigureAccessKey(const std::string& access_key, const std::string& secret_key,
+ const std::string& session_token = "");
+
+ /// Configure with credentials from an assumed role.
+ void ConfigureAssumeRoleCredentials(
+ const std::string& role_arn, const std::string& session_name = "",
+ const std::string& external_id = "", int load_frequency = 900,
+ const std::shared_ptr<Aws::STS::STSClient>& stsClient = NULLPTR);
std::string GetAccessKey() const;
std::string GetSecretKey() const;
+ std::string GetSessionToken() const;
bool Equals(const S3Options& other) const;
@@ -78,10 +99,18 @@ struct ARROW_EXPORT S3Options {
///
/// This will only let you access public buckets.
static S3Options Anonymous();
- /// \brief Initialize with explicit access and secret key
+ /// \brief Initialize with explicit access and secret key.
+ ///
+ /// Optionally, a session token may also be provided for temporary credentials
+ /// (from STS).
static S3Options FromAccessKey(const std::string& access_key,
- const std::string& secret_key);
-
+ const std::string& secret_key,
+ const std::string& session_token = "");
+ /// \brief Initialize from an assumed role.
+ static S3Options FromAssumeRole(
+ const std::string& role_arn, const std::string& session_name = "",
+ const std::string& external_id = "", int load_frequency = 900,
+ const std::shared_ptr<Aws::STS::STSClient>& stsClient = NULLPTR);
static Result<S3Options> FromUri(const ::arrow::internal::Uri& uri,
std::string* out_path = NULLPTR);
static Result<S3Options> FromUri(const std::string& uri,
diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc
index d71e1bb..58dde3f 100644
--- a/cpp/src/arrow/filesystem/s3fs_test.cc
+++ b/cpp/src/arrow/filesystem/s3fs_test.cc
@@ -28,10 +28,10 @@
//
// See also:
// https://github.com/boostorg/process/blob/develop/include/boost/process/detail/windows/handle_workaround.hpp
-#include <boost/process.hpp>
-
#include <gtest/gtest.h>
+#include <boost/process.hpp>
+
#ifdef _WIN32
// Undefine preprocessor macros that interfere with AWS function / method names
#ifdef GetMessage
@@ -51,6 +51,7 @@
#include <aws/s3/model/CreateBucketRequest.h>
#include <aws/s3/model/GetObjectRequest.h>
#include <aws/s3/model/PutObjectRequest.h>
+#include <aws/sts/STSClient.h>
#include "arrow/filesystem/filesystem.h"
#include "arrow/filesystem/s3_internal.h"
@@ -61,13 +62,16 @@
#include "arrow/status.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/util.h"
+#include "arrow/util/io_util.h"
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"
namespace arrow {
namespace fs {
+using ::arrow::internal::DelEnvVar;
using ::arrow::internal::PlatformFilename;
+using ::arrow::internal::SetEnvVar;
using ::arrow::internal::UriEscape;
using ::arrow::fs::internal::ConnectRetryStrategy;
@@ -152,7 +156,18 @@ void AssertObjectContents(Aws::S3::S3Client* client, const std::string& bucket,
////////////////////////////////////////////////////////////////////////////
// S3Options tests
-TEST(S3Options, FromUri) {
+class S3OptionsTest : public ::testing::Test {
+ public:
+ void SetUp() {
+ // we set this environment variable to speed up tests by ensuring
+ // DefaultAWSCredentialsProviderChain does not query (inaccessible)
+ // EC2 metadata endpoint
+ ASSERT_OK(SetEnvVar("AWS_EC2_METADATA_DISABLED", "true"));
+ }
+ void TearDown() { ASSERT_OK(DelEnvVar("AWS_EC2_METADATA_DISABLED")); }
+};
+
+TEST_F(S3OptionsTest, FromUri) {
std::string path;
S3Options options;
@@ -197,6 +212,37 @@ TEST(S3Options, FromUri) {
ASSERT_RAISES(Invalid, S3Options::FromUri("s3:///foo/bar/", &path));
}
+TEST_F(S3OptionsTest, FromAccessKey) {
+ S3Options options;
+
+ // session token is optional and should default to empty string
+ options = S3Options::FromAccessKey("access", "secret");
+ ASSERT_EQ(options.GetAccessKey(), "access");
+ ASSERT_EQ(options.GetSecretKey(), "secret");
+ ASSERT_EQ(options.GetSessionToken(), "");
+
+ options = S3Options::FromAccessKey("access", "secret", "token");
+ ASSERT_EQ(options.GetAccessKey(), "access");
+ ASSERT_EQ(options.GetSecretKey(), "secret");
+ ASSERT_EQ(options.GetSessionToken(), "token");
+}
+
+TEST_F(S3OptionsTest, FromAssumeRole) {
+ S3Options options;
+
+ // arn should be only required argument
+ options = S3Options::FromAssumeRole("my_role_arn");
+ options = S3Options::FromAssumeRole("my_role_arn", "session");
+ options = S3Options::FromAssumeRole("my_role_arn", "session", "id");
+ options = S3Options::FromAssumeRole("my_role_arn", "session", "id", 42);
+
+ // test w/ custom STSClient (will not use DefaultAWSCredentialsProviderChain)
+ Aws::Auth::AWSCredentials test_creds = Aws::Auth::AWSCredentials("access", "secret");
+ std::shared_ptr<Aws::STS::STSClient> sts_client =
+ std::make_shared<Aws::STS::STSClient>(Aws::STS::STSClient(test_creds));
+ options = S3Options::FromAssumeRole("my_role_arn", "session", "id", 42, sts_client);
+}
+
////////////////////////////////////////////////////////////////////////////
// Basic test for the Minio test server.
diff --git a/python/manylinux1/scripts/build_aws_sdk.sh b/python/manylinux1/scripts/build_aws_sdk.sh
index fd19c35..f33f3e4 100755
--- a/python/manylinux1/scripts/build_aws_sdk.sh
+++ b/python/manylinux1/scripts/build_aws_sdk.sh
@@ -34,7 +34,7 @@ cmake .. -GNinja \
-DCMAKE_C_FLAGS=${CFLAGS} \
-DCMAKE_CXX_FLAGS=${CFLAGS} \
-DCMAKE_INSTALL_PREFIX=${PREFIX} \
- -DBUILD_ONLY='s3;core;transfer;config' \
+ -DBUILD_ONLY='s3;core;transfer;config;identity-management;sts' \
-DBUILD_SHARED_LIBS=OFF \
-DENABLE_CURL_LOGGING=ON \
-DENABLE_UNITY_BUILD=ON \
diff --git a/python/manylinux201x/scripts/build_aws_sdk.sh b/python/manylinux201x/scripts/build_aws_sdk.sh
index 0471b4d..8271b74 100755
--- a/python/manylinux201x/scripts/build_aws_sdk.sh
+++ b/python/manylinux201x/scripts/build_aws_sdk.sh
@@ -30,7 +30,7 @@ pushd build
cmake .. -GNinja \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=${PREFIX} \
- -DBUILD_ONLY='s3;core;transfer;config' \
+ -DBUILD_ONLY='s3;core;transfer;config;identity-management;sts' \
-DBUILD_SHARED_LIBS=OFF \
-DENABLE_CURL_LOGGING=ON \
-DENABLE_UNITY_BUILD=ON \
diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx
index 3e6a03e..6720dff 100644
--- a/python/pyarrow/_s3fs.pyx
+++ b/python/pyarrow/_s3fs.pyx
@@ -48,9 +48,13 @@ def finalize_s3():
cdef class S3FileSystem(FileSystem):
"""S3-backed FileSystem implementation
- If neither access_key nor secret_key are provided then attempts to
- initialize from AWS environment variables, otherwise both access_key and
- secret_key must be provided.
+ If neither access_key nor secret_key are provided, and role_arn is also not
+ provided, then attempts to initialize from AWS environment variables,
+ otherwise both access_key and secret_key must be provided.
+
+ If role_arn is provided instead of access_key and secret_key, temporary
+ credentials will be fetched by issuing a request to STS to assume the
+ specified role.
Note: S3 buckets are special and the operations available on them may be
limited or more expensive than desired.
@@ -63,10 +67,24 @@ cdef class S3FileSystem(FileSystem):
secret_key: str, default None
AWS Secret Access key. Pass None to use the standard AWS environment
variables and/or configuration file.
+ session_token: str, default None
+ AWS Session Token. An optional session token, required if access_key
+ and secret_key are temporary credentials from STS.
anonymous: boolean, default False
Whether to connect anonymously if access_key and secret_key are None.
If true, will not attempt to look up credentials using standard AWS
configuration methods.
+ role_arn: str, default None
+ AWS Role ARN. If provided instead of access_key and secret_key,
+ temporary credentials will be fetched by assuming this role.
+ session_name: str, default None
+ An optional identifier for the assumed role session.
+ external_id: str, default None
+ An optional unique identifier that might be required when you assume
+ a role in another account.
+ load_frequency: int, default 900
+ The frequency (in seconds) with which temporary credentials from an
+ assumed role session will be refreshed.
region: str, default 'us-east-1'
AWS region to connect to.
scheme: str, default 'https'
@@ -81,9 +99,11 @@ cdef class S3FileSystem(FileSystem):
cdef:
CS3FileSystem* s3fs
- def __init__(self, *, access_key=None, secret_key=None, anonymous=False,
- region=None, scheme=None, endpoint_override=None,
- bint background_writes=True):
+ def __init__(self, *, access_key=None, secret_key=None, session_token=None,
+ anonymous=False, region=None, scheme=None,
+ endpoint_override=None, bint background_writes=True,
+ role_arn=None, session_name=None, external_id=None,
+ load_frequency=900):
cdef:
CS3Options options
shared_ptr[CS3FileSystem] wrapped
@@ -100,17 +120,42 @@ cdef class S3FileSystem(FileSystem):
'access_key and secret_key must be provided, '
'`access_key` is not set.'
)
- elif access_key is not None or secret_key is not None:
+
+ elif session_token is not None and (access_key is None or
+ secret_key is None):
+ raise ValueError(
+ 'In order to initialize a session with temporary credentials, '
+ 'both secret_key and access_key must be provided in addition '
+ 'to session_token.'
+ )
+
+ elif (access_key is not None or secret_key is not None):
if anonymous:
raise ValueError(
'Cannot pass anonymous=True together with access_key '
'and secret_key.')
+
+ if role_arn:
+ raise ValueError(
+ 'Cannot provide role_arn with access_key and secret_key')
+
+ if session_token is None:
+ session_token = ""
+
options = CS3Options.FromAccessKey(
tobytes(access_key),
- tobytes(secret_key)
+ tobytes(secret_key),
+ tobytes(session_token)
)
elif anonymous:
options = CS3Options.Anonymous()
+ elif role_arn is not None:
+ options = CS3Options.FromAssumeRole(
+ tobytes(role_arn),
+ tobytes(session_name),
+ tobytes(external_id),
+ load_frequency
+ )
else:
options = CS3Options.Defaults()
@@ -138,13 +183,32 @@ cdef class S3FileSystem(FileSystem):
def __reduce__(self):
cdef CS3Options opts = self.s3fs.options()
+
+ role_arn = frombytes(opts.role_arn)
+
+ # if role_arn is set, we should not re-use temporary credentials
+ # but instead recreate a new assume role session
+ if role_arn:
+ access_key = None
+ secret_key = None
+ session_token = None
+ else:
+ access_key = frombytes(opts.GetAccessKey())
+ secret_key = frombytes(opts.GetSecretKey())
+ session_token = frombytes(opts.GetSessionToken())
+
return (
S3FileSystem._reconstruct, (dict(
- access_key=frombytes(opts.GetAccessKey()),
- secret_key=frombytes(opts.GetSecretKey()),
+ access_key=access_key,
+ secret_key=secret_key,
+ session_token=session_token,
region=frombytes(opts.region),
scheme=frombytes(opts.scheme),
endpoint_override=frombytes(opts.endpoint_override),
+ role_arn=role_arn,
+ session_name=frombytes(opts.session_name),
+ external_id=frombytes(opts.external_id),
+ load_frequency=opts.load_frequency,
background_writes=opts.background_writes
),)
)
diff --git a/python/pyarrow/includes/libarrow_fs.pxd b/python/pyarrow/includes/libarrow_fs.pxd
index 95fe6a3..14165cb 100644
--- a/python/pyarrow/includes/libarrow_fs.pxd
+++ b/python/pyarrow/includes/libarrow_fs.pxd
@@ -130,11 +130,17 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
c_string endpoint_override
c_string scheme
c_bool background_writes
+ c_string role_arn
+ c_string session_name
+ c_string external_id
+ int load_frequency
void ConfigureDefaultCredentials()
void ConfigureAccessKey(const c_string& access_key,
- const c_string& secret_key)
+ const c_string& secret_key,
+ const c_string& session_token)
c_string GetAccessKey()
c_string GetSecretKey()
+ c_string GetSessionToken()
c_bool Equals(const CS3Options& other)
@staticmethod
@@ -145,7 +151,14 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
@staticmethod
CS3Options FromAccessKey(const c_string& access_key,
- const c_string& secret_key)
+ const c_string& secret_key,
+ const c_string& session_token)
+
+ @staticmethod
+ CS3Options FromAssumeRole(const c_string& role_arn,
+ const c_string& session_name,
+ const c_string& external_id,
+ const int load_frequency)
cdef cppclass CS3FileSystem "arrow::fs::S3FileSystem"(CFileSystem):
@staticmethod
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index 50657ea..8559a07 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -987,8 +987,13 @@ def test_s3_options():
from pyarrow.fs import S3FileSystem
fs = S3FileSystem(access_key='access', secret_key='secret',
- region='us-east-1', scheme='https',
- endpoint_override='localhost:8999')
+ session_token='token', region='us-east-1',
+ scheme='https', endpoint_override='localhost:8999')
+ assert isinstance(fs, S3FileSystem)
+ assert pickle.loads(pickle.dumps(fs)) == fs
+
+ fs = S3FileSystem(role_arn='role', session_name='session',
+ external_id='id', load_frequency=100)
assert isinstance(fs, S3FileSystem)
assert pickle.loads(pickle.dumps(fs)) == fs
@@ -996,6 +1001,14 @@ def test_s3_options():
S3FileSystem(access_key='access')
with pytest.raises(ValueError):
S3FileSystem(secret_key='secret')
+ with pytest.raises(ValueError):
+ S3FileSystem(access_key='access', session_token='token')
+ with pytest.raises(ValueError):
+ S3FileSystem(secret_key='secret', session_token='token')
+ with pytest.raises(ValueError):
+ S3FileSystem(
+ access_key='access', secret_key='secret', role_arn='arn'
+ )
@pytest.mark.hdfs