You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2020/08/17 13:46:51 UTC

[arrow] branch master updated: ARROW-9517: [C++/Python] Add support for temporary credentials to S3Options

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 1018a4f  ARROW-9517: [C++/Python] Add support for temporary credentials to S3Options
1018a4f is described below

commit 1018a4f50fb3cd9b5f622a25ad0c69b1389c9a96
Author: Matt Corley <ma...@23andme.com>
AuthorDate: Mon Aug 17 15:46:11 2020 +0200

    ARROW-9517: [C++/Python] Add support for temporary credentials to S3Options
    
    ### Background
    AWS provides a mechanism for using temporary credentials to access AWS resources.  When accessing AWS resources with a set of temporary credentials, [users must provide a session token](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_use-resources.html) in addition to the usual access key id and secret access key.
    
    This PR adds support for providing a session token when initializing an S3FileSystem.
    
    Additionally, this PR adds support for auto-refreshing temporary credentials via STS AssumeRole: instead of passing explicit credentials, users supply the arn of a role to assume, and an `STSAssumeRoleCredentialsProvider` will be created to handle fetching temporary credentials by assuming this role.
    
    ### Changes
    #### C++
    - updated `S3Options.FromAccessKey` and `S3Options.ConfigureAccessKey` to accept an optional `session_token` argument (defaulting to empty string, in accordance with the convention of [Aws::Auth::AWSCredentials](https://sdk.amazonaws.com/cpp/api/0.12.9/d4/d27/class_aws_1_1_auth_1_1_a_w_s_credentials.html) as implemented in the AWS C++ SDK.)
    - updated `S3FileSystem` implementation to initialize S3Client with CredentialsProvider instead of directly passing AwsCredentials (enabling auto-refreshing of AwsCredentials from provider)
    - added `S3Options.GetSessionToken` method
    - added `S3Options.FromAssumeRole` and `S3Options.ConfigureAssumeRoleCredentials` to support auto-refreshing temporary credentials via STS AssumeRole.
    
    #### Python
    - updated cdef `CS3Options` to reflect updates to C++ library
    - added optional `session_token` argument to `S3FileSystem.__init__`.
    
    ### Testing
    - updated `test_s3_options` python unittest to include a mock session_token when initializing S3FileSystem.
    - updated `test_s3_options` python unittest to include relevant scenarios for providing a `role_arn` when initializing S3FileSystem.
    - updated `s3fs_test.cc` c++ tests to cover changes to FromAccessKey as well as the newly added FromAssumeRole.
    - successfully ran ctest suite and pytest suite with `--enable-s3` option.
    
    Closes #7803 from corleyma/dev/ARROW-9517-add-s3-option-session-token
    
    Authored-by: Matt Corley <ma...@23andme.com>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 cpp/cmake_modules/ThirdpartyToolchain.cmake   | 27 +++++++--
 cpp/src/arrow/filesystem/s3fs.cc              | 48 ++++++++++++---
 cpp/src/arrow/filesystem/s3fs.h               | 37 ++++++++++--
 cpp/src/arrow/filesystem/s3fs_test.cc         | 52 ++++++++++++++++-
 python/manylinux1/scripts/build_aws_sdk.sh    |  2 +-
 python/manylinux201x/scripts/build_aws_sdk.sh |  2 +-
 python/pyarrow/_s3fs.pyx                      | 84 +++++++++++++++++++++++----
 python/pyarrow/includes/libarrow_fs.pxd       | 17 +++++-
 python/pyarrow/tests/test_fs.py               | 17 +++++-
 9 files changed, 251 insertions(+), 35 deletions(-)

diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index f983d58..6e50804 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -2632,7 +2632,7 @@ macro(build_awssdk)
   set(AWSSDK_CMAKE_ARGS
       -DCMAKE_BUILD_TYPE=Release
       -DCMAKE_INSTALL_LIBDIR=lib
-      -DBUILD_ONLY=s3;core;config
+      -DBUILD_ONLY=s3;core;config;identity-management;sts
       -DENABLE_UNITY_BUILD=on
       -DENABLE_TESTING=off
       "-DCMAKE_C_FLAGS=${EP_C_FLAGS}"
@@ -2646,7 +2646,16 @@ macro(build_awssdk)
     AWSSDK_S3_SHARED_LIB
     "${AWSSDK_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}aws-cpp-sdk-s3${CMAKE_SHARED_LIBRARY_SUFFIX}"
     )
-  set(AWSSDK_SHARED_LIBS "${AWSSDK_CORE_SHARED_LIB}" "${AWSSDK_S3_SHARED_LIB}")
+  set(
+    AWSSDK_IAM_SHARED_LIB
+    "${AWSSDK_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}aws-cpp-sdk-identity-management${CMAKE_SHARED_LIBRARY_SUFFIX}"
+    )
+  set(
+    AWSSDK_STS_SHARED_LIB
+    "${AWSSDK_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}aws-cpp-sdk-sts${CMAKE_SHARED_LIBRARY_SUFFIX}"
+    )
+  set(AWSSDK_SHARED_LIBS "${AWSSDK_CORE_SHARED_LIB}" "${AWSSDK_S3_SHARED_LIB}"
+                         "${AWSSDK_IAM_SHARED_LIB}" "${AWSSDK_STS_SHARED_LIB}")
 
   externalproject_add(awssdk_ep
                       ${EP_LOG_OPTIONS}
@@ -2668,14 +2677,24 @@ if(ARROW_S3)
 
   # Need to customize the find_package() call, so cannot call resolve_dependency()
   if(AWSSDK_SOURCE STREQUAL "AUTO")
-    find_package(AWSSDK COMPONENTS config s3 transfer)
+    find_package(AWSSDK
+                 COMPONENTS config
+                            s3
+                            transfer
+                            identity-management
+                            sts)
     if(NOT AWSSDK_FOUND)
       build_awssdk()
     endif()
   elseif(AWSSDK_SOURCE STREQUAL "BUNDLED")
     build_awssdk()
   elseif(AWSSDK_SOURCE STREQUAL "SYSTEM")
-    find_package(AWSSDK REQUIRED COMPONENTS config s3 transfer)
+    find_package(AWSSDK REQUIRED
+                 COMPONENTS config
+                            s3
+                            transfer
+                            identity-management
+                            sts)
   endif()
 
   include_directories(SYSTEM ${AWSSDK_INCLUDE_DIR})
diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index 77cc642..cc58890 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -41,6 +41,7 @@
 #include <aws/core/client/RetryStrategy.h>
 #include <aws/core/utils/logging/ConsoleLogSystem.h>
 #include <aws/core/utils/stream/PreallocatedStreamBuf.h>
+#include <aws/identity-management/auth/STSAssumeRoleCredentialsProvider.h>
 #include <aws/s3/S3Client.h>
 #include <aws/s3/model/AbortMultipartUploadRequest.h>
 #include <aws/s3/model/CompleteMultipartUploadRequest.h>
@@ -170,9 +171,19 @@ void S3Options::ConfigureAnonymousCredentials() {
 }
 
 void S3Options::ConfigureAccessKey(const std::string& access_key,
-                                   const std::string& secret_key) {
+                                   const std::string& secret_key,
+                                   const std::string& session_token) {
   credentials_provider = std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(
-      ToAwsString(access_key), ToAwsString(secret_key));
+      ToAwsString(access_key), ToAwsString(secret_key), ToAwsString(session_token));
+}
+
+void S3Options::ConfigureAssumeRoleCredentials(
+    const std::string& role_arn, const std::string& session_name,
+    const std::string& external_id, int load_frequency,
+    const std::shared_ptr<Aws::STS::STSClient>& stsClient) {
+  credentials_provider = std::make_shared<Aws::Auth::STSAssumeRoleCredentialsProvider>(
+      ToAwsString(role_arn), ToAwsString(session_name), ToAwsString(external_id),
+      load_frequency, stsClient);
 }
 
 std::string S3Options::GetAccessKey() const {
@@ -185,6 +196,11 @@ std::string S3Options::GetSecretKey() const {
   return std::string(FromAwsString(credentials.GetAWSSecretKey()));
 }
 
+std::string S3Options::GetSessionToken() const {
+  auto credentials = credentials_provider->GetAWSCredentials();
+  return std::string(FromAwsString(credentials.GetSessionToken()));
+}
+
 S3Options S3Options::Defaults() {
   S3Options options;
   options.ConfigureDefaultCredentials();
@@ -198,9 +214,24 @@ S3Options S3Options::Anonymous() {
 }
 
 S3Options S3Options::FromAccessKey(const std::string& access_key,
-                                   const std::string& secret_key) {
+                                   const std::string& secret_key,
+                                   const std::string& session_token) {
+  S3Options options;
+  options.ConfigureAccessKey(access_key, secret_key, session_token);
+  return options;
+}
+
+S3Options S3Options::FromAssumeRole(
+    const std::string& role_arn, const std::string& session_name,
+    const std::string& external_id, int load_frequency,
+    const std::shared_ptr<Aws::STS::STSClient>& stsClient) {
   S3Options options;
-  options.ConfigureAccessKey(access_key, secret_key);
+  options.role_arn = role_arn;
+  options.session_name = session_name;
+  options.external_id = external_id;
+  options.load_frequency = load_frequency;
+  options.ConfigureAssumeRoleCredentials(role_arn, session_name, external_id,
+                                         load_frequency, stsClient);
   return options;
 }
 
@@ -267,7 +298,8 @@ bool S3Options::Equals(const S3Options& other) const {
   return (region == other.region && endpoint_override == other.endpoint_override &&
           scheme == other.scheme && background_writes == other.background_writes &&
           GetAccessKey() == other.GetAccessKey() &&
-          GetSecretKey() == other.GetSecretKey());
+          GetSecretKey() == other.GetSecretKey() &&
+          GetSessionToken() == other.GetSessionToken());
 }
 
 namespace {
@@ -861,7 +893,7 @@ class S3FileSystem::Impl {
  public:
   S3Options options_;
   Aws::Client::ClientConfiguration client_config_;
-  Aws::Auth::AWSCredentials credentials_;
+  std::shared_ptr<Aws::Auth::AWSCredentialsProvider> credentials_provider_;
   std::unique_ptr<Aws::S3::S3Client> client_;
 
   const int32_t kListObjectsMaxKeys = 1000;
@@ -873,7 +905,7 @@ class S3FileSystem::Impl {
   explicit Impl(S3Options options) : options_(std::move(options)) {}
 
   Status Init() {
-    credentials_ = options_.credentials_provider->GetAWSCredentials();
+    credentials_provider_ = options_.credentials_provider;
     client_config_.region = ToAwsString(options_.region);
     client_config_.endpointOverride = ToAwsString(options_.endpoint_override);
     if (options_.scheme == "http") {
@@ -893,7 +925,7 @@ class S3FileSystem::Impl {
 
     bool use_virtual_addressing = options_.endpoint_override.empty();
     client_.reset(
-        new Aws::S3::S3Client(credentials_, client_config_,
+        new Aws::S3::S3Client(credentials_provider_, client_config_,
                               Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
                               use_virtual_addressing));
     return Status::OK();
diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h
index 6bf2993..aa39c30 100644
--- a/cpp/src/arrow/filesystem/s3fs.h
+++ b/cpp/src/arrow/filesystem/s3fs.h
@@ -29,8 +29,12 @@ namespace Aws {
 namespace Auth {
 
 class AWSCredentialsProvider;
+class STSAssumeRoleCredentialsProvider;
 
 }  // namespace Auth
+namespace STS {
+class STSClient;
+}
 }  // namespace Aws
 
 namespace arrow {
@@ -49,6 +53,15 @@ struct ARROW_EXPORT S3Options {
   /// S3 connection transport, default "https"
   std::string scheme = "https";
 
+  /// ARN of role to assume
+  std::string role_arn;
+  /// Optional identifier for an assumed role session.
+  std::string session_name;
+  /// Optional external identifier to pass to STS when assuming a role
+  std::string external_id;
+  /// Frequency (in seconds) to refresh temporary credentials from assumed role
+  int load_frequency;
+
   /// AWS credentials provider
   std::shared_ptr<Aws::Auth::AWSCredentialsProvider> credentials_provider;
 
@@ -62,10 +75,18 @@ struct ARROW_EXPORT S3Options {
   void ConfigureAnonymousCredentials();
 
   /// Configure with explicit access and secret key.
-  void ConfigureAccessKey(const std::string& access_key, const std::string& secret_key);
+  void ConfigureAccessKey(const std::string& access_key, const std::string& secret_key,
+                          const std::string& session_token = "");
+
+  /// Configure with credentials from an assumed role.
+  void ConfigureAssumeRoleCredentials(
+      const std::string& role_arn, const std::string& session_name = "",
+      const std::string& external_id = "", int load_frequency = 900,
+      const std::shared_ptr<Aws::STS::STSClient>& stsClient = NULLPTR);
 
   std::string GetAccessKey() const;
   std::string GetSecretKey() const;
+  std::string GetSessionToken() const;
 
   bool Equals(const S3Options& other) const;
 
@@ -78,10 +99,18 @@ struct ARROW_EXPORT S3Options {
   ///
   /// This will only let you access public buckets.
   static S3Options Anonymous();
-  /// \brief Initialize with explicit access and secret key
+  /// \brief Initialize with explicit access and secret key.
+  ///
+  /// Optionally, a session token may also be provided for temporary credentials
+  /// (from STS).
   static S3Options FromAccessKey(const std::string& access_key,
-                                 const std::string& secret_key);
-
+                                 const std::string& secret_key,
+                                 const std::string& session_token = "");
+  /// \brief Initialize from an assumed role.
+  static S3Options FromAssumeRole(
+      const std::string& role_arn, const std::string& session_name = "",
+      const std::string& external_id = "", int load_frequency = 900,
+      const std::shared_ptr<Aws::STS::STSClient>& stsClient = NULLPTR);
   static Result<S3Options> FromUri(const ::arrow::internal::Uri& uri,
                                    std::string* out_path = NULLPTR);
   static Result<S3Options> FromUri(const std::string& uri,
diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc
index d71e1bb..58dde3f 100644
--- a/cpp/src/arrow/filesystem/s3fs_test.cc
+++ b/cpp/src/arrow/filesystem/s3fs_test.cc
@@ -28,10 +28,10 @@
 //
 // See also:
 // https://github.com/boostorg/process/blob/develop/include/boost/process/detail/windows/handle_workaround.hpp
-#include <boost/process.hpp>
-
 #include <gtest/gtest.h>
 
+#include <boost/process.hpp>
+
 #ifdef _WIN32
 // Undefine preprocessor macros that interfere with AWS function / method names
 #ifdef GetMessage
@@ -51,6 +51,7 @@
 #include <aws/s3/model/CreateBucketRequest.h>
 #include <aws/s3/model/GetObjectRequest.h>
 #include <aws/s3/model/PutObjectRequest.h>
+#include <aws/sts/STSClient.h>
 
 #include "arrow/filesystem/filesystem.h"
 #include "arrow/filesystem/s3_internal.h"
@@ -61,13 +62,16 @@
 #include "arrow/status.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/util.h"
+#include "arrow/util/io_util.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/macros.h"
 
 namespace arrow {
 namespace fs {
 
+using ::arrow::internal::DelEnvVar;
 using ::arrow::internal::PlatformFilename;
+using ::arrow::internal::SetEnvVar;
 using ::arrow::internal::UriEscape;
 
 using ::arrow::fs::internal::ConnectRetryStrategy;
@@ -152,7 +156,18 @@ void AssertObjectContents(Aws::S3::S3Client* client, const std::string& bucket,
 ////////////////////////////////////////////////////////////////////////////
 // S3Options tests
 
-TEST(S3Options, FromUri) {
+class S3OptionsTest : public ::testing::Test {
+ public:
+  void SetUp() {
+    // we set this environment variable to speed up tests by ensuring
+    // DefaultAWSCredentialsProviderChain does not query (inaccessible)
+    // EC2 metadata endpoint
+    ASSERT_OK(SetEnvVar("AWS_EC2_METADATA_DISABLED", "true"));
+  }
+  void TearDown() { ASSERT_OK(DelEnvVar("AWS_EC2_METADATA_DISABLED")); }
+};
+
+TEST_F(S3OptionsTest, FromUri) {
   std::string path;
   S3Options options;
 
@@ -197,6 +212,37 @@ TEST(S3Options, FromUri) {
   ASSERT_RAISES(Invalid, S3Options::FromUri("s3:///foo/bar/", &path));
 }
 
+TEST_F(S3OptionsTest, FromAccessKey) {
+  S3Options options;
+
+  // session token is optional and should default to empty string
+  options = S3Options::FromAccessKey("access", "secret");
+  ASSERT_EQ(options.GetAccessKey(), "access");
+  ASSERT_EQ(options.GetSecretKey(), "secret");
+  ASSERT_EQ(options.GetSessionToken(), "");
+
+  options = S3Options::FromAccessKey("access", "secret", "token");
+  ASSERT_EQ(options.GetAccessKey(), "access");
+  ASSERT_EQ(options.GetSecretKey(), "secret");
+  ASSERT_EQ(options.GetSessionToken(), "token");
+}
+
+TEST_F(S3OptionsTest, FromAssumeRole) {
+  S3Options options;
+
+  // arn should be only required argument
+  options = S3Options::FromAssumeRole("my_role_arn");
+  options = S3Options::FromAssumeRole("my_role_arn", "session");
+  options = S3Options::FromAssumeRole("my_role_arn", "session", "id");
+  options = S3Options::FromAssumeRole("my_role_arn", "session", "id", 42);
+
+  // test w/ custom STSClient (will not use DefaultAWSCredentialsProviderChain)
+  Aws::Auth::AWSCredentials test_creds = Aws::Auth::AWSCredentials("access", "secret");
+  std::shared_ptr<Aws::STS::STSClient> sts_client =
+      std::make_shared<Aws::STS::STSClient>(Aws::STS::STSClient(test_creds));
+  options = S3Options::FromAssumeRole("my_role_arn", "session", "id", 42, sts_client);
+}
+
 ////////////////////////////////////////////////////////////////////////////
 // Basic test for the Minio test server.
 
diff --git a/python/manylinux1/scripts/build_aws_sdk.sh b/python/manylinux1/scripts/build_aws_sdk.sh
index fd19c35..f33f3e4 100755
--- a/python/manylinux1/scripts/build_aws_sdk.sh
+++ b/python/manylinux1/scripts/build_aws_sdk.sh
@@ -34,7 +34,7 @@ cmake .. -GNinja \
     -DCMAKE_C_FLAGS=${CFLAGS} \
     -DCMAKE_CXX_FLAGS=${CFLAGS} \
     -DCMAKE_INSTALL_PREFIX=${PREFIX} \
-    -DBUILD_ONLY='s3;core;transfer;config' \
+    -DBUILD_ONLY='s3;core;transfer;config;identity-management;sts' \
     -DBUILD_SHARED_LIBS=OFF \
     -DENABLE_CURL_LOGGING=ON \
     -DENABLE_UNITY_BUILD=ON \
diff --git a/python/manylinux201x/scripts/build_aws_sdk.sh b/python/manylinux201x/scripts/build_aws_sdk.sh
index 0471b4d..8271b74 100755
--- a/python/manylinux201x/scripts/build_aws_sdk.sh
+++ b/python/manylinux201x/scripts/build_aws_sdk.sh
@@ -30,7 +30,7 @@ pushd build
 cmake .. -GNinja \
     -DCMAKE_BUILD_TYPE=Release \
     -DCMAKE_INSTALL_PREFIX=${PREFIX} \
-    -DBUILD_ONLY='s3;core;transfer;config' \
+    -DBUILD_ONLY='s3;core;transfer;config;identity-management;sts' \
     -DBUILD_SHARED_LIBS=OFF \
     -DENABLE_CURL_LOGGING=ON \
     -DENABLE_UNITY_BUILD=ON \
diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx
index 3e6a03e..6720dff 100644
--- a/python/pyarrow/_s3fs.pyx
+++ b/python/pyarrow/_s3fs.pyx
@@ -48,9 +48,13 @@ def finalize_s3():
 cdef class S3FileSystem(FileSystem):
     """S3-backed FileSystem implementation
 
-    If neither access_key nor secret_key are provided then attempts to
-    initialize from AWS environment variables, otherwise both access_key and
-    secret_key must be provided.
+    If neither access_key nor secret_key are provided, and role_arn is also not
+    provided, then attempts to initialize from AWS environment variables,
+    otherwise both access_key and secret_key must be provided.
+
+    If role_arn is provided instead of access_key and secret_key, temporary
+    credentials will be fetched by issuing a request to STS to assume the
+    specified role.
 
     Note: S3 buckets are special and the operations available on them may be
     limited or more expensive than desired.
@@ -63,10 +67,24 @@ cdef class S3FileSystem(FileSystem):
     secret_key: str, default None
         AWS Secret Access key. Pass None to use the standard AWS environment
         variables and/or configuration file.
+    session_token: str, default None
+        AWS Session Token.  An optional session token, required if access_key
+        and secret_key are temporary credentials from STS.
     anonymous: boolean, default False
         Whether to connect anonymously if access_key and secret_key are None.
         If true, will not attempt to look up credentials using standard AWS
         configuration methods.
+    role_arn: str, default None
+        AWS Role ARN.  If provided instead of access_key and secret_key,
+        temporary credentials will be fetched by assuming this role.
+    session_name: str, default None
+        An optional identifier for the assumed role session.
+    external_id: str, default None
+        An optional unique identifier that might be required when you assume
+        a role in another account.
+    load_frequency: int, default 900
+        The frequency (in seconds) with which temporary credentials from an
+        assumed role session will be refreshed.
     region: str, default 'us-east-1'
         AWS region to connect to.
     scheme: str, default 'https'
@@ -81,9 +99,11 @@ cdef class S3FileSystem(FileSystem):
     cdef:
         CS3FileSystem* s3fs
 
-    def __init__(self, *, access_key=None, secret_key=None, anonymous=False,
-                 region=None, scheme=None, endpoint_override=None,
-                 bint background_writes=True):
+    def __init__(self, *, access_key=None, secret_key=None, session_token=None,
+                 anonymous=False, region=None, scheme=None,
+                 endpoint_override=None, bint background_writes=True,
+                 role_arn=None, session_name=None, external_id=None,
+                 load_frequency=900):
         cdef:
             CS3Options options
             shared_ptr[CS3FileSystem] wrapped
@@ -100,17 +120,42 @@ cdef class S3FileSystem(FileSystem):
                 'access_key and secret_key must be provided, '
                 '`access_key` is not set.'
             )
-        elif access_key is not None or secret_key is not None:
+
+        elif session_token is not None and (access_key is None or
+                                            secret_key is None):
+            raise ValueError(
+                'In order to initialize a session with temporary credentials, '
+                'both secret_key and access_key must be provided in addition '
+                'to session_token.'
+            )
+
+        elif (access_key is not None or secret_key is not None):
             if anonymous:
                 raise ValueError(
                     'Cannot pass anonymous=True together with access_key '
                     'and secret_key.')
+
+            if role_arn:
+                raise ValueError(
+                    'Cannot provide role_arn with access_key and secret_key')
+
+            if session_token is None:
+                session_token = ""
+
             options = CS3Options.FromAccessKey(
                 tobytes(access_key),
-                tobytes(secret_key)
+                tobytes(secret_key),
+                tobytes(session_token)
             )
         elif anonymous:
             options = CS3Options.Anonymous()
+        elif role_arn is not None:
+            options = CS3Options.FromAssumeRole(
+                tobytes(role_arn),
+                tobytes(session_name),
+                tobytes(external_id),
+                load_frequency
+            )
         else:
             options = CS3Options.Defaults()
 
@@ -138,13 +183,32 @@ cdef class S3FileSystem(FileSystem):
 
     def __reduce__(self):
         cdef CS3Options opts = self.s3fs.options()
+
+        role_arn = frombytes(opts.role_arn)
+
+        # if role_arn is set, we should not re-use temporary credentials
+        # but instead recreate a new assume role session
+        if role_arn:
+            access_key = None
+            secret_key = None
+            session_token = None
+        else:
+            access_key = frombytes(opts.GetAccessKey())
+            secret_key = frombytes(opts.GetSecretKey())
+            session_token = frombytes(opts.GetSessionToken())
+
         return (
             S3FileSystem._reconstruct, (dict(
-                access_key=frombytes(opts.GetAccessKey()),
-                secret_key=frombytes(opts.GetSecretKey()),
+                access_key=access_key,
+                secret_key=secret_key,
+                session_token=session_token,
                 region=frombytes(opts.region),
                 scheme=frombytes(opts.scheme),
                 endpoint_override=frombytes(opts.endpoint_override),
+                role_arn=role_arn,
+                session_name=frombytes(opts.session_name),
+                external_id=frombytes(opts.external_id),
+                load_frequency=opts.load_frequency,
                 background_writes=opts.background_writes
             ),)
         )
diff --git a/python/pyarrow/includes/libarrow_fs.pxd b/python/pyarrow/includes/libarrow_fs.pxd
index 95fe6a3..14165cb 100644
--- a/python/pyarrow/includes/libarrow_fs.pxd
+++ b/python/pyarrow/includes/libarrow_fs.pxd
@@ -130,11 +130,17 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
         c_string endpoint_override
         c_string scheme
         c_bool background_writes
+        c_string role_arn
+        c_string session_name
+        c_string external_id
+        int load_frequency
         void ConfigureDefaultCredentials()
         void ConfigureAccessKey(const c_string& access_key,
-                                const c_string& secret_key)
+                                const c_string& secret_key,
+                                const c_string& session_token)
         c_string GetAccessKey()
         c_string GetSecretKey()
+        c_string GetSessionToken()
         c_bool Equals(const CS3Options& other)
 
         @staticmethod
@@ -145,7 +151,14 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
 
         @staticmethod
         CS3Options FromAccessKey(const c_string& access_key,
-                                 const c_string& secret_key)
+                                 const c_string& secret_key,
+                                 const c_string& session_token)
+
+        @staticmethod
+        CS3Options FromAssumeRole(const c_string& role_arn,
+                                  const c_string& session_name,
+                                  const c_string& external_id,
+                                  const int load_frequency)
 
     cdef cppclass CS3FileSystem "arrow::fs::S3FileSystem"(CFileSystem):
         @staticmethod
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index 50657ea..8559a07 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -987,8 +987,13 @@ def test_s3_options():
     from pyarrow.fs import S3FileSystem
 
     fs = S3FileSystem(access_key='access', secret_key='secret',
-                      region='us-east-1', scheme='https',
-                      endpoint_override='localhost:8999')
+                      session_token='token', region='us-east-1',
+                      scheme='https', endpoint_override='localhost:8999')
+    assert isinstance(fs, S3FileSystem)
+    assert pickle.loads(pickle.dumps(fs)) == fs
+
+    fs = S3FileSystem(role_arn='role', session_name='session',
+                      external_id='id', load_frequency=100)
     assert isinstance(fs, S3FileSystem)
     assert pickle.loads(pickle.dumps(fs)) == fs
 
@@ -996,6 +1001,14 @@ def test_s3_options():
         S3FileSystem(access_key='access')
     with pytest.raises(ValueError):
         S3FileSystem(secret_key='secret')
+    with pytest.raises(ValueError):
+        S3FileSystem(access_key='access', session_token='token')
+    with pytest.raises(ValueError):
+        S3FileSystem(secret_key='secret', session_token='token')
+    with pytest.raises(ValueError):
+        S3FileSystem(
+            access_key='access', secret_key='secret', role_arn='arn'
+        )
 
 
 @pytest.mark.hdfs