You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@nifi.apache.org by GitBox <gi...@apache.org> on 2021/03/08 16:26:05 UTC

[GitHub] [nifi-minifi-cpp] lordgamez commented on a change in pull request #975: MINIFICPP-1400 Create ListS3 processor

lordgamez commented on a change in pull request #975:
URL: https://github.com/apache/nifi-minifi-cpp/pull/975#discussion_r589557484



##########
File path: extensions/aws/processors/ListS3.h
##########
@@ -0,0 +1,111 @@
+/**
+ * @file ListS3.h
+ * ListS3 class declaration
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <unordered_map>
+#include <utility>
+#include <tuple>
+#include <memory>
+
+#include "S3Processor.h"
+
+namespace org {
+namespace apache {
+namespace nifi {
+namespace minifi {
+namespace aws {
+namespace processors {
+
+class ListS3 : public S3Processor {
+ public:
+  static constexpr char const* ProcessorName = "ListS3";
+  static const std::string LATEST_LISTED_KEY_PREFIX;
+  static const std::string LATEST_LISTED_KEY_TIMESTAMP;
+
+  // Supported Properties
+  static const core::Property Delimiter;
+  static const core::Property Prefix;
+  static const core::Property UseVersions;
+  static const core::Property MinimumObjectAge;
+  static const core::Property WriteObjectTags;
+  static const core::Property WriteUserMetadata;
+  static const core::Property RequesterPays;
+
+  // Supported Relationships
+  static const core::Relationship Success;
+
+  explicit ListS3(std::string name, minifi::utils::Identifier uuid = minifi::utils::Identifier())
+    : S3Processor(name, uuid, logging::LoggerFactory<ListS3>::getLogger()) {
+  }
+
+  explicit ListS3(std::string name, minifi::utils::Identifier uuid, std::unique_ptr<aws::s3::S3RequestSender> s3_request_sender)
+    : S3Processor(std::move(name), uuid, logging::LoggerFactory<ListS3>::getLogger(), std::move(s3_request_sender)) {
+  }

Review comment:
       Updated in all S3 processors in 65dde7c661bfac32918522999752e6310a5becf7

##########
File path: docker/test/integration/steps/steps.py
##########
@@ -227,10 +238,20 @@ def step_impl(context, cluster_name):
 def step_impl(context):
     context.test.start()
 
+@when("content \"{content}\" is added to file \"{file_name}\" present in directory \"{path}\" {seconds:d} seconds later")
+def step_impl(context, content, file_name, path, seconds):
+    time.sleep(seconds)
+    context.test.add_test_data(path, content, file_name)
+
 @then("a flowfile with the content \"{content}\" is placed in the monitored directory in less than {duration}")
 def step_impl(context, content, duration):
     context.test.check_for_file_with_content_generated(content, timeparse(duration))
 
+@then("{number_of_files:d} flowfiles are placed in the monitored directory in less than {duration}")
+@then("{number_of_files:d} flowfile is placed in the monitored directory in less than {duration}")

Review comment:
       Updated in 623540cadc8eae8f3c1de4439af140f23d8ba5bf

##########
File path: extensions/aws/processors/ListS3.cpp
##########
@@ -0,0 +1,295 @@
+/**
+ * @file ListS3.cpp
+ * ListS3 class implementation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ListS3.h"
+
+#include <tuple>
+#include <algorithm>
+#include <set>
+#include <utility>
+#include <memory>
+
+#include "utils/StringUtils.h"
+
+namespace org {
+namespace apache {
+namespace nifi {
+namespace minifi {
+namespace aws {
+namespace processors {
+
+const std::string ListS3::LATEST_LISTED_KEY_PREFIX = "listed_key.";
+const std::string ListS3::LATEST_LISTED_KEY_TIMESTAMP = "listed_timestamp";
+
+const core::Property ListS3::Delimiter(
+  core::PropertyBuilder::createProperty("Delimiter")
+    ->withDescription("The string used to delimit directories within the bucket. Please consult the AWS documentation for the correct use of this field.")
+    ->build());
+const core::Property ListS3::Prefix(
+  core::PropertyBuilder::createProperty("Prefix")
+    ->withDescription("The prefix used to filter the object list. In most cases, it should end with a forward slash ('/').")
+    ->build());
+const core::Property ListS3::UseVersions(
+  core::PropertyBuilder::createProperty("Use Versions")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("Specifies whether to use S3 versions, if applicable. If false, only the latest version of each object will be returned.")
+    ->build());
+const core::Property ListS3::MinimumObjectAge(
+  core::PropertyBuilder::createProperty("Minimum Object Age")
+    ->isRequired(true)
+    ->withDefaultValue<core::TimePeriodValue>("0 sec")
+    ->withDescription("The minimum age that an S3 object must be in order to be considered; any object younger than this amount of time (according to last modification date) will be ignored.")
+    ->build());
+const core::Property ListS3::WriteObjectTags(
+  core::PropertyBuilder::createProperty("Write Object Tags")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("If set to 'true', the tags associated with the S3 object will be written as FlowFile attributes.")
+    ->build());
+const core::Property ListS3::WriteUserMetadata(
+  core::PropertyBuilder::createProperty("Write User Metadata")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("If set to 'true', the user defined metadata associated with the S3 object will be added to FlowFile attributes/records.")
+    ->build());
+const core::Property ListS3::RequesterPays(
+  core::PropertyBuilder::createProperty("Requester Pays")
+    ->isRequired(true)
+    ->withDefaultValue<bool>(false)
+    ->withDescription("If true, indicates that the requester consents to pay any charges associated with listing the S3 bucket. This sets the 'x-amz-request-payer' header to 'requester'. "
+                      "Note that this setting is only used if Write User Metadata is true.")
+    ->build());
+
+const core::Relationship ListS3::Success("success", "FlowFiles are routed to success relationship");
+
+void ListS3::initialize() {
+  // Add new supported properties
+  updateSupportedProperties({Delimiter, Prefix, UseVersions, MinimumObjectAge, WriteObjectTags, WriteUserMetadata, RequesterPays});
+  // Set the supported relationships
+  setSupportedRelationships({Success});
+}
+
+void ListS3::onSchedule(const std::shared_ptr<core::ProcessContext> &context, const std::shared_ptr<core::ProcessSessionFactory> &sessionFactory) {
+  S3Processor::onSchedule(context, sessionFactory);
+
+  state_manager_ = context->getStateManager();
+  if (state_manager_ == nullptr) {
+    throw Exception(PROCESSOR_EXCEPTION, "Failed to get StateManager");
+  }
+
+  auto common_properties = getCommonELSupportedProperties(context, nullptr);
+  if (!common_properties) {
+    throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Required property is not set or invalid");
+  }
+  configureS3Wrapper(common_properties.value());
+  list_request_params_.bucket = common_properties->bucket;
+
+  context->getProperty(Delimiter.getName(), list_request_params_.delimiter);
+  logger_->log_debug("ListS3: Delimiter [%s]", list_request_params_.delimiter);
+
+  context->getProperty(Prefix.getName(), list_request_params_.prefix);
+  logger_->log_debug("ListS3: Prefix [%s]", list_request_params_.prefix);
+
+  context->getProperty(UseVersions.getName(), list_request_params_.use_versions);
+  logger_->log_debug("ListS3: UseVersions [%s]", list_request_params_.use_versions ? "true" : "false");
+
+  std::string min_obj_age_str;
+  if (!context->getProperty(MinimumObjectAge.getName(), min_obj_age_str) || min_obj_age_str.empty() || !core::Property::getTimeMSFromString(min_obj_age_str, list_request_params_.min_object_age)) {
+    throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Minimum Object Age missing or invalid");
+  }
+  logger_->log_debug("S3Processor: Minimum Object Age [%llud]", min_obj_age_str, list_request_params_.min_object_age);
+
+  context->getProperty(WriteObjectTags.getName(), write_object_tags_);
+  logger_->log_debug("ListS3: WriteObjectTags [%s]", write_object_tags_ ? "true" : "false");
+
+  context->getProperty(WriteUserMetadata.getName(), write_user_metadata_);
+  logger_->log_debug("ListS3: WriteUserMetadata [%s]", write_user_metadata_ ? "true" : "false");
+
+  context->getProperty(RequesterPays.getName(), requester_pays_);
+  logger_->log_debug("ListS3: RequesterPays [%s]", requester_pays_ ? "true" : "false");
+}
+
+void ListS3::writeObjectTags(
+    const std::string &bucket,
+    const aws::s3::ListedObjectAttributes &object_attributes,
+    const std::shared_ptr<core::ProcessSession> &session,
+    const std::shared_ptr<core::FlowFile> &flow_file) {
+  if (!write_object_tags_) {
+    return;
+  }
+
+  auto get_object_tags_result = s3_wrapper_.getObjectTags(bucket, object_attributes.filename, object_attributes.version);
+  if (get_object_tags_result) {
+    for (const auto& tag : get_object_tags_result.value()) {
+      session->putAttribute(flow_file, "s3.tag." + tag.first, tag.second);
+    }
+  } else {
+    logger_->log_warn("Failed to get object tags for object %s in bucket %s", object_attributes.filename, bucket);
+  }
+}
+
+void ListS3::writeUserMetadata(
+    const aws::s3::ListedObjectAttributes &object_attributes,
+    const std::shared_ptr<core::ProcessSession> &session,
+    const std::shared_ptr<core::FlowFile> &flow_file) {
+  if (!write_user_metadata_) {
+    return;
+  }
+
+  aws::s3::HeadObjectRequestParameters params;
+  params.bucket = list_request_params_.bucket;
+  params.object_key = object_attributes.filename;
+  params.version = object_attributes.version;
+  params.requester_pays = requester_pays_;
+  auto head_object_tags_result = s3_wrapper_.headObject(params);
+  if (head_object_tags_result) {
+    for (const auto& metadata : head_object_tags_result->user_metadata_map) {
+      session->putAttribute(flow_file, "s3.user.metadata." + metadata.first, metadata.second);
+    }
+  } else {
+    logger_->log_warn("Failed to get object metadata for object %s in bucket %s", params.object_key, params.bucket);
+  }
+}
+
+std::vector<std::string> ListS3::getLatestListedKeys(const std::unordered_map<std::string, std::string> &state) {

Review comment:
       Updated in 0c24748c92b8fdd6c4ec1070e86a30916e4db885

##########
File path: extensions/aws/processors/ListS3.h
##########
@@ -0,0 +1,111 @@
+/**
+ * @file ListS3.h
+ * ListS3 class declaration
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <unordered_map>
+#include <utility>
+#include <tuple>
+#include <memory>
+
+#include "S3Processor.h"
+
+namespace org {
+namespace apache {
+namespace nifi {
+namespace minifi {
+namespace aws {
+namespace processors {
+
+class ListS3 : public S3Processor {
+ public:
+  static constexpr char const* ProcessorName = "ListS3";
+  static const std::string LATEST_LISTED_KEY_PREFIX;
+  static const std::string LATEST_LISTED_KEY_TIMESTAMP;
+
+  // Supported Properties
+  static const core::Property Delimiter;
+  static const core::Property Prefix;
+  static const core::Property UseVersions;
+  static const core::Property MinimumObjectAge;
+  static const core::Property WriteObjectTags;
+  static const core::Property WriteUserMetadata;
+  static const core::Property RequesterPays;
+
+  // Supported Relationships
+  static const core::Relationship Success;
+
+  explicit ListS3(std::string name, minifi::utils::Identifier uuid = minifi::utils::Identifier())
+    : S3Processor(name, uuid, logging::LoggerFactory<ListS3>::getLogger()) {
+  }
+
+  explicit ListS3(std::string name, minifi::utils::Identifier uuid, std::unique_ptr<aws::s3::S3RequestSender> s3_request_sender)
+    : S3Processor(std::move(name), uuid, logging::LoggerFactory<ListS3>::getLogger(), std::move(s3_request_sender)) {
+  }
+
+  ~ListS3() override = default;
+
+  void initialize() override;
+  void onSchedule(const std::shared_ptr<core::ProcessContext> &context, const std::shared_ptr<core::ProcessSessionFactory> &sessionFactory) override;
+  void onTrigger(const std::shared_ptr<core::ProcessContext> &context, const std::shared_ptr<core::ProcessSession> &session) override;
+
+ private:
+  struct ListingState {
+    int64_t listed_key_timestamp = 0;
+    std::vector<std::string> listed_keys;
+
+    bool wasObjectListedAlready(const aws::s3::ListedObjectAttributes &object_attributes) const;
+    void updateState(const aws::s3::ListedObjectAttributes &object_attributes);
+  };
+
+  void writeObjectTags(
+    const std::string &bucket,
+    const aws::s3::ListedObjectAttributes &object_attributes,
+    const std::shared_ptr<core::ProcessSession> &session,
+    const std::shared_ptr<core::FlowFile> &flow_file);
+  void writeUserMetadata(
+    const aws::s3::ListedObjectAttributes &object_attributes,
+    const std::shared_ptr<core::ProcessSession> &session,
+    const std::shared_ptr<core::FlowFile> &flow_file);
+  std::vector<std::string> getLatestListedKeys(const std::unordered_map<std::string, std::string> &state);
+  uint64_t getLatestListedKeyTimestamp(const std::unordered_map<std::string, std::string> &state);
+  ListingState getCurrentState(const std::shared_ptr<core::ProcessContext> &context);
+  void storeState(const std::shared_ptr<core::ProcessContext> &context, const ListingState &latest_listing_state);

Review comment:
       Updated in 0c24748c92b8fdd6c4ec1070e86a30916e4db885

##########
File path: extensions/aws/processors/ListS3.h
##########
@@ -0,0 +1,111 @@
+/**
+ * @file ListS3.h
+ * ListS3 class declaration
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <unordered_map>
+#include <utility>
+#include <tuple>
+#include <memory>
+
+#include "S3Processor.h"
+
+namespace org {
+namespace apache {
+namespace nifi {
+namespace minifi {
+namespace aws {
+namespace processors {
+
+class ListS3 : public S3Processor {
+ public:
+  static constexpr char const* ProcessorName = "ListS3";
+  static const std::string LATEST_LISTED_KEY_PREFIX;
+  static const std::string LATEST_LISTED_KEY_TIMESTAMP;
+
+  // Supported Properties
+  static const core::Property Delimiter;
+  static const core::Property Prefix;
+  static const core::Property UseVersions;
+  static const core::Property MinimumObjectAge;
+  static const core::Property WriteObjectTags;
+  static const core::Property WriteUserMetadata;
+  static const core::Property RequesterPays;
+
+  // Supported Relationships
+  static const core::Relationship Success;
+
+  explicit ListS3(std::string name, minifi::utils::Identifier uuid = minifi::utils::Identifier())
+    : S3Processor(name, uuid, logging::LoggerFactory<ListS3>::getLogger()) {
+  }
+
+  explicit ListS3(std::string name, minifi::utils::Identifier uuid, std::unique_ptr<aws::s3::S3RequestSender> s3_request_sender)
+    : S3Processor(std::move(name), uuid, logging::LoggerFactory<ListS3>::getLogger(), std::move(s3_request_sender)) {
+  }
+
+  ~ListS3() override = default;
+
+  void initialize() override;
+  void onSchedule(const std::shared_ptr<core::ProcessContext> &context, const std::shared_ptr<core::ProcessSessionFactory> &sessionFactory) override;
+  void onTrigger(const std::shared_ptr<core::ProcessContext> &context, const std::shared_ptr<core::ProcessSession> &session) override;
+
+ private:
+  struct ListingState {
+    int64_t listed_key_timestamp = 0;
+    std::vector<std::string> listed_keys;
+
+    bool wasObjectListedAlready(const aws::s3::ListedObjectAttributes &object_attributes) const;
+    void updateState(const aws::s3::ListedObjectAttributes &object_attributes);
+  };
+
+  void writeObjectTags(
+    const std::string &bucket,
+    const aws::s3::ListedObjectAttributes &object_attributes,
+    const std::shared_ptr<core::ProcessSession> &session,
+    const std::shared_ptr<core::FlowFile> &flow_file);
+  void writeUserMetadata(
+    const aws::s3::ListedObjectAttributes &object_attributes,
+    const std::shared_ptr<core::ProcessSession> &session,
+    const std::shared_ptr<core::FlowFile> &flow_file);
+  std::vector<std::string> getLatestListedKeys(const std::unordered_map<std::string, std::string> &state);
+  uint64_t getLatestListedKeyTimestamp(const std::unordered_map<std::string, std::string> &state);
+  ListingState getCurrentState(const std::shared_ptr<core::ProcessContext> &context);
+  void storeState(const std::shared_ptr<core::ProcessContext> &context, const ListingState &latest_listing_state);
+  void createNewFlowFile(
+    const std::shared_ptr<core::ProcessSession> &session,
+    const aws::s3::ListedObjectAttributes &object_attributes);

Review comment:
       Updated in 0c24748c92b8fdd6c4ec1070e86a30916e4db885

##########
File path: extensions/aws/s3/S3Wrapper.cpp
##########
@@ -30,46 +37,253 @@ namespace minifi {
 namespace aws {
 namespace s3 {
 
-minifi::utils::optional<Aws::S3::Model::PutObjectResult> S3Wrapper::sendPutObjectRequest(const Aws::S3::Model::PutObjectRequest& request) {
-  Aws::S3::S3Client s3_client(credentials_, client_config_);
-  auto outcome = s3_client.PutObject(request);
+void HeadObjectResult::setFilePaths(const std::string& key) {
+  absolute_path = key;
+  std::tie(path, filename) = minifi::utils::file::FileUtils::split_path(key, true /*force_posix*/);
+}
+
+S3Wrapper::S3Wrapper() : request_sender_(minifi::utils::make_unique<S3ClientRequestSender>()) {
+}
+
+S3Wrapper::S3Wrapper(std::unique_ptr<S3RequestSender> request_sender) : request_sender_(std::move(request_sender)) {
+}
+
+void S3Wrapper::setCredentials(const Aws::Auth::AWSCredentials& cred) {
+  request_sender_->setCredentials(cred);
+}
+
+void S3Wrapper::setRegion(const Aws::String& region) {
+  request_sender_->setRegion(region);
+}
+
+void S3Wrapper::setTimeout(uint64_t timeout) {
+  request_sender_->setTimeout(timeout);
+}
+
+void S3Wrapper::setEndpointOverrideUrl(const Aws::String& url) {
+  request_sender_->setEndpointOverrideUrl(url);
+}
+
+void S3Wrapper::setProxy(const ProxyOptions& proxy) {
+  request_sender_->setProxy(proxy);
+}
+
+void S3Wrapper::setCannedAcl(Aws::S3::Model::PutObjectRequest& request, const std::string& canned_acl) const {
+  if (canned_acl.empty() || CANNED_ACL_MAP.find(canned_acl) == CANNED_ACL_MAP.end())
+    return;
+
+  logger_->log_debug("Setting AWS canned ACL [%s]", canned_acl);
+  request.SetACL(CANNED_ACL_MAP.at(canned_acl));
+}
+
+Expiration S3Wrapper::getExpiration(const std::string& expiration) {
+  minifi::utils::Regex expr("expiry-date=\"(.*)\", rule-id=\"(.*)\"");
+  const auto match = expr.match(expiration);
+  const auto& results = expr.getResult();
+  if (!match || results.size() < 3)
+    return Expiration{};
+  return Expiration{results[1], results[2]};
+}

Review comment:
       Updated in 0c24748c92b8fdd6c4ec1070e86a30916e4db885

##########
File path: extensions/aws/s3/S3Wrapper.h
##########
@@ -31,11 +47,190 @@ namespace minifi {
 namespace aws {
 namespace s3 {
 
-class S3Wrapper : public S3WrapperBase {
+static const std::unordered_map<std::string, Aws::S3::Model::StorageClass> STORAGE_CLASS_MAP {

Review comment:
       These maps are used to initialize some static class members in PutS3Objects. If `static` is removed, because of the order of the initialization those members will not be initialized.

##########
File path: extensions/aws/s3/S3Wrapper.h
##########
@@ -31,11 +47,190 @@ namespace minifi {
 namespace aws {
 namespace s3 {
 
-class S3Wrapper : public S3WrapperBase {
+static const std::unordered_map<std::string, Aws::S3::Model::StorageClass> STORAGE_CLASS_MAP {
+  {"Standard", Aws::S3::Model::StorageClass::STANDARD},
+  {"ReducedRedundancy", Aws::S3::Model::StorageClass::REDUCED_REDUNDANCY},
+  {"StandardIA", Aws::S3::Model::StorageClass::STANDARD_IA},
+  {"OnezoneIA", Aws::S3::Model::StorageClass::ONEZONE_IA},
+  {"IntelligentTiering", Aws::S3::Model::StorageClass::INTELLIGENT_TIERING},
+  {"Glacier", Aws::S3::Model::StorageClass::GLACIER},
+  {"DeepArchive", Aws::S3::Model::StorageClass::DEEP_ARCHIVE}
+};
+
+static const std::map<Aws::S3::Model::ObjectStorageClass, std::string> OBJECT_STORAGE_CLASS_MAP {
+  {Aws::S3::Model::ObjectStorageClass::STANDARD, "Standard"},
+  {Aws::S3::Model::ObjectStorageClass::REDUCED_REDUNDANCY, "ReducedRedundancy"},
+  {Aws::S3::Model::ObjectStorageClass::STANDARD_IA, "StandardIA"},
+  {Aws::S3::Model::ObjectStorageClass::ONEZONE_IA, "OnezoneIA"},
+  {Aws::S3::Model::ObjectStorageClass::INTELLIGENT_TIERING, "IntelligentTiering"},
+  {Aws::S3::Model::ObjectStorageClass::GLACIER, "Glacier"},
+  {Aws::S3::Model::ObjectStorageClass::DEEP_ARCHIVE, "DeepArchive"}
+};
+
+static const std::map<Aws::S3::Model::ObjectVersionStorageClass, std::string> VERSION_STORAGE_CLASS_MAP {
+  {Aws::S3::Model::ObjectVersionStorageClass::STANDARD, "Standard"}
+};
+
+static const std::unordered_map<std::string, Aws::S3::Model::ServerSideEncryption> SERVER_SIDE_ENCRYPTION_MAP {
+  {"None", Aws::S3::Model::ServerSideEncryption::NOT_SET},
+  {"AES256", Aws::S3::Model::ServerSideEncryption::AES256},
+  {"aws_kms", Aws::S3::Model::ServerSideEncryption::aws_kms},
+};
+
+static const std::unordered_map<std::string, Aws::S3::Model::ObjectCannedACL> CANNED_ACL_MAP {
+  {"BucketOwnerFullControl", Aws::S3::Model::ObjectCannedACL::bucket_owner_full_control},
+  {"BucketOwnerRead", Aws::S3::Model::ObjectCannedACL::bucket_owner_read},
+  {"AuthenticatedRead", Aws::S3::Model::ObjectCannedACL::authenticated_read},
+  {"PublicReadWrite", Aws::S3::Model::ObjectCannedACL::public_read_write},
+  {"PublicRead", Aws::S3::Model::ObjectCannedACL::public_read},
+  {"Private", Aws::S3::Model::ObjectCannedACL::private_},
+  {"AwsExecRead", Aws::S3::Model::ObjectCannedACL::aws_exec_read},
+};
+
+struct Expiration {
+  std::string expiration_time;
+  std::string expiration_time_rule_id;
+};
+
+struct PutObjectResult {
+  std::string version;
+  std::string etag;
+  std::string expiration;
+  std::string ssealgorithm;
+};
+
+struct PutObjectRequestParameters {
+  std::string bucket;
+  std::string object_key;
+  std::string storage_class;
+  std::string server_side_encryption;
+  std::string content_type;
+  std::map<std::string, std::string> user_metadata_map;
+  std::string fullcontrol_user_list;
+  std::string read_permission_user_list;
+  std::string read_acl_user_list;
+  std::string write_acl_user_list;
+  std::string canned_acl;
+};
+
+struct GetObjectRequestParameters {
+  std::string bucket;
+  std::string object_key;
+  std::string version;
+  bool requester_pays = false;
+};
+
+struct HeadObjectResult {
+  std::string path;
+  std::string absolute_path;
+  std::string filename;
+  std::string mime_type;
+  std::string etag;
+  Expiration expiration;
+  std::string ssealgorithm;
+  std::string version;
+  std::map<std::string, std::string> user_metadata_map;
+
+  void setFilePaths(const std::string& key);
+};
+
+struct GetObjectResult : public HeadObjectResult {
+  int64_t write_size = 0;
+};
+
+struct ListRequestParameters {
+  std::string bucket;
+  std::string delimiter;
+  std::string prefix;
+  bool use_versions = false;
+  uint64_t min_object_age = 0;
+};
+
+struct ListedObjectAttributes {
+  std::string filename;
+  std::string etag;
+  bool is_latest = false;
+  int64_t last_modified = 0;
+  int length = 0;
+  std::string store_class;
+  std::string version;
+};
+
+using HeadObjectRequestParameters = GetObjectRequestParameters;
+
+class S3Wrapper {
+ public:
+  S3Wrapper();
+  explicit S3Wrapper(std::unique_ptr<S3RequestSender> request_sender);
+
+  void setCredentials(const Aws::Auth::AWSCredentials& cred);
+  void setRegion(const Aws::String& region);
+  void setTimeout(uint64_t timeout);
+  void setEndpointOverrideUrl(const Aws::String& url);
+  void setProxy(const ProxyOptions& proxy);
+
+  minifi::utils::optional<PutObjectResult> putObject(const PutObjectRequestParameters& options, std::shared_ptr<Aws::IOStream> data_stream);
+  bool deleteObject(const std::string& bucket, const std::string& object_key, const std::string& version = "");
+  minifi::utils::optional<GetObjectResult> getObject(const GetObjectRequestParameters& get_object_params, const std::shared_ptr<io::BaseStream>& fetched_body);
+  minifi::utils::optional<std::vector<ListedObjectAttributes>> listBucket(const ListRequestParameters& params);
+  minifi::utils::optional<std::map<std::string, std::string>> getObjectTags(const std::string& bucket, const std::string& object_key, const std::string& version = 0);
+  minifi::utils::optional<HeadObjectResult> headObject(const HeadObjectRequestParameters& head_object_params);
+
+  virtual ~S3Wrapper() = default;
+
  protected:
-  minifi::utils::optional<Aws::S3::Model::PutObjectResult> sendPutObjectRequest(const Aws::S3::Model::PutObjectRequest& request) override;
-  bool sendDeleteObjectRequest(const Aws::S3::Model::DeleteObjectRequest& request) override;
-  minifi::utils::optional<Aws::S3::Model::GetObjectResult> sendGetObjectRequest(const Aws::S3::Model::GetObjectRequest& request) override;
+  void setCannedAcl(Aws::S3::Model::PutObjectRequest& request, const std::string& canned_acl) const;
+  int64_t writeFetchedBody(Aws::IOStream& source, const int64_t data_size, const std::shared_ptr<io::BaseStream>& output);
+  Expiration getExpiration(const std::string& expiration);
+  static std::string getEncryptionString(Aws::S3::Model::ServerSideEncryption encryption);
+
+  minifi::utils::optional<std::vector<ListedObjectAttributes>> listVersions(const ListRequestParameters& params);
+  minifi::utils::optional<std::vector<ListedObjectAttributes>> listObjects(const ListRequestParameters& params);
+  void addListResults(const Aws::Vector<Aws::S3::Model::ObjectVersion>& content, uint64_t min_object_age, std::vector<ListedObjectAttributes>& listed_objects);
+  void addListResults(const Aws::Vector<Aws::S3::Model::Object>& content, uint64_t min_object_age, std::vector<ListedObjectAttributes>& listed_objects);
+
+  template<typename ListRequest>
+  ListRequest createListRequest(const ListRequestParameters& params) {

Review comment:
       Updated in 6a44b4beb6b511546226d01655cad0d5c8688fc7

##########
File path: extensions/aws/utils/AWSInitializer.h
##########
@@ -20,6 +20,7 @@
 #pragma once
 
 #include "aws/core/Aws.h"
+#include "aws/core/utils/xml/XmlSerializer.h"

Review comment:
       Moved to `S3RequestSender` header in c2539fcb0e56fb00750f2c3b4dd657f34835f8ec. The XMLDocument class is needed to instantiate the S3 Request classes and it is not implicitly included.

##########
File path: extensions/aws/s3/S3Wrapper.cpp
##########
@@ -30,46 +37,253 @@ namespace minifi {
 namespace aws {
 namespace s3 {
 
-minifi::utils::optional<Aws::S3::Model::PutObjectResult> S3Wrapper::sendPutObjectRequest(const Aws::S3::Model::PutObjectRequest& request) {
-  Aws::S3::S3Client s3_client(credentials_, client_config_);
-  auto outcome = s3_client.PutObject(request);
+void HeadObjectResult::setFilePaths(const std::string& key) {
+  absolute_path = key;
+  std::tie(path, filename) = minifi::utils::file::FileUtils::split_path(key, true /*force_posix*/);
+}
+
+S3Wrapper::S3Wrapper() : request_sender_(minifi::utils::make_unique<S3ClientRequestSender>()) {
+}
+
+S3Wrapper::S3Wrapper(std::unique_ptr<S3RequestSender> request_sender) : request_sender_(std::move(request_sender)) {
+}
+
+void S3Wrapper::setCredentials(const Aws::Auth::AWSCredentials& cred) {
+  request_sender_->setCredentials(cred);
+}
+
+void S3Wrapper::setRegion(const Aws::String& region) {
+  request_sender_->setRegion(region);
+}
+
+void S3Wrapper::setTimeout(uint64_t timeout) {
+  request_sender_->setTimeout(timeout);
+}
+
+void S3Wrapper::setEndpointOverrideUrl(const Aws::String& url) {
+  request_sender_->setEndpointOverrideUrl(url);
+}
+
+void S3Wrapper::setProxy(const ProxyOptions& proxy) {
+  request_sender_->setProxy(proxy);
+}
+
+void S3Wrapper::setCannedAcl(Aws::S3::Model::PutObjectRequest& request, const std::string& canned_acl) const {
+  if (canned_acl.empty() || CANNED_ACL_MAP.find(canned_acl) == CANNED_ACL_MAP.end())
+    return;
+
+  logger_->log_debug("Setting AWS canned ACL [%s]", canned_acl);
+  request.SetACL(CANNED_ACL_MAP.at(canned_acl));
+}
+
+Expiration S3Wrapper::getExpiration(const std::string& expiration) {
+  minifi::utils::Regex expr("expiry-date=\"(.*)\", rule-id=\"(.*)\"");
+  const auto match = expr.match(expiration);
+  const auto& results = expr.getResult();
+  if (!match || results.size() < 3)
+    return Expiration{};
+  return Expiration{results[1], results[2]};
+}
+
+std::string S3Wrapper::getEncryptionString(Aws::S3::Model::ServerSideEncryption encryption) {
+  if (encryption == Aws::S3::Model::ServerSideEncryption::NOT_SET) {
+    return "";
+  }
+
+  auto it = std::find_if(SERVER_SIDE_ENCRYPTION_MAP.begin(), SERVER_SIDE_ENCRYPTION_MAP.end(),
+    [&](const std::pair<std::string, const Aws::S3::Model::ServerSideEncryption&> pair) {

Review comment:
       Nice catch, thanks, it was a typo, fixed in ee41601f1811fbc6bc575bdae25d450709d29f50

##########
File path: extensions/aws/s3/S3Wrapper.cpp
##########
@@ -30,46 +37,253 @@ namespace minifi {
 namespace aws {
 namespace s3 {
 
-minifi::utils::optional<Aws::S3::Model::PutObjectResult> S3Wrapper::sendPutObjectRequest(const Aws::S3::Model::PutObjectRequest& request) {
-  Aws::S3::S3Client s3_client(credentials_, client_config_);
-  auto outcome = s3_client.PutObject(request);
+void HeadObjectResult::setFilePaths(const std::string& key) {
+  absolute_path = key;
+  std::tie(path, filename) = minifi::utils::file::FileUtils::split_path(key, true /*force_posix*/);
+}
+
+S3Wrapper::S3Wrapper() : request_sender_(minifi::utils::make_unique<S3ClientRequestSender>()) {
+}
+
+S3Wrapper::S3Wrapper(std::unique_ptr<S3RequestSender> request_sender) : request_sender_(std::move(request_sender)) {

Review comment:
       I am not sure how would you go about it, could you please elaborate? How would you initialize the `unique_ptr` with a `S3RequestSender&&`?

##########
File path: extensions/aws/s3/S3Wrapper.cpp
##########
@@ -30,46 +37,253 @@ namespace minifi {
 namespace aws {
 namespace s3 {
 
-minifi::utils::optional<Aws::S3::Model::PutObjectResult> S3Wrapper::sendPutObjectRequest(const Aws::S3::Model::PutObjectRequest& request) {
-  Aws::S3::S3Client s3_client(credentials_, client_config_);
-  auto outcome = s3_client.PutObject(request);
+void HeadObjectResult::setFilePaths(const std::string& key) {
+  absolute_path = key;
+  std::tie(path, filename) = minifi::utils::file::FileUtils::split_path(key, true /*force_posix*/);
+}
+
+S3Wrapper::S3Wrapper() : request_sender_(minifi::utils::make_unique<S3ClientRequestSender>()) {
+}
+
+S3Wrapper::S3Wrapper(std::unique_ptr<S3RequestSender> request_sender) : request_sender_(std::move(request_sender)) {
+}
+
+void S3Wrapper::setCredentials(const Aws::Auth::AWSCredentials& cred) {
+  request_sender_->setCredentials(cred);
+}
+
+void S3Wrapper::setRegion(const Aws::String& region) {
+  request_sender_->setRegion(region);
+}
+
+void S3Wrapper::setTimeout(uint64_t timeout) {
+  request_sender_->setTimeout(timeout);
+}
+
+void S3Wrapper::setEndpointOverrideUrl(const Aws::String& url) {
+  request_sender_->setEndpointOverrideUrl(url);
+}
+
+void S3Wrapper::setProxy(const ProxyOptions& proxy) {
+  request_sender_->setProxy(proxy);
+}
+
+void S3Wrapper::setCannedAcl(Aws::S3::Model::PutObjectRequest& request, const std::string& canned_acl) const {
+  if (canned_acl.empty() || CANNED_ACL_MAP.find(canned_acl) == CANNED_ACL_MAP.end())
+    return;
+
+  logger_->log_debug("Setting AWS canned ACL [%s]", canned_acl);
+  request.SetACL(CANNED_ACL_MAP.at(canned_acl));
+}
+
+Expiration S3Wrapper::getExpiration(const std::string& expiration) {
+  minifi::utils::Regex expr("expiry-date=\"(.*)\", rule-id=\"(.*)\"");
+  const auto match = expr.match(expiration);
+  const auto& results = expr.getResult();
+  if (!match || results.size() < 3)
+    return Expiration{};
+  return Expiration{results[1], results[2]};
+}
+
+std::string S3Wrapper::getEncryptionString(Aws::S3::Model::ServerSideEncryption encryption) {
+  if (encryption == Aws::S3::Model::ServerSideEncryption::NOT_SET) {
+    return "";
+  }
+
+  auto it = std::find_if(SERVER_SIDE_ENCRYPTION_MAP.begin(), SERVER_SIDE_ENCRYPTION_MAP.end(),
+    [&](const std::pair<std::string, const Aws::S3::Model::ServerSideEncryption&> pair) {
+      return pair.second == encryption;
+    });
+  if (it != SERVER_SIDE_ENCRYPTION_MAP.end()) {
+    return it->first;
+  }
+  return "";
+}
+
+minifi::utils::optional<PutObjectResult> S3Wrapper::putObject(const PutObjectRequestParameters& put_object_params, std::shared_ptr<Aws::IOStream> data_stream) {
+  Aws::S3::Model::PutObjectRequest request;
+  request.SetBucket(put_object_params.bucket);
+  request.SetKey(put_object_params.object_key);
+  request.SetStorageClass(STORAGE_CLASS_MAP.at(put_object_params.storage_class));
+  request.SetServerSideEncryption(SERVER_SIDE_ENCRYPTION_MAP.at(put_object_params.server_side_encryption));
+  request.SetContentType(put_object_params.content_type);
+  request.SetMetadata(put_object_params.user_metadata_map);
+  request.SetBody(data_stream);
+  request.SetGrantFullControl(put_object_params.fullcontrol_user_list);
+  request.SetGrantRead(put_object_params.read_permission_user_list);
+  request.SetGrantReadACP(put_object_params.read_acl_user_list);
+  request.SetGrantWriteACP(put_object_params.write_acl_user_list);
+  setCannedAcl(request, put_object_params.canned_acl);
+
+  auto aws_result = request_sender_->sendPutObjectRequest(request);
+  if (!aws_result) {
+    return minifi::utils::nullopt;
+  }
+
+  PutObjectResult result;
+  // Etags are returned by AWS in quoted form that should be removed
+  result.etag = minifi::utils::StringUtils::removeFramingCharacters(aws_result->GetETag(), '"');
+  result.version = aws_result->GetVersionId();
+
+  // GetExpiration returns a string pair with a date and a ruleid in 'expiry-date=\"<DATE>\", rule-id=\"<RULEID>\"' format
+  // s3.expiration only needs the date member of this pair
+  result.expiration = getExpiration(aws_result->GetExpiration()).expiration_time;
+  result.ssealgorithm = getEncryptionString(aws_result->GetServerSideEncryption());
+  return result;
+}
+
+bool S3Wrapper::deleteObject(const std::string& bucket, const std::string& object_key, const std::string& version) {
+  Aws::S3::Model::DeleteObjectRequest request;
+  request.SetBucket(bucket);
+  request.SetKey(object_key);
+  if (!version.empty()) {
+    request.SetVersionId(version);
+  }
+  return request_sender_->sendDeleteObjectRequest(request);
+}
+
+int64_t S3Wrapper::writeFetchedBody(Aws::IOStream& source, const int64_t data_size, const std::shared_ptr<io::BaseStream>& output) {

Review comment:
       Updated in 322c3f0d787b1fbd68518f572337951d466bdf3a

##########
File path: extensions/aws/s3/S3Wrapper.cpp
##########
@@ -30,46 +37,253 @@ namespace minifi {
 namespace aws {
 namespace s3 {
 
-minifi::utils::optional<Aws::S3::Model::PutObjectResult> S3Wrapper::sendPutObjectRequest(const Aws::S3::Model::PutObjectRequest& request) {
-  Aws::S3::S3Client s3_client(credentials_, client_config_);
-  auto outcome = s3_client.PutObject(request);
+void HeadObjectResult::setFilePaths(const std::string& key) {
+  absolute_path = key;
+  std::tie(path, filename) = minifi::utils::file::FileUtils::split_path(key, true /*force_posix*/);
+}
+
+S3Wrapper::S3Wrapper() : request_sender_(minifi::utils::make_unique<S3ClientRequestSender>()) {
+}
+
+S3Wrapper::S3Wrapper(std::unique_ptr<S3RequestSender> request_sender) : request_sender_(std::move(request_sender)) {
+}
+
+void S3Wrapper::setCredentials(const Aws::Auth::AWSCredentials& cred) {
+  request_sender_->setCredentials(cred);
+}
+
+void S3Wrapper::setRegion(const Aws::String& region) {
+  request_sender_->setRegion(region);
+}
+
+void S3Wrapper::setTimeout(uint64_t timeout) {
+  request_sender_->setTimeout(timeout);
+}
+
+void S3Wrapper::setEndpointOverrideUrl(const Aws::String& url) {
+  request_sender_->setEndpointOverrideUrl(url);
+}
+
+void S3Wrapper::setProxy(const ProxyOptions& proxy) {
+  request_sender_->setProxy(proxy);
+}
+
+void S3Wrapper::setCannedAcl(Aws::S3::Model::PutObjectRequest& request, const std::string& canned_acl) const {
+  if (canned_acl.empty() || CANNED_ACL_MAP.find(canned_acl) == CANNED_ACL_MAP.end())
+    return;
+
+  logger_->log_debug("Setting AWS canned ACL [%s]", canned_acl);
+  request.SetACL(CANNED_ACL_MAP.at(canned_acl));
+}
+
+Expiration S3Wrapper::getExpiration(const std::string& expiration) {
+  minifi::utils::Regex expr("expiry-date=\"(.*)\", rule-id=\"(.*)\"");
+  const auto match = expr.match(expiration);
+  const auto& results = expr.getResult();
+  if (!match || results.size() < 3)
+    return Expiration{};
+  return Expiration{results[1], results[2]};
+}
+
+std::string S3Wrapper::getEncryptionString(Aws::S3::Model::ServerSideEncryption encryption) {
+  if (encryption == Aws::S3::Model::ServerSideEncryption::NOT_SET) {
+    return "";
+  }
+
+  auto it = std::find_if(SERVER_SIDE_ENCRYPTION_MAP.begin(), SERVER_SIDE_ENCRYPTION_MAP.end(),
+    [&](const std::pair<std::string, const Aws::S3::Model::ServerSideEncryption&> pair) {
+      return pair.second == encryption;
+    });
+  if (it != SERVER_SIDE_ENCRYPTION_MAP.end()) {
+    return it->first;
+  }
+  return "";
+}
+
+minifi::utils::optional<PutObjectResult> S3Wrapper::putObject(const PutObjectRequestParameters& put_object_params, std::shared_ptr<Aws::IOStream> data_stream) {
+  Aws::S3::Model::PutObjectRequest request;
+  request.SetBucket(put_object_params.bucket);
+  request.SetKey(put_object_params.object_key);
+  request.SetStorageClass(STORAGE_CLASS_MAP.at(put_object_params.storage_class));
+  request.SetServerSideEncryption(SERVER_SIDE_ENCRYPTION_MAP.at(put_object_params.server_side_encryption));
+  request.SetContentType(put_object_params.content_type);
+  request.SetMetadata(put_object_params.user_metadata_map);
+  request.SetBody(data_stream);
+  request.SetGrantFullControl(put_object_params.fullcontrol_user_list);
+  request.SetGrantRead(put_object_params.read_permission_user_list);
+  request.SetGrantReadACP(put_object_params.read_acl_user_list);
+  request.SetGrantWriteACP(put_object_params.write_acl_user_list);
+  setCannedAcl(request, put_object_params.canned_acl);
+
+  auto aws_result = request_sender_->sendPutObjectRequest(request);
+  if (!aws_result) {
+    return minifi::utils::nullopt;
+  }
+
+  PutObjectResult result;
+  // Etags are returned by AWS in quoted form that should be removed
+  result.etag = minifi::utils::StringUtils::removeFramingCharacters(aws_result->GetETag(), '"');
+  result.version = aws_result->GetVersionId();
+
+  // GetExpiration returns a string pair with a date and a ruleid in 'expiry-date=\"<DATE>\", rule-id=\"<RULEID>\"' format
+  // s3.expiration only needs the date member of this pair
+  result.expiration = getExpiration(aws_result->GetExpiration()).expiration_time;
+  result.ssealgorithm = getEncryptionString(aws_result->GetServerSideEncryption());
+  return result;
+}
+
+bool S3Wrapper::deleteObject(const std::string& bucket, const std::string& object_key, const std::string& version) {
+  Aws::S3::Model::DeleteObjectRequest request;
+  request.SetBucket(bucket);
+  request.SetKey(object_key);
+  if (!version.empty()) {
+    request.SetVersionId(version);
+  }
+  return request_sender_->sendDeleteObjectRequest(request);
+}
+
+int64_t S3Wrapper::writeFetchedBody(Aws::IOStream& source, const int64_t data_size, const std::shared_ptr<io::BaseStream>& output) {
+  static const int64_t BUFFER_SIZE = 4096;
+  std::vector<uint8_t> buffer;
+  buffer.reserve(BUFFER_SIZE);

Review comment:
       Updated in 322c3f0d787b1fbd68518f572337951d466bdf3a I think the change is also present in the PutAzureBlobStorage PR

##########
File path: extensions/aws/s3/S3Wrapper.cpp
##########
@@ -30,46 +37,253 @@ namespace minifi {
 namespace aws {
 namespace s3 {
 
-minifi::utils::optional<Aws::S3::Model::PutObjectResult> S3Wrapper::sendPutObjectRequest(const Aws::S3::Model::PutObjectRequest& request) {
-  Aws::S3::S3Client s3_client(credentials_, client_config_);
-  auto outcome = s3_client.PutObject(request);
+void HeadObjectResult::setFilePaths(const std::string& key) {
+  absolute_path = key;
+  std::tie(path, filename) = minifi::utils::file::FileUtils::split_path(key, true /*force_posix*/);
+}
+
+S3Wrapper::S3Wrapper() : request_sender_(minifi::utils::make_unique<S3ClientRequestSender>()) {
+}
+
+S3Wrapper::S3Wrapper(std::unique_ptr<S3RequestSender> request_sender) : request_sender_(std::move(request_sender)) {
+}
+
+void S3Wrapper::setCredentials(const Aws::Auth::AWSCredentials& cred) {
+  request_sender_->setCredentials(cred);
+}
+
+void S3Wrapper::setRegion(const Aws::String& region) {
+  request_sender_->setRegion(region);
+}
+
+void S3Wrapper::setTimeout(uint64_t timeout) {
+  request_sender_->setTimeout(timeout);
+}
+
+void S3Wrapper::setEndpointOverrideUrl(const Aws::String& url) {
+  request_sender_->setEndpointOverrideUrl(url);
+}
+
+void S3Wrapper::setProxy(const ProxyOptions& proxy) {
+  request_sender_->setProxy(proxy);
+}
+
+void S3Wrapper::setCannedAcl(Aws::S3::Model::PutObjectRequest& request, const std::string& canned_acl) const {
+  if (canned_acl.empty() || CANNED_ACL_MAP.find(canned_acl) == CANNED_ACL_MAP.end())
+    return;
+
+  logger_->log_debug("Setting AWS canned ACL [%s]", canned_acl);
+  request.SetACL(CANNED_ACL_MAP.at(canned_acl));
+}
+
+Expiration S3Wrapper::getExpiration(const std::string& expiration) {
+  minifi::utils::Regex expr("expiry-date=\"(.*)\", rule-id=\"(.*)\"");
+  const auto match = expr.match(expiration);
+  const auto& results = expr.getResult();
+  if (!match || results.size() < 3)
+    return Expiration{};
+  return Expiration{results[1], results[2]};
+}
+
+std::string S3Wrapper::getEncryptionString(Aws::S3::Model::ServerSideEncryption encryption) {
+  if (encryption == Aws::S3::Model::ServerSideEncryption::NOT_SET) {
+    return "";
+  }
+
+  auto it = std::find_if(SERVER_SIDE_ENCRYPTION_MAP.begin(), SERVER_SIDE_ENCRYPTION_MAP.end(),
+    [&](const std::pair<std::string, const Aws::S3::Model::ServerSideEncryption&> pair) {
+      return pair.second == encryption;
+    });
+  if (it != SERVER_SIDE_ENCRYPTION_MAP.end()) {
+    return it->first;
+  }
+  return "";
+}
+
+minifi::utils::optional<PutObjectResult> S3Wrapper::putObject(const PutObjectRequestParameters& put_object_params, std::shared_ptr<Aws::IOStream> data_stream) {
+  Aws::S3::Model::PutObjectRequest request;
+  request.SetBucket(put_object_params.bucket);
+  request.SetKey(put_object_params.object_key);
+  request.SetStorageClass(STORAGE_CLASS_MAP.at(put_object_params.storage_class));
+  request.SetServerSideEncryption(SERVER_SIDE_ENCRYPTION_MAP.at(put_object_params.server_side_encryption));
+  request.SetContentType(put_object_params.content_type);
+  request.SetMetadata(put_object_params.user_metadata_map);
+  request.SetBody(data_stream);
+  request.SetGrantFullControl(put_object_params.fullcontrol_user_list);
+  request.SetGrantRead(put_object_params.read_permission_user_list);
+  request.SetGrantReadACP(put_object_params.read_acl_user_list);
+  request.SetGrantWriteACP(put_object_params.write_acl_user_list);
+  setCannedAcl(request, put_object_params.canned_acl);
+
+  auto aws_result = request_sender_->sendPutObjectRequest(request);
+  if (!aws_result) {
+    return minifi::utils::nullopt;
+  }
+
+  PutObjectResult result;
+  // Etags are returned by AWS in quoted form that should be removed
+  result.etag = minifi::utils::StringUtils::removeFramingCharacters(aws_result->GetETag(), '"');
+  result.version = aws_result->GetVersionId();
+
+  // GetExpiration returns a string pair with a date and a ruleid in 'expiry-date=\"<DATE>\", rule-id=\"<RULEID>\"' format
+  // s3.expiration only needs the date member of this pair
+  result.expiration = getExpiration(aws_result->GetExpiration()).expiration_time;
+  result.ssealgorithm = getEncryptionString(aws_result->GetServerSideEncryption());
+  return result;
+}
+
+bool S3Wrapper::deleteObject(const std::string& bucket, const std::string& object_key, const std::string& version) {
+  Aws::S3::Model::DeleteObjectRequest request;
+  request.SetBucket(bucket);
+  request.SetKey(object_key);
+  if (!version.empty()) {
+    request.SetVersionId(version);
+  }
+  return request_sender_->sendDeleteObjectRequest(request);
+}
+
+int64_t S3Wrapper::writeFetchedBody(Aws::IOStream& source, const int64_t data_size, const std::shared_ptr<io::BaseStream>& output) {
+  static const int64_t BUFFER_SIZE = 4096;
+  std::vector<uint8_t> buffer;
+  buffer.reserve(BUFFER_SIZE);
 
-  if (outcome.IsSuccess()) {
-      logger_->log_info("Added S3 object '%s' to bucket '%s'", request.GetKey(), request.GetBucket());
-      return outcome.GetResultWithOwnership();
-  } else {
-    logger_->log_error("PutS3Object failed with the following: '%s'", outcome.GetError().GetMessage());
+  int64_t write_size = 0;
+  while (write_size < data_size) {
+    auto next_write_size = (std::min)(data_size - write_size, BUFFER_SIZE);
+    if (!source.read(reinterpret_cast<char*>(buffer.data()), next_write_size)) {
+      return -1;
+    }
+    auto ret = output->write(buffer.data(), next_write_size);
+    if (ret < 0) {
+      return ret;
+    }
+    write_size += next_write_size;
+  }
+  return write_size;
+}
+
+minifi::utils::optional<GetObjectResult> S3Wrapper::getObject(const GetObjectRequestParameters& get_object_params, const std::shared_ptr<io::BaseStream>& out_body) {
+  auto request = createFetchObjectRequest<Aws::S3::Model::GetObjectRequest>(get_object_params);
+  auto aws_result = request_sender_->sendGetObjectRequest(request);
+  if (!aws_result) {
     return minifi::utils::nullopt;
   }
+  auto result = fillFetchObjectResult<Aws::S3::Model::GetObjectResult, GetObjectResult>(get_object_params, aws_result.value());
+  result.write_size = writeFetchedBody(aws_result->GetBody(), aws_result->GetContentLength(), out_body);
+  return result;
+}
+
+void S3Wrapper::addListResults(const Aws::Vector<Aws::S3::Model::ObjectVersion>& content, const uint64_t min_object_age, std::vector<ListedObjectAttributes>& listed_objects) {
+  for (const auto& version : content) {
+    if (last_bucket_list_timestamp_ - min_object_age < version.GetLastModified().Millis()) {
+      logger_->log_debug("Object version '%s' of key '%s' skipped due to minimum object age filter", version.GetVersionId(), version.GetKey());
+      continue;
+    }
+
+    ListedObjectAttributes attributes;
+    attributes.etag = minifi::utils::StringUtils::removeFramingCharacters(version.GetETag(), '"');
+    attributes.filename = version.GetKey();
+    attributes.is_latest = version.GetIsLatest();
+    attributes.last_modified = version.GetLastModified().Millis();
+    attributes.length = version.GetSize();

Review comment:
       Good catch, updated in 322c3f0d787b1fbd68518f572337951d466bdf3a

##########
File path: extensions/aws/s3/S3Wrapper.cpp
##########
@@ -30,46 +37,253 @@ namespace minifi {
 namespace aws {
 namespace s3 {
 
-minifi::utils::optional<Aws::S3::Model::PutObjectResult> S3Wrapper::sendPutObjectRequest(const Aws::S3::Model::PutObjectRequest& request) {
-  Aws::S3::S3Client s3_client(credentials_, client_config_);
-  auto outcome = s3_client.PutObject(request);
+void HeadObjectResult::setFilePaths(const std::string& key) {
+  absolute_path = key;
+  std::tie(path, filename) = minifi::utils::file::FileUtils::split_path(key, true /*force_posix*/);
+}
+
+S3Wrapper::S3Wrapper() : request_sender_(minifi::utils::make_unique<S3ClientRequestSender>()) {
+}
+
+S3Wrapper::S3Wrapper(std::unique_ptr<S3RequestSender> request_sender) : request_sender_(std::move(request_sender)) {
+}
+
+void S3Wrapper::setCredentials(const Aws::Auth::AWSCredentials& cred) {
+  request_sender_->setCredentials(cred);
+}
+
+void S3Wrapper::setRegion(const Aws::String& region) {
+  request_sender_->setRegion(region);
+}
+
+void S3Wrapper::setTimeout(uint64_t timeout) {
+  request_sender_->setTimeout(timeout);
+}
+
+void S3Wrapper::setEndpointOverrideUrl(const Aws::String& url) {
+  request_sender_->setEndpointOverrideUrl(url);
+}
+
+void S3Wrapper::setProxy(const ProxyOptions& proxy) {
+  request_sender_->setProxy(proxy);
+}
+
+void S3Wrapper::setCannedAcl(Aws::S3::Model::PutObjectRequest& request, const std::string& canned_acl) const {
+  if (canned_acl.empty() || CANNED_ACL_MAP.find(canned_acl) == CANNED_ACL_MAP.end())
+    return;
+
+  logger_->log_debug("Setting AWS canned ACL [%s]", canned_acl);
+  request.SetACL(CANNED_ACL_MAP.at(canned_acl));
+}
+
+Expiration S3Wrapper::getExpiration(const std::string& expiration) {
+  minifi::utils::Regex expr("expiry-date=\"(.*)\", rule-id=\"(.*)\"");
+  const auto match = expr.match(expiration);
+  const auto& results = expr.getResult();
+  if (!match || results.size() < 3)
+    return Expiration{};
+  return Expiration{results[1], results[2]};
+}
+
+std::string S3Wrapper::getEncryptionString(Aws::S3::Model::ServerSideEncryption encryption) {
+  if (encryption == Aws::S3::Model::ServerSideEncryption::NOT_SET) {
+    return "";
+  }
+
+  auto it = std::find_if(SERVER_SIDE_ENCRYPTION_MAP.begin(), SERVER_SIDE_ENCRYPTION_MAP.end(),
+    [&](const std::pair<std::string, const Aws::S3::Model::ServerSideEncryption&> pair) {
+      return pair.second == encryption;
+    });
+  if (it != SERVER_SIDE_ENCRYPTION_MAP.end()) {
+    return it->first;
+  }
+  return "";
+}
+
+minifi::utils::optional<PutObjectResult> S3Wrapper::putObject(const PutObjectRequestParameters& put_object_params, std::shared_ptr<Aws::IOStream> data_stream) {
+  Aws::S3::Model::PutObjectRequest request;
+  request.SetBucket(put_object_params.bucket);
+  request.SetKey(put_object_params.object_key);
+  request.SetStorageClass(STORAGE_CLASS_MAP.at(put_object_params.storage_class));
+  request.SetServerSideEncryption(SERVER_SIDE_ENCRYPTION_MAP.at(put_object_params.server_side_encryption));
+  request.SetContentType(put_object_params.content_type);
+  request.SetMetadata(put_object_params.user_metadata_map);
+  request.SetBody(data_stream);
+  request.SetGrantFullControl(put_object_params.fullcontrol_user_list);
+  request.SetGrantRead(put_object_params.read_permission_user_list);
+  request.SetGrantReadACP(put_object_params.read_acl_user_list);
+  request.SetGrantWriteACP(put_object_params.write_acl_user_list);
+  setCannedAcl(request, put_object_params.canned_acl);
+
+  auto aws_result = request_sender_->sendPutObjectRequest(request);
+  if (!aws_result) {
+    return minifi::utils::nullopt;
+  }
+
+  PutObjectResult result;
+  // Etags are returned by AWS in quoted form that should be removed
+  result.etag = minifi::utils::StringUtils::removeFramingCharacters(aws_result->GetETag(), '"');
+  result.version = aws_result->GetVersionId();
+
+  // GetExpiration returns a string pair with a date and a ruleid in 'expiry-date=\"<DATE>\", rule-id=\"<RULEID>\"' format
+  // s3.expiration only needs the date member of this pair
+  result.expiration = getExpiration(aws_result->GetExpiration()).expiration_time;
+  result.ssealgorithm = getEncryptionString(aws_result->GetServerSideEncryption());
+  return result;
+}
+
+bool S3Wrapper::deleteObject(const std::string& bucket, const std::string& object_key, const std::string& version) {
+  Aws::S3::Model::DeleteObjectRequest request;
+  request.SetBucket(bucket);
+  request.SetKey(object_key);
+  if (!version.empty()) {
+    request.SetVersionId(version);
+  }
+  return request_sender_->sendDeleteObjectRequest(request);
+}
+
+int64_t S3Wrapper::writeFetchedBody(Aws::IOStream& source, const int64_t data_size, const std::shared_ptr<io::BaseStream>& output) {
+  static const int64_t BUFFER_SIZE = 4096;
+  std::vector<uint8_t> buffer;
+  buffer.reserve(BUFFER_SIZE);
 
-  if (outcome.IsSuccess()) {
-      logger_->log_info("Added S3 object '%s' to bucket '%s'", request.GetKey(), request.GetBucket());
-      return outcome.GetResultWithOwnership();
-  } else {
-    logger_->log_error("PutS3Object failed with the following: '%s'", outcome.GetError().GetMessage());
+  int64_t write_size = 0;
+  while (write_size < data_size) {
+    auto next_write_size = (std::min)(data_size - write_size, BUFFER_SIZE);
+    if (!source.read(reinterpret_cast<char*>(buffer.data()), next_write_size)) {
+      return -1;
+    }
+    auto ret = output->write(buffer.data(), next_write_size);
+    if (ret < 0) {
+      return ret;
+    }
+    write_size += next_write_size;
+  }
+  return write_size;
+}
+
+minifi::utils::optional<GetObjectResult> S3Wrapper::getObject(const GetObjectRequestParameters& get_object_params, const std::shared_ptr<io::BaseStream>& out_body) {
+  auto request = createFetchObjectRequest<Aws::S3::Model::GetObjectRequest>(get_object_params);
+  auto aws_result = request_sender_->sendGetObjectRequest(request);
+  if (!aws_result) {
     return minifi::utils::nullopt;
   }
+  auto result = fillFetchObjectResult<Aws::S3::Model::GetObjectResult, GetObjectResult>(get_object_params, aws_result.value());
+  result.write_size = writeFetchedBody(aws_result->GetBody(), aws_result->GetContentLength(), out_body);
+  return result;
+}
+
+void S3Wrapper::addListResults(const Aws::Vector<Aws::S3::Model::ObjectVersion>& content, const uint64_t min_object_age, std::vector<ListedObjectAttributes>& listed_objects) {
+  for (const auto& version : content) {
+    if (last_bucket_list_timestamp_ - min_object_age < version.GetLastModified().Millis()) {
+      logger_->log_debug("Object version '%s' of key '%s' skipped due to minimum object age filter", version.GetVersionId(), version.GetKey());
+      continue;
+    }
+
+    ListedObjectAttributes attributes;
+    attributes.etag = minifi::utils::StringUtils::removeFramingCharacters(version.GetETag(), '"');
+    attributes.filename = version.GetKey();
+    attributes.is_latest = version.GetIsLatest();
+    attributes.last_modified = version.GetLastModified().Millis();
+    attributes.length = version.GetSize();
+    attributes.store_class = VERSION_STORAGE_CLASS_MAP.at(version.GetStorageClass());
+    attributes.version = version.GetVersionId();
+    listed_objects.push_back(attributes);
+  }
 }
 
-bool S3Wrapper::sendDeleteObjectRequest(const Aws::S3::Model::DeleteObjectRequest& request) {
-  Aws::S3::S3Client s3_client(credentials_, client_config_);
-  Aws::S3::Model::DeleteObjectOutcome outcome = s3_client.DeleteObject(request);
+void S3Wrapper::addListResults(const Aws::Vector<Aws::S3::Model::Object>& content, const uint64_t min_object_age, std::vector<ListedObjectAttributes>& listed_objects) {
+  for (const auto& object : content) {
+    if (last_bucket_list_timestamp_ - min_object_age < object.GetLastModified().Millis()) {
+      logger_->log_debug("Object with key '%s' skipped due to minimum object age filter", object.GetKey());
+      continue;
+    }
 
-  if (outcome.IsSuccess()) {
-    logger_->log_info("Deleted S3 object '%s' from bucket '%s'", request.GetKey(), request.GetBucket());
-    return true;
-  } else if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) {
-    logger_->log_info("S3 object '%s' was not found in bucket '%s'", request.GetKey(), request.GetBucket());
-    return true;
-  } else {
-    logger_->log_error("DeleteS3Object failed with the following: '%s'", outcome.GetError().GetMessage());
-    return false;
+    ListedObjectAttributes attributes;
+    attributes.etag = minifi::utils::StringUtils::removeFramingCharacters(object.GetETag(), '"');
+    attributes.filename = object.GetKey();
+    attributes.is_latest = true;
+    attributes.last_modified = object.GetLastModified().Millis();
+    attributes.length = object.GetSize();
+    attributes.store_class = OBJECT_STORAGE_CLASS_MAP.at(object.GetStorageClass());
+    listed_objects.push_back(attributes);
   }
 }
 
-minifi::utils::optional<Aws::S3::Model::GetObjectResult> S3Wrapper::sendGetObjectRequest(const Aws::S3::Model::GetObjectRequest& request) {
-  Aws::S3::S3Client s3_client(credentials_, client_config_);
-  auto outcome = s3_client.GetObject(request);
+minifi::utils::optional<std::vector<ListedObjectAttributes>> S3Wrapper::listVersions(const ListRequestParameters& params) {
+  auto request = createListRequest<Aws::S3::Model::ListObjectVersionsRequest>(params);
+  std::vector<ListedObjectAttributes> attribute_list;
+  nonstd::optional_lite::optional<Aws::S3::Model::ListObjectVersionsResult> aws_result;
+  do {
+    aws_result = request_sender_->sendListVersionsRequest(request);
+    if (!aws_result) {
+      return minifi::utils::nullopt;
+    }
+    const auto& versions = aws_result->GetVersions();
+    logger_->log_debug("AWS S3 List operation returned %zu versions. This result is truncated: %s", versions.size(), aws_result->GetIsTruncated() ? "true" : "false");

Review comment:
       Updated in 322c3f0d787b1fbd68518f572337951d466bdf3a




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org