You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "wjones127 (via GitHub)" <gi...@apache.org> on 2023/03/20 21:14:31 UTC

[GitHub] [arrow] wjones127 commented on a diff in pull request #34616: GH-29238 [C++][Dataset][Parquet] Support parquet modular encryption in the new Dataset API

wjones127 commented on code in PR #34616:
URL: https://github.com/apache/arrow/pull/34616#discussion_r1142628397


##########
cpp/src/arrow/dataset/dataset_encryption_test.cc:
##########
@@ -0,0 +1,247 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/testing/gtest_util.h"
+#include "gtest/gtest.h"
+
+#include <arrow/api.h>
+#include <arrow/dataset/api.h>
+#include "arrow/array/builder_primitive.h"
+#include "arrow/builder.h"
+#include "arrow/dataset/partition.h"
+#include "arrow/filesystem/mockfs.h"
+#include "arrow/status.h"
+#include "arrow/table.h"
+#include "parquet/encryption/dataset_encryption_config.h"
+#include "parquet/encryption/test_in_memory_kms.h"
+
+const char dsFooterMasterKey[] = "0123456789012345";
+const char dsFooterMasterKeyId[] = "footer_key";
+const char* const dsColumnMasterKeys[] = {"1234567890123450"};
+const char* const dsColumnMasterKeyIds[] = {"col_key"};
+
+namespace arrow {
+namespace dataset {
+
+class DatasetEncryptionTest : public ::testing::Test {
+ protected:
+  std::unique_ptr<arrow::internal::TemporaryDir> temp_dir_;
+  std::shared_ptr<::arrow::dataset::InMemoryDataset> dataset_;
+  std::string footer_key_name_ = "footer_key";
+
+  ::parquet::encryption::DatasetEncryptionConfiguration dataset_encryption_config_;
+  ::parquet::encryption::DatasetDecryptionConfiguration dataset_decryption_config_;
+  std::string column_key_mapping_;
+  ::parquet::encryption::KmsConnectionConfig kms_connection_config_;
+  std::shared_ptr<::parquet::encryption::CryptoFactory> crypto_factory_;
+  std::shared_ptr<ParquetFileFormat> file_format_;
+  std::shared_ptr<::arrow::fs::FileSystem> file_system_;
+
+  /** setup the test
+   *
+   */
+  void SetUp() {
+    // create our mock file system
+    ::arrow::fs::TimePoint mock_now = std::chrono::system_clock::now();
+    ASSERT_OK_AND_ASSIGN(file_system_,
+                         ::arrow::fs::internal::MockFileSystem::Make(mock_now, {}));
+    // build our dummy table
+    BuildTable();
+
+    auto key_list = BuildKeyMap(dsColumnMasterKeyIds, dsColumnMasterKeys,
+                                dsFooterMasterKeyId, dsFooterMasterKey);
+
+    SetupCryptoFactory(true, key_list);
+
+    column_key_mapping_ = "col_key: a";
+
+    // Setup our Dataset encrytion configurations
+    dataset_encryption_config_.crypto_factory = crypto_factory_;
+    dataset_encryption_config_.kms_connection_config =
+        std::make_shared<::parquet::encryption::KmsConnectionConfig>(
+            kms_connection_config_);
+    dataset_encryption_config_.encryption_config =
+        std::make_shared<::parquet::encryption::EncryptionConfiguration>(
+            footer_key_name_);
+    dataset_encryption_config_.encryption_config->column_keys = column_key_mapping_;
+    dataset_encryption_config_.encryption_config->footer_key = footer_key_name_;
+
+    dataset_decryption_config_.crypto_factory = crypto_factory_;
+    dataset_decryption_config_.kms_connection_config =
+        std::make_shared<::parquet::encryption::KmsConnectionConfig>(
+            kms_connection_config_);
+    dataset_decryption_config_.decryption_config =
+        std::make_shared<::parquet::encryption::DecryptionConfiguration>();
+
+    // create our Parquet file format object
+    file_format_ = std::make_shared<ParquetFileFormat>();
+
+    file_format_->SetDatasetEncryptionConfig(
+        std::make_shared<::parquet::encryption::DatasetEncryptionConfiguration>(
+            dataset_encryption_config_));
+    file_format_->SetDatasetDecryptionConfig(
+        std::make_shared<::parquet::encryption::DatasetDecryptionConfiguration>(
+            dataset_decryption_config_));
+  }
+
+  /** utility to build the key map
+   *
+   */
+  std::unordered_map<std::string, std::string> BuildKeyMap(const char* const* column_ids,
+                                                           const char* const* column_keys,
+                                                           const char* footer_id,
+                                                           const char* footer_key) {
+    std::unordered_map<std::string, std::string> key_map;
+    // add column keys
+    for (int i = 0; i < 1; i++) {
+      key_map.insert({column_ids[i], column_keys[i]});
+    }
+    // add footer key
+    key_map.insert({footer_id, footer_key});
+
+    return key_map;
+  }
+
+  /** utilty to build column key mapping
+   *
+   */
+  std::string BuildColumnKeyMapping() {
+    std::ostringstream stream;
+    stream << dsColumnMasterKeys[0] << ":"
+           << "a"
+           << ";";
+    return stream.str();
+  }
+  /** Write dataset to disk
+   *
+   */
+  void WriteDataset() {
+    auto base_path = "";
+    ASSERT_OK(file_system_->CreateDir(base_path));
+    // Write it using Datasets
+    ASSERT_OK_AND_ASSIGN(auto scanner_builder, dataset_->NewScan());
+    ASSERT_OK_AND_ASSIGN(auto scanner, scanner_builder->Finish());
+
+    auto partition_schema = ::arrow::schema({::arrow::field("part", ::arrow::utf8())});
+    auto partitioning =
+        std::make_shared<::arrow::dataset::HivePartitioning>(partition_schema);
+    ::arrow::dataset::FileSystemDatasetWriteOptions write_options;
+    write_options.file_write_options = file_format_->DefaultWriteOptions();
+    write_options.filesystem = file_system_;
+    write_options.base_dir = base_path;
+    write_options.partitioning = partitioning;
+    write_options.basename_template = "part{i}.parquet";
+    ASSERT_OK(::arrow::dataset::FileSystemDataset::Write(write_options, scanner));
+
+    auto mock_fs =
+        std::dynamic_pointer_cast<::arrow::fs::internal::MockFileSystem>(file_system_);
+
+    std::vector<std::string> files = {"part=a/part0.parquet", "part=b/part0.parquet",
+                                      "part=c/part0.parquet", "part=d/part0.parquet",
+                                      "part=e/part0.parquet", "part=f/part0.parquet",
+                                      "part=g/part0.parquet", "part=h/part0.parquet",
+                                      "part=i/part0.parquet", "part=j/part0.parquet"};
+    ValidateFilesExist(mock_fs, files);
+  }
+
+  /** A utility function to validate our files were written out */
+  void ValidateFilesExist(const std::shared_ptr<arrow::fs::internal::MockFileSystem>& fs,
+                          const std::vector<std::string>& files) {
+    for (const auto& file_path : files) {
+      ASSERT_OK_AND_ASSIGN(auto result, fs->GetFileInfo(file_path));
+
+      ASSERT_NE(result.type(), arrow::fs::FileType::NotFound);
+    }
+  }
+
+  void ReadDataset() {
+    // File format
+    // Partitioning
+    auto partition_schema = arrow::schema({arrow::field("part", arrow::utf8())});
+    auto partitioning =
+        std::make_shared<arrow::dataset::HivePartitioning>(partition_schema);
+
+    // Get FileInfo objects for all files under the base directory
+    arrow::fs::FileSelector selector;
+    selector.base_dir = "";
+    selector.recursive = true;
+    ASSERT_OK_AND_ASSIGN(auto files, file_system_->GetFileInfo(selector));
+
+    // Create a FileSystemDatasetFactory
+    arrow::dataset::FileSystemFactoryOptions factory_options;
+    factory_options.partitioning = partitioning;
+    ASSERT_OK_AND_ASSIGN(auto dataset_factory,
+                         arrow::dataset::FileSystemDatasetFactory::Make(
+                             file_system_, files, file_format_, factory_options));
+
+    // Create a Dataset
+    ASSERT_OK_AND_ASSIGN(auto dataset, dataset_factory->Finish());
+
+    // Create a ScannerBuilder
+    ASSERT_OK_AND_ASSIGN(auto scanner_builder, dataset->NewScan());
+
+    // Create a Scanner
+    ASSERT_OK_AND_ASSIGN(auto scanner, scanner_builder->Finish());

Review Comment:
   This test seems incomplete. The scanner is just configured, but isn't actually used to read data. We should probably read the data and then verify it matches expected values.
   



##########
cpp/src/arrow/dataset/file_parquet.cc:
##########
@@ -594,10 +608,33 @@ Result<std::shared_ptr<FileWriter>> ParquetFileFormat::MakeWriter(
   auto parquet_options = checked_pointer_cast<ParquetFileWriteOptions>(options);
 
   std::unique_ptr<parquet::arrow::FileWriter> parquet_writer;
-  ARROW_ASSIGN_OR_RAISE(parquet_writer, parquet::arrow::FileWriter::Open(
-                                            *schema, default_memory_pool(), destination,
-                                            parquet_options->writer_properties,
-                                            parquet_options->arrow_writer_properties));
+
+  std::shared_ptr<parquet::encryption::DatasetEncryptionConfiguration>
+      dataset_encrypt_config = GetDatasetEncryptionConfig();
+
+  if (dataset_encrypt_config != nullptr) {
+    auto file_encryption_prop =
+        dataset_encrypt_config->crypto_factory->GetFileEncryptionProperties(
+            *dataset_encrypt_config->kms_connection_config.get(),
+            *dataset_encrypt_config->encryption_config.get(), destination_locator.path,
+            destination_locator.filesystem);
+
+    auto writer_properties =
+        parquet::WriterProperties::Builder(*parquet_options->writer_properties.get())
+            .encryption(file_encryption_prop)
+            ->build();
+
+    ARROW_ASSIGN_OR_RAISE(
+        parquet_writer, parquet::arrow::FileWriter::Open(
+                            *schema, default_memory_pool(), destination,

Review Comment:
   This is a pre-existing issue, but I don't think we should be using `default_memory_pool()` here. (Correct me if I am wrong @westonpace.)
   ```suggestion
                               *schema, writer_properties->memory_pool(), destination,
   ```



##########
python/examples/dataset/write_dataset_encrypted.py:
##########
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import pyarrow as pa
+import pyarrow.dataset as ds
+import pyarrow.parquet as pq
+import pyarrow.parquet.encryption as pe
+from pyarrow.tests.parquet.encryption import InMemoryKmsClient
+from datetime import timedelta
+import shutil
+import os
+
+""" A sample to demostrate dataset encryption and decryption"""
+
+# create a list of dictionaries that will represent our dataset
+table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021],
+                  'n_legs': [2, 2, 4, 4, 5, 100],
+                  'animal': ["Flamingo", "Parrot", "Dog", "Horse",
+                             "Brittle stars", "Centipede"]})
+
+# create a PyArrow dataset from the table
+dataset = ds.dataset(table)
+
+FOOTER_KEY = b"0123456789112345"
+FOOTER_KEY_NAME = "footer_key"
+COL_KEY = b"1234567890123450"
+COL_KEY_NAME = "col_key"
+
+encryption_config = pe.EncryptionConfiguration(
+    footer_key=FOOTER_KEY_NAME,
+    column_keys={
+        COL_KEY_NAME: ["n_legs", "animal"],
+    },
+    encryption_algorithm="AES_GCM_V1",
+    cache_lifetime=timedelta(minutes=5.0),
+    data_key_length_bits=256)
+
+decryption_config = pe.DecryptionConfiguration(cache_lifetime=300)

Review Comment:
   In one place `cache_lifetime` is taking a `timedelta` and in another an integer. Can both APIs take either, or is there an inconsistency here?



##########
cpp/src/arrow/dataset/dataset_encryption_test.cc:
##########
@@ -0,0 +1,247 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/testing/gtest_util.h"
+#include "gtest/gtest.h"
+
+#include <arrow/api.h>
+#include <arrow/dataset/api.h>
+#include "arrow/array/builder_primitive.h"
+#include "arrow/builder.h"
+#include "arrow/dataset/partition.h"
+#include "arrow/filesystem/mockfs.h"
+#include "arrow/status.h"
+#include "arrow/table.h"
+#include "parquet/encryption/dataset_encryption_config.h"
+#include "parquet/encryption/test_in_memory_kms.h"
+
+const char dsFooterMasterKey[] = "0123456789012345";
+const char dsFooterMasterKeyId[] = "footer_key";
+const char* const dsColumnMasterKeys[] = {"1234567890123450"};
+const char* const dsColumnMasterKeyIds[] = {"col_key"};
+
+namespace arrow {
+namespace dataset {
+
+class DatasetEncryptionTest : public ::testing::Test {
+ protected:
+  std::unique_ptr<arrow::internal::TemporaryDir> temp_dir_;
+  std::shared_ptr<::arrow::dataset::InMemoryDataset> dataset_;
+  std::string footer_key_name_ = "footer_key";
+
+  ::parquet::encryption::DatasetEncryptionConfiguration dataset_encryption_config_;
+  ::parquet::encryption::DatasetDecryptionConfiguration dataset_decryption_config_;
+  std::string column_key_mapping_;
+  ::parquet::encryption::KmsConnectionConfig kms_connection_config_;
+  std::shared_ptr<::parquet::encryption::CryptoFactory> crypto_factory_;
+  std::shared_ptr<ParquetFileFormat> file_format_;
+  std::shared_ptr<::arrow::fs::FileSystem> file_system_;
+
+  /** setup the test
+   *
+   */
+  void SetUp() {

Review Comment:
   Here's a behavior we should probably test: if we request that a column be encrypted but also use that column as a partition column, what happens? Are the serialized values encrypted, or is an error returned?



##########
cpp/write_dataset_example.py:
##########


Review Comment:
   Should this file be removed?



##########
cpp/src/arrow/dataset/dataset_encryption_test.cc:
##########
@@ -0,0 +1,247 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/testing/gtest_util.h"
+#include "gtest/gtest.h"
+
+#include <arrow/api.h>
+#include <arrow/dataset/api.h>
+#include "arrow/array/builder_primitive.h"
+#include "arrow/builder.h"
+#include "arrow/dataset/partition.h"
+#include "arrow/filesystem/mockfs.h"
+#include "arrow/status.h"
+#include "arrow/table.h"
+#include "parquet/encryption/dataset_encryption_config.h"
+#include "parquet/encryption/test_in_memory_kms.h"
+
+const char dsFooterMasterKey[] = "0123456789012345";
+const char dsFooterMasterKeyId[] = "footer_key";
+const char* const dsColumnMasterKeys[] = {"1234567890123450"};
+const char* const dsColumnMasterKeyIds[] = {"col_key"};
+
+namespace arrow {
+namespace dataset {
+
+class DatasetEncryptionTest : public ::testing::Test {
+ protected:
+  std::unique_ptr<arrow::internal::TemporaryDir> temp_dir_;
+  std::shared_ptr<::arrow::dataset::InMemoryDataset> dataset_;
+  std::string footer_key_name_ = "footer_key";
+
+  ::parquet::encryption::DatasetEncryptionConfiguration dataset_encryption_config_;
+  ::parquet::encryption::DatasetDecryptionConfiguration dataset_decryption_config_;
+  std::string column_key_mapping_;
+  ::parquet::encryption::KmsConnectionConfig kms_connection_config_;
+  std::shared_ptr<::parquet::encryption::CryptoFactory> crypto_factory_;
+  std::shared_ptr<ParquetFileFormat> file_format_;
+  std::shared_ptr<::arrow::fs::FileSystem> file_system_;
+
+  /** setup the test
+   *
+   */
+  void SetUp() {
+    // create our mock file system
+    ::arrow::fs::TimePoint mock_now = std::chrono::system_clock::now();
+    ASSERT_OK_AND_ASSIGN(file_system_,
+                         ::arrow::fs::internal::MockFileSystem::Make(mock_now, {}));
+    // build our dummy table
+    BuildTable();
+
+    auto key_list = BuildKeyMap(dsColumnMasterKeyIds, dsColumnMasterKeys,
+                                dsFooterMasterKeyId, dsFooterMasterKey);
+
+    SetupCryptoFactory(true, key_list);
+
+    column_key_mapping_ = "col_key: a";
+
+    // Setup our Dataset encrytion configurations
+    dataset_encryption_config_.crypto_factory = crypto_factory_;
+    dataset_encryption_config_.kms_connection_config =
+        std::make_shared<::parquet::encryption::KmsConnectionConfig>(
+            kms_connection_config_);
+    dataset_encryption_config_.encryption_config =
+        std::make_shared<::parquet::encryption::EncryptionConfiguration>(
+            footer_key_name_);
+    dataset_encryption_config_.encryption_config->column_keys = column_key_mapping_;
+    dataset_encryption_config_.encryption_config->footer_key = footer_key_name_;
+
+    dataset_decryption_config_.crypto_factory = crypto_factory_;
+    dataset_decryption_config_.kms_connection_config =
+        std::make_shared<::parquet::encryption::KmsConnectionConfig>(
+            kms_connection_config_);
+    dataset_decryption_config_.decryption_config =
+        std::make_shared<::parquet::encryption::DecryptionConfiguration>();
+
+    // create our Parquet file format object
+    file_format_ = std::make_shared<ParquetFileFormat>();
+
+    file_format_->SetDatasetEncryptionConfig(
+        std::make_shared<::parquet::encryption::DatasetEncryptionConfiguration>(
+            dataset_encryption_config_));
+    file_format_->SetDatasetDecryptionConfig(
+        std::make_shared<::parquet::encryption::DatasetDecryptionConfiguration>(
+            dataset_decryption_config_));
+  }
+
+  /** utility to build the key map
+   *
+   */
+  std::unordered_map<std::string, std::string> BuildKeyMap(const char* const* column_ids,
+                                                           const char* const* column_keys,
+                                                           const char* footer_id,
+                                                           const char* footer_key) {
+    std::unordered_map<std::string, std::string> key_map;
+    // add column keys
+    for (int i = 0; i < 1; i++) {
+      key_map.insert({column_ids[i], column_keys[i]});
+    }

Review Comment:
   This loop only uses the first value, since its bound is hard-coded to 1. That is fine for the existing case, but might be confusing for future devs.



##########
python/examples/dataset/write_dataset_encrypted.py:
##########
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import pyarrow as pa

Review Comment:
   Could you transform this into some unit tests for dataset encryption in Python? We should make sure that it passes configuration down and propagates errors up correctly.



##########
cpp/src/arrow/dataset/dataset_encryption_test.cc:
##########
@@ -0,0 +1,247 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/testing/gtest_util.h"
+#include "gtest/gtest.h"
+
+#include <arrow/api.h>
+#include <arrow/dataset/api.h>
+#include "arrow/array/builder_primitive.h"
+#include "arrow/builder.h"
+#include "arrow/dataset/partition.h"
+#include "arrow/filesystem/mockfs.h"
+#include "arrow/status.h"
+#include "arrow/table.h"
+#include "parquet/encryption/dataset_encryption_config.h"
+#include "parquet/encryption/test_in_memory_kms.h"
+
+const char dsFooterMasterKey[] = "0123456789012345";
+const char dsFooterMasterKeyId[] = "footer_key";
+const char* const dsColumnMasterKeys[] = {"1234567890123450"};
+const char* const dsColumnMasterKeyIds[] = {"col_key"};

Review Comment:
   I think going forward we'll prefer `constexpr std::string_view` (there are some places left over from when we supported C++11):
   
   ```suggestion
   constexpr std::string_view dsFooterMasterKey = "0123456789012345";
   constexpr std::string_view dsFooterMasterKeyId = "footer_key";
   constexpr std::string_view dsColumnMasterKeys[] = {"1234567890123450"};
   constexpr std::string_view dsColumnMasterKeyIds[] = {"col_key"};
   ```



##########
cpp/src/arrow/dataset/dataset_encryption_test.cc:
##########
@@ -0,0 +1,247 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/testing/gtest_util.h"
+#include "gtest/gtest.h"
+
+#include <arrow/api.h>
+#include <arrow/dataset/api.h>
+#include "arrow/array/builder_primitive.h"
+#include "arrow/builder.h"
+#include "arrow/dataset/partition.h"
+#include "arrow/filesystem/mockfs.h"
+#include "arrow/status.h"
+#include "arrow/table.h"
+#include "parquet/encryption/dataset_encryption_config.h"
+#include "parquet/encryption/test_in_memory_kms.h"
+
+const char dsFooterMasterKey[] = "0123456789012345";
+const char dsFooterMasterKeyId[] = "footer_key";
+const char* const dsColumnMasterKeys[] = {"1234567890123450"};
+const char* const dsColumnMasterKeyIds[] = {"col_key"};
+
+namespace arrow {
+namespace dataset {
+
+class DatasetEncryptionTest : public ::testing::Test {
+ protected:
+  std::unique_ptr<arrow::internal::TemporaryDir> temp_dir_;
+  std::shared_ptr<::arrow::dataset::InMemoryDataset> dataset_;
+  std::string footer_key_name_ = "footer_key";
+
+  ::parquet::encryption::DatasetEncryptionConfiguration dataset_encryption_config_;
+  ::parquet::encryption::DatasetDecryptionConfiguration dataset_decryption_config_;
+  std::string column_key_mapping_;
+  ::parquet::encryption::KmsConnectionConfig kms_connection_config_;
+  std::shared_ptr<::parquet::encryption::CryptoFactory> crypto_factory_;
+  std::shared_ptr<ParquetFileFormat> file_format_;
+  std::shared_ptr<::arrow::fs::FileSystem> file_system_;
+
+  /** setup the test
+   *
+   */
+  void SetUp() {
+    // create our mock file system
+    ::arrow::fs::TimePoint mock_now = std::chrono::system_clock::now();
+    ASSERT_OK_AND_ASSIGN(file_system_,
+                         ::arrow::fs::internal::MockFileSystem::Make(mock_now, {}));
+    // build our dummy table
+    BuildTable();
+
+    auto key_list = BuildKeyMap(dsColumnMasterKeyIds, dsColumnMasterKeys,
+                                dsFooterMasterKeyId, dsFooterMasterKey);
+
+    SetupCryptoFactory(true, key_list);
+
+    column_key_mapping_ = "col_key: a";
+
+    // Setup our Dataset encrytion configurations
+    dataset_encryption_config_.crypto_factory = crypto_factory_;
+    dataset_encryption_config_.kms_connection_config =
+        std::make_shared<::parquet::encryption::KmsConnectionConfig>(
+            kms_connection_config_);
+    dataset_encryption_config_.encryption_config =
+        std::make_shared<::parquet::encryption::EncryptionConfiguration>(
+            footer_key_name_);
+    dataset_encryption_config_.encryption_config->column_keys = column_key_mapping_;
+    dataset_encryption_config_.encryption_config->footer_key = footer_key_name_;
+
+    dataset_decryption_config_.crypto_factory = crypto_factory_;
+    dataset_decryption_config_.kms_connection_config =
+        std::make_shared<::parquet::encryption::KmsConnectionConfig>(
+            kms_connection_config_);
+    dataset_decryption_config_.decryption_config =
+        std::make_shared<::parquet::encryption::DecryptionConfiguration>();
+
+    // create our Parquet file format object
+    file_format_ = std::make_shared<ParquetFileFormat>();
+
+    file_format_->SetDatasetEncryptionConfig(
+        std::make_shared<::parquet::encryption::DatasetEncryptionConfiguration>(
+            dataset_encryption_config_));
+    file_format_->SetDatasetDecryptionConfig(
+        std::make_shared<::parquet::encryption::DatasetDecryptionConfiguration>(
+            dataset_decryption_config_));
+  }
+
+  /** utility to build the key map
+   *
+   */
+  std::unordered_map<std::string, std::string> BuildKeyMap(const char* const* column_ids,
+                                                           const char* const* column_keys,
+                                                           const char* footer_id,
+                                                           const char* footer_key) {
+    std::unordered_map<std::string, std::string> key_map;
+    // add column keys
+    for (int i = 0; i < 1; i++) {
+      key_map.insert({column_ids[i], column_keys[i]});
+    }
+    // add footer key
+    key_map.insert({footer_id, footer_key});
+
+    return key_map;
+  }
+
+  /** utilty to build column key mapping
+   *
+   */
+  std::string BuildColumnKeyMapping() {
+    std::ostringstream stream;
+    stream << dsColumnMasterKeys[0] << ":"
+           << "a"
+           << ";";
+    return stream.str();
+  }
+  /** Write dataset to disk
+   *
+   */
+  void WriteDataset() {
+    auto base_path = "";
+    ASSERT_OK(file_system_->CreateDir(base_path));
+    // Write it using Datasets
+    ASSERT_OK_AND_ASSIGN(auto scanner_builder, dataset_->NewScan());
+    ASSERT_OK_AND_ASSIGN(auto scanner, scanner_builder->Finish());
+
+    auto partition_schema = ::arrow::schema({::arrow::field("part", ::arrow::utf8())});
+    auto partitioning =
+        std::make_shared<::arrow::dataset::HivePartitioning>(partition_schema);
+    ::arrow::dataset::FileSystemDatasetWriteOptions write_options;
+    write_options.file_write_options = file_format_->DefaultWriteOptions();
+    write_options.filesystem = file_system_;
+    write_options.base_dir = base_path;
+    write_options.partitioning = partitioning;
+    write_options.basename_template = "part{i}.parquet";
+    ASSERT_OK(::arrow::dataset::FileSystemDataset::Write(write_options, scanner));
+
+    auto mock_fs =
+        std::dynamic_pointer_cast<::arrow::fs::internal::MockFileSystem>(file_system_);
+
+    std::vector<std::string> files = {"part=a/part0.parquet", "part=b/part0.parquet",
+                                      "part=c/part0.parquet", "part=d/part0.parquet",
+                                      "part=e/part0.parquet", "part=f/part0.parquet",
+                                      "part=g/part0.parquet", "part=h/part0.parquet",
+                                      "part=i/part0.parquet", "part=j/part0.parquet"};
+    ValidateFilesExist(mock_fs, files);
+  }
+
+  /** A utility function to validate our files were written out */
+  void ValidateFilesExist(const std::shared_ptr<arrow::fs::internal::MockFileSystem>& fs,
+                          const std::vector<std::string>& files) {
+    for (const auto& file_path : files) {
+      ASSERT_OK_AND_ASSIGN(auto result, fs->GetFileInfo(file_path));
+
+      ASSERT_NE(result.type(), arrow::fs::FileType::NotFound);
+    }
+  }
+
+  void ReadDataset() {
+    // File format
+    // Partitioning
+    auto partition_schema = arrow::schema({arrow::field("part", arrow::utf8())});
+    auto partitioning =
+        std::make_shared<arrow::dataset::HivePartitioning>(partition_schema);
+
+    // Get FileInfo objects for all files under the base directory
+    arrow::fs::FileSelector selector;
+    selector.base_dir = "";
+    selector.recursive = true;
+    ASSERT_OK_AND_ASSIGN(auto files, file_system_->GetFileInfo(selector));
+
+    // Create a FileSystemDatasetFactory
+    arrow::dataset::FileSystemFactoryOptions factory_options;
+    factory_options.partitioning = partitioning;
+    ASSERT_OK_AND_ASSIGN(auto dataset_factory,
+                         arrow::dataset::FileSystemDatasetFactory::Make(
+                             file_system_, files, file_format_, factory_options));
+
+    // Create a Dataset
+    ASSERT_OK_AND_ASSIGN(auto dataset, dataset_factory->Finish());
+
+    // Create a ScannerBuilder
+    ASSERT_OK_AND_ASSIGN(auto scanner_builder, dataset->NewScan());
+
+    // Create a Scanner
+    ASSERT_OK_AND_ASSIGN(auto scanner, scanner_builder->Finish());
+
+  }
+
+  /** Build a dummy table
+   *
+   */
+  void BuildTable() {
+    // Create an Arrow Table
+    auto schema = arrow::schema(
+        {arrow::field("a", arrow::int64()), arrow::field("b", arrow::int64()),
+         arrow::field("c", arrow::int64()), arrow::field("part", arrow::utf8())});
+    std::vector<std::shared_ptr<arrow::Array>> arrays(4);
+    arrow::NumericBuilder<arrow::Int64Type> builder;
+    ASSERT_OK(builder.AppendValues({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}));
+    ASSERT_OK(builder.Finish(&arrays[0]));
+    builder.Reset();
+    ASSERT_OK(builder.AppendValues({9, 8, 7, 6, 5, 4, 3, 2, 1, 0}));
+    ASSERT_OK(builder.Finish(&arrays[1]));
+    builder.Reset();
+    ASSERT_OK(builder.AppendValues({1, 2, 1, 2, 1, 2, 1, 2, 1, 2}));
+    ASSERT_OK(builder.Finish(&arrays[2]));
+    arrow::StringBuilder string_builder;
+    ASSERT_OK(
+        string_builder.AppendValues({"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"}));
+    ASSERT_OK(string_builder.Finish(&arrays[3]));
+    auto table = arrow::Table::Make(schema, arrays);
+    // Write it using Datasets
+    dataset_ = std::make_shared<::arrow::dataset::InMemoryDataset>(table);
+  }
+
+  /** Helper function to create crypto factory and setup
+   */
+  void SetupCryptoFactory(bool wrap_locally,
+                          const std::unordered_map<std::string, std::string>& key_list) {
+    crypto_factory_ = std::make_shared<::parquet::encryption::CryptoFactory>();
+
+    std::shared_ptr<::parquet::encryption::KmsClientFactory> kms_client_factory =
+        std::make_shared<::parquet::encryption::TestOnlyInMemoryKmsClientFactory>(
+            wrap_locally, key_list);
+
+    crypto_factory_->RegisterKmsClientFactory(kms_client_factory);
+  }
+};
+
+TEST_F(DatasetEncryptionTest, WriteDatasetEncrypted) { this->WriteDataset(); }
+TEST_F(DatasetEncryptionTest, ReadDatasetEncrypted) { this->ReadDataset(); }

Review Comment:
   IIUC, these tests depend on each other. Perhaps we should combine them into a single `RoundTripDatasetEncrypted` test.



##########
python/examples/dataset/write_dataset_encrypted.py:
##########
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import pyarrow as pa
+import pyarrow.dataset as ds
+import pyarrow.parquet as pq
+import pyarrow.parquet.encryption as pe
+from pyarrow.tests.parquet.encryption import InMemoryKmsClient
+from datetime import timedelta
+import shutil
+import os
+
+""" A sample to demostrate dataset encryption and decryption"""
+
+# build an in-memory table with three columns: year, n_legs and animal
+table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021],
+                  'n_legs': [2, 2, 4, 4, 5, 100],
+                  'animal': ["Flamingo", "Parrot", "Dog", "Horse",
+                             "Brittle stars", "Centipede"]})
+
+# create a PyArrow dataset from the table
+dataset = ds.dataset(table)
+
+FOOTER_KEY = b"0123456789112345"
+FOOTER_KEY_NAME = "footer_key"
+COL_KEY = b"1234567890123450"
+COL_KEY_NAME = "col_key"
+
+encryption_config = pe.EncryptionConfiguration(
+    footer_key=FOOTER_KEY_NAME,
+    column_keys={
+        COL_KEY_NAME: ["n_legs", "animal"],
+    },

Review Comment:
   Would be good to add comments to the example explaining what this means. For example:
   
   ```suggestion
       # Use COL_KEY_NAME to encrypt `n_legs` and `animal` columns.
       column_keys={
           COL_KEY_NAME: ["n_legs", "animal"],
       },
   ```



##########
cpp/src/arrow/dataset/file_parquet.cc:
##########
@@ -594,10 +608,33 @@ Result<std::shared_ptr<FileWriter>> ParquetFileFormat::MakeWriter(
   auto parquet_options = checked_pointer_cast<ParquetFileWriteOptions>(options);
 
   std::unique_ptr<parquet::arrow::FileWriter> parquet_writer;
-  ARROW_ASSIGN_OR_RAISE(parquet_writer, parquet::arrow::FileWriter::Open(
-                                            *schema, default_memory_pool(), destination,
-                                            parquet_options->writer_properties,
-                                            parquet_options->arrow_writer_properties));
+
+  std::shared_ptr<parquet::encryption::DatasetEncryptionConfiguration>
+      dataset_encrypt_config = GetDatasetEncryptionConfig();
+
+  if (dataset_encrypt_config != nullptr) {
+    auto file_encryption_prop =
+        dataset_encrypt_config->crypto_factory->GetFileEncryptionProperties(
+            *dataset_encrypt_config->kms_connection_config.get(),
+            *dataset_encrypt_config->encryption_config.get(), destination_locator.path,
+            destination_locator.filesystem);
+
+    auto writer_properties =
+        parquet::WriterProperties::Builder(*parquet_options->writer_properties.get())
+            .encryption(file_encryption_prop)
+            ->build();
+
+    ARROW_ASSIGN_OR_RAISE(
+        parquet_writer, parquet::arrow::FileWriter::Open(
+                            *schema, default_memory_pool(), destination,
+                            writer_properties, parquet_options->arrow_writer_properties));
+
+  } else {
+    ARROW_ASSIGN_OR_RAISE(parquet_writer, parquet::arrow::FileWriter::Open(
+                                              *schema, default_memory_pool(), destination,

Review Comment:
   Same here.



##########
cpp/src/arrow/dataset/dataset_encryption_test.cc:
##########


Review Comment:
   Another test to add: If we write a dataset with encryption, can we use a single file reader to read just one file out of it as long as we pass the same encryption configuration?



##########
cpp/src/arrow/dataset/file_parquet.h:
##########
@@ -136,6 +138,33 @@ class ARROW_DS_EXPORT ParquetFileFormat : public FileFormat {
       fs::FileLocator destination_locator) const override;
 
   std::shared_ptr<FileWriteOptions> DefaultWriteOptions() override;
+   
+  /// \brief Return the dataset encryption configuration held by this format.
+  ///
+  /// \return a copy of the shared pointer stored in dataset_encryption_config_;
+  /// MakeWriter compares the result against nullptr before using it
+  std::shared_ptr<parquet::encryption::DatasetEncryptionConfiguration> GetDatasetEncryptionConfig() const {
+    return dataset_encryption_config_;
+  }
+  /// \brief Return the dataset decryption configuration held by this format.
+  ///
+  /// \return a copy of the shared pointer stored in dataset_decryption_config_
+  std::shared_ptr<parquet::encryption::DatasetDecryptionConfiguration> GetDatasetDecryptionConfig() const {
+    return dataset_decryption_config_;
+  }
+  // create an setter for DatasetEncryptionConfiguration

Review Comment:
   Should this and the comment below be transformed into a doc comment?



##########
cpp/src/arrow/dataset/dataset_encryption_test.cc:
##########
@@ -0,0 +1,247 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/testing/gtest_util.h"
+#include "gtest/gtest.h"
+
+#include <arrow/api.h>
+#include <arrow/dataset/api.h>
+#include "arrow/array/builder_primitive.h"
+#include "arrow/builder.h"
+#include "arrow/dataset/partition.h"
+#include "arrow/filesystem/mockfs.h"
+#include "arrow/status.h"
+#include "arrow/table.h"
+#include "parquet/encryption/dataset_encryption_config.h"
+#include "parquet/encryption/test_in_memory_kms.h"
+
+const char dsFooterMasterKey[] = "0123456789012345";
+const char dsFooterMasterKeyId[] = "footer_key";
+const char* const dsColumnMasterKeys[] = {"1234567890123450"};
+const char* const dsColumnMasterKeyIds[] = {"col_key"};
+
+namespace arrow {
+namespace dataset {
+
+class DatasetEncryptionTest : public ::testing::Test {
+ protected:
+  std::unique_ptr<arrow::internal::TemporaryDir> temp_dir_;
+  std::shared_ptr<::arrow::dataset::InMemoryDataset> dataset_;
+  std::string footer_key_name_ = "footer_key";
+
+  ::parquet::encryption::DatasetEncryptionConfiguration dataset_encryption_config_;
+  ::parquet::encryption::DatasetDecryptionConfiguration dataset_decryption_config_;
+  std::string column_key_mapping_;
+  ::parquet::encryption::KmsConnectionConfig kms_connection_config_;
+  std::shared_ptr<::parquet::encryption::CryptoFactory> crypto_factory_;
+  std::shared_ptr<ParquetFileFormat> file_format_;
+  std::shared_ptr<::arrow::fs::FileSystem> file_system_;
+
+  /** Prepare the fixture before each test.
+   *
+   * Creates the mock filesystem, builds the in-memory table, registers the test
+   * keys with an in-memory KMS, and attaches the encryption and decryption
+   * configurations to the Parquet file format.
+   */
+  void SetUp() override {
+    // create our mock file system
+    ::arrow::fs::TimePoint mock_now = std::chrono::system_clock::now();
+    ASSERT_OK_AND_ASSIGN(file_system_,
+                         ::arrow::fs::internal::MockFileSystem::Make(mock_now, {}));
+    // build our dummy table
+    BuildTable();
+
+    // map of key id -> key material for the in-memory KMS
+    auto key_list = BuildKeyMap(dsColumnMasterKeyIds, dsColumnMasterKeys,
+                                dsFooterMasterKeyId, dsFooterMasterKey);
+
+    SetupCryptoFactory(true, key_list);
+
+    // column "a" is assigned to the key with id "col_key"
+    column_key_mapping_ = "col_key: a";
+
+    // Set up our Dataset encryption configuration
+    dataset_encryption_config_.crypto_factory = crypto_factory_;
+    dataset_encryption_config_.kms_connection_config =
+        std::make_shared<::parquet::encryption::KmsConnectionConfig>(
+            kms_connection_config_);
+    dataset_encryption_config_.encryption_config =
+        std::make_shared<::parquet::encryption::EncryptionConfiguration>(
+            footer_key_name_);
+    dataset_encryption_config_.encryption_config->column_keys = column_key_mapping_;
+    dataset_encryption_config_.encryption_config->footer_key = footer_key_name_;
+
+    // Set up our Dataset decryption configuration (same factory and KMS settings)
+    dataset_decryption_config_.crypto_factory = crypto_factory_;
+    dataset_decryption_config_.kms_connection_config =
+        std::make_shared<::parquet::encryption::KmsConnectionConfig>(
+            kms_connection_config_);
+    dataset_decryption_config_.decryption_config =
+        std::make_shared<::parquet::encryption::DecryptionConfiguration>();
+
+    // create our Parquet file format object
+    file_format_ = std::make_shared<ParquetFileFormat>();
+
+    // the format receives copies of the two configuration structs
+    file_format_->SetDatasetEncryptionConfig(
+        std::make_shared<::parquet::encryption::DatasetEncryptionConfiguration>(
+            dataset_encryption_config_));
+    file_format_->SetDatasetDecryptionConfig(
+        std::make_shared<::parquet::encryption::DatasetDecryptionConfiguration>(
+            dataset_decryption_config_));
+  }
+
+  /** Build the key-id -> key map that is handed to the in-memory KMS.
+   *
+   * Only the first entry of column_ids / column_keys is read.
+   */
+  std::unordered_map<std::string, std::string> BuildKeyMap(const char* const* column_ids,
+                                                           const char* const* column_keys,
+                                                           const char* footer_id,
+                                                           const char* footer_key) {
+    std::unordered_map<std::string, std::string> keys_by_id;
+    // the single column key (index 0)
+    keys_by_id.emplace(column_ids[0], column_keys[0]);
+    // the footer key
+    keys_by_id.emplace(footer_id, footer_key);
+    return keys_by_id;
+  }
+
+  /** Utility to build the column key mapping string.
+   *
+   * The mapping associates a master key *id* with column names, so the id
+   * (dsColumnMasterKeyIds), not the key material (dsColumnMasterKeys), is
+   * written here.
+   *
+   * \return "<first column key id>:a;"
+   */
+  std::string BuildColumnKeyMapping() {
+    return std::string(dsColumnMasterKeyIds[0]) + ":a;";
+  }
+  /** Write the in-memory dataset to the mock filesystem, Hive-partitioned on "part",
+   * then check that one file per partition value was produced.
+   */
+  void WriteDataset() {
+    auto base_path = "";
+    ASSERT_OK(file_system_->CreateDir(base_path));
+    // Scan the in-memory dataset; the scanner feeds the writer
+    ASSERT_OK_AND_ASSIGN(auto scanner_builder, dataset_->NewScan());
+    ASSERT_OK_AND_ASSIGN(auto scanner, scanner_builder->Finish());
+
+    auto partition_schema = ::arrow::schema({::arrow::field("part", ::arrow::utf8())});
+    auto partitioning =
+        std::make_shared<::arrow::dataset::HivePartitioning>(partition_schema);
+    ::arrow::dataset::FileSystemDatasetWriteOptions write_options;
+    write_options.file_write_options = file_format_->DefaultWriteOptions();
+    write_options.filesystem = file_system_;
+    write_options.base_dir = base_path;
+    write_options.partitioning = partitioning;
+    write_options.basename_template = "part{i}.parquet";
+    ASSERT_OK(::arrow::dataset::FileSystemDataset::Write(write_options, scanner));
+
+    auto mock_fs =
+        std::dynamic_pointer_cast<::arrow::fs::internal::MockFileSystem>(file_system_);
+    // the cast yields nullptr if file_system_ is not a MockFileSystem; fail the
+    // test instead of dereferencing a null pointer in ValidateFilesExist
+    ASSERT_NE(mock_fs, nullptr);
+
+    std::vector<std::string> files = {"part=a/part0.parquet", "part=b/part0.parquet",
+                                      "part=c/part0.parquet", "part=d/part0.parquet",
+                                      "part=e/part0.parquet", "part=f/part0.parquet",
+                                      "part=g/part0.parquet", "part=h/part0.parquet",
+                                      "part=i/part0.parquet", "part=j/part0.parquet"};
+    ValidateFilesExist(mock_fs, files);
+  }
+
+  /** Assert that no path in `files` is reported as NotFound by `fs`. */
+  void ValidateFilesExist(const std::shared_ptr<arrow::fs::internal::MockFileSystem>& fs,
+                          const std::vector<std::string>& files) {
+    for (const std::string& path : files) {
+      ASSERT_OK_AND_ASSIGN(arrow::fs::FileInfo info, fs->GetFileInfo(path));
+      ASSERT_NE(info.type(), arrow::fs::FileType::NotFound);
+    }
+  }
+
+  void ReadDataset() {
+    // File format
+    // Partitioning
+    auto partition_schema = arrow::schema({arrow::field("part", arrow::utf8())});
+    auto partitioning =
+        std::make_shared<arrow::dataset::HivePartitioning>(partition_schema);
+
+    // Get FileInfo objects for all files under the base directory
+    arrow::fs::FileSelector selector;
+    selector.base_dir = "";
+    selector.recursive = true;
+    ASSERT_OK_AND_ASSIGN(auto files, file_system_->GetFileInfo(selector));
+
+    // Create a FileSystemDatasetFactory
+    arrow::dataset::FileSystemFactoryOptions factory_options;
+    factory_options.partitioning = partitioning;
+    ASSERT_OK_AND_ASSIGN(auto dataset_factory,
+                         arrow::dataset::FileSystemDatasetFactory::Make(
+                             file_system_, files, file_format_, factory_options));
+
+    // Create a Dataset
+    ASSERT_OK_AND_ASSIGN(auto dataset, dataset_factory->Finish());
+
+    // Create a ScannerBuilder
+    ASSERT_OK_AND_ASSIGN(auto scanner_builder, dataset->NewScan());
+
+    // Create a Scanner
+    ASSERT_OK_AND_ASSIGN(auto scanner, scanner_builder->Finish());

Review Comment:
   Perhaps we should also assert that attempting to read the dataset without passing any encryption properties results in an appropriate error.



##########
cpp/write_dataset_example.py:
##########
@@ -0,0 +1,71 @@
+import sys
+sys.path.append('/home/ubuntu/projects/tolleybot_arrow/python')

Review Comment:
   ```suggestion
   ```



##########
python/examples/dataset/write_dataset_encrypted.py:
##########
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import pyarrow as pa
+import pyarrow.dataset as ds
+import pyarrow.parquet as pq
+import pyarrow.parquet.encryption as pe
+from pyarrow.tests.parquet.encryption import InMemoryKmsClient
+from datetime import timedelta
+import shutil
+import os
+
+""" A sample to demostrate dataset encryption and decryption"""
+
+# build an in-memory table with three columns: year, n_legs and animal
+table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021],
+                  'n_legs': [2, 2, 4, 4, 5, 100],
+                  'animal': ["Flamingo", "Parrot", "Dog", "Horse",
+                             "Brittle stars", "Centipede"]})
+
+# create a PyArrow dataset from the table
+dataset = ds.dataset(table)
+
+FOOTER_KEY = b"0123456789112345"
+FOOTER_KEY_NAME = "footer_key"
+COL_KEY = b"1234567890123450"
+COL_KEY_NAME = "col_key"
+
+encryption_config = pe.EncryptionConfiguration(
+    footer_key=FOOTER_KEY_NAME,

Review Comment:
   Is this showing encrypting the footer or plaintext footer (where the key is used for a signature)?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org