You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "westonpace (via GitHub)" <gi...@apache.org> on 2023/02/07 14:03:42 UTC

[GitHub] [arrow] westonpace commented on a diff in pull request #33753: GH-30891: [C++] The C++ API for writing datasets could be improved

westonpace commented on code in PR #33753:
URL: https://github.com/apache/arrow/pull/33753#discussion_r1098694021


##########
cpp/src/arrow/dataset/file_base.h:
##########
@@ -450,20 +465,25 @@ struct ARROW_DS_EXPORT FileSystemDatasetWriteOptions {
   };
 
   const std::shared_ptr<FileFormat>& format() const {
-    return file_write_options->format();
+    return file_write_options_->format();
   }

Review Comment:
   If you make `format_` public you won't need this but why wasn't it just `format_`?



##########
cpp/src/arrow/dataset/file_base.h:
##########
@@ -450,20 +465,25 @@ struct ARROW_DS_EXPORT FileSystemDatasetWriteOptions {
   };
 
   const std::shared_ptr<FileFormat>& format() const {
-    return file_write_options->format();
+    return file_write_options_->format();
   }
+
+ private:
+  FileSystemDatasetWriteOptions() = delete;

Review Comment:
   ```suggestion
   ```
   This will be implicitly deleted since you have an explicitly defined constructor above.
   



##########
cpp/src/arrow/dataset/file_base.h:
##########
@@ -385,24 +386,38 @@ class ARROW_DS_EXPORT FileWriter {
 
 /// \brief Options for writing a dataset.
 struct ARROW_DS_EXPORT FileSystemDatasetWriteOptions {
+ public:
+  explicit FileSystemDatasetWriteOptions(
+      const std::shared_ptr<FileFormat>& format,
+      const std::shared_ptr<FileWriteOptions> file_write_options = NULLPTR) {
+    if (file_write_options == NULLPTR) {
+      file_write_options_ = format->DefaultWriteOptions();
+    } else {
+      file_write_options_ = file_write_options;
+    }
+  }
+
   /// Options for individual fragment writing.
-  std::shared_ptr<FileWriteOptions> file_write_options;
+  std::shared_ptr<FileWriteOptions> fileWriteOptions() const {
+    return file_write_options_;
+  }

Review Comment:
   ```suggestion
     std::shared_ptr<FileWriteOptions> file_write_options() const {
       return file_write_options_;
     }
   ```
   Or, if you make `file_write_options_` public then this isn't needed.



##########
cpp/src/arrow/dataset/file_base.h:
##########
@@ -280,7 +281,7 @@ class ARROW_DS_EXPORT FileSystemDataset : public Dataset {
       std::shared_ptr<Partitioning> partitioning = NULLPTR);
 
   /// \brief Write a dataset.
-  static Status Write(const FileSystemDatasetWriteOptions& write_options,
+  static Status Write(const std::shared_ptr<FileSystemDatasetWriteOptions>& write_options,

Review Comment:
   What's the reason for this change?



##########
cpp/src/arrow/dataset/file_base.h:
##########
@@ -450,20 +465,25 @@ struct ARROW_DS_EXPORT FileSystemDatasetWriteOptions {
   };
 
   const std::shared_ptr<FileFormat>& format() const {
-    return file_write_options->format();
+    return file_write_options_->format();
   }
+
+ private:
+  FileSystemDatasetWriteOptions() = delete;
+  std::shared_ptr<FileFormat> format_;
+  std::shared_ptr<FileWriteOptions> file_write_options_;

Review Comment:
   I don't think these need to be private.  Instead you can make them `const`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org