You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/04/27 16:44:27 UTC

[GitHub] [arrow] pitrou commented on a change in pull request #10157: ARROW-9299: [C++][Python] Expose ORC metadata

pitrou commented on a change in pull request #10157:
URL: https://github.com/apache/arrow/pull/10157#discussion_r621398746



##########
File path: cpp/src/arrow/adapters/orc/adapter.cc
##########
@@ -229,6 +229,17 @@ class ORCFileReader::Impl {
     return GetArrowSchema(type, out);
   }
 
+  Result<std::shared_ptr<KeyValueMetadata>> ReadMetadata() {
+    std::list<std::string> keys = reader_->getMetadataKeys();

Review comment:
       Make this `const`?

##########
File path: cpp/src/arrow/adapters/orc/adapter.cc
##########
@@ -229,6 +229,17 @@ class ORCFileReader::Impl {
     return GetArrowSchema(type, out);
   }
 
+  Result<std::shared_ptr<KeyValueMetadata>> ReadMetadata() {
+    std::list<std::string> keys = reader_->getMetadataKeys();
+    auto metadata = std::make_shared<KeyValueMetadata>();
+    if (!keys.empty()) {

Review comment:
       Why not `for (const auto& key : keys) {`?

##########
File path: cpp/src/arrow/adapters/orc/adapter.h
##########
@@ -27,6 +27,7 @@
 #include "arrow/status.h"
 #include "arrow/type.h"
 #include "arrow/type_fwd.h"
+#include "arrow/util/key_value_metadata.h"

Review comment:
       No need for this a priori, as `KeyValueMetadata` is already declared in `arrow/type_fwd.h`.

##########
File path: python/pyarrow/tests/test_orc.py
##########
@@ -110,20 +114,20 @@ def check_example_file(orc_path, expected_df, need_fix=False):
     json_pos = 0
     for i in range(orc_file.nstripes):
         batch = orc_file.read_stripe(i)
-        check_example_values(pd.DataFrame(batch.to_pydict()),
-                             expected_df,
-                             start=json_pos,
-                             stop=json_pos + len(batch))
+        check_example_values(
+            pd.DataFrame(batch.to_pydict()),
+            expected_df,
+            start=json_pos,
+            stop=json_pos + len(batch),
+        )

Review comment:
       Please avoid doing unrelated style changes.
   Also, if you want an automated style pass, you can use `archery lint --python --fix`
   (see https://arrow.apache.org/docs/developers/archery.html)

##########
File path: cpp/src/arrow/adapters/orc/adapter_test.cc
##########
@@ -289,6 +289,8 @@ TEST(TestAdapterRead, ReadIntAndStringFileMultipleStripes) {
   constexpr uint64_t reader_batch_size = 1024;
 
   auto writer = CreateWriter(stripe_size, *type, &mem_stream);
+  // writer->addUserMetadata("key1", "value1");
+  // writer->addUserMetadata("key2", "value2");

Review comment:
       Why the commented out code?

##########
File path: cpp/src/arrow/adapters/orc/adapter.h
##########
@@ -149,7 +156,7 @@ class ARROW_EXPORT ORCFileWriter {
   /// \brief Creates a new ORC writer.
   ///
   /// \param[in] output_stream a pointer to the io::OutputStream to write into
-  /// \return the returned writer object
+  /// \return the returned writer object or Status if error occurs

Review comment:
       There is no need to state this explicitly, as `Result<>` can implicitly convey an error.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org