You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/12/03 21:01:28 UTC

[GitHub] [arrow] xhochy commented on a change in pull request #8648: ARROW-7906: [C++] Add ORC write support

xhochy commented on a change in pull request #8648:
URL: https://github.com/apache/arrow/pull/8648#discussion_r535594787



##########
File path: cpp/src/arrow/adapters/orc/adapter.cc
##########
@@ -473,6 +453,80 @@ int64_t ORCFileReader::NumberOfStripes() { return impl_->NumberOfStripes(); }
 
 int64_t ORCFileReader::NumberOfRows() { return impl_->NumberOfRows(); }
 
+class ORCFileWriter::Impl {
+ public:
+  Status Open(Schema* schema, const std::shared_ptr<io::FileOutputStream>& file,
+              std::shared_ptr<liborc::WriterOptions> options,
+              std::shared_ptr<ArrowWriterOptions> arrow_options,

Review comment:
       Pass in `shared_ptr` instances via const reference.
   ```suggestion
                 const std::shared_ptr<liborc::WriterOptions>& options,
                 const std::shared_ptr<ArrowWriterOptions>& arrow_options,
   ```

##########
File path: cpp/src/arrow/adapters/orc/adapter_test.cc
##########
@@ -157,4 +217,15960 @@ TEST(TestAdapter, readIntAndStringFileMultipleStripes) {
     EXPECT_TRUE(stripe_reader->ReadNext(&record_batch).ok());
   }
 }
+
+// Arrow2ORC type converter tests
+
+TEST(TestAdapterWriteConverter, typeBool) {
+  DataType* type = boolean().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));

Review comment:
       Use `ARROW_EXPECT_OK` instead of silencing the warning using `(void)`.
   ```suggestion
     ARROW_EXPECT_OK(adapters::orc::GetORCType(type, &out));
   ```

##########
File path: cpp/src/arrow/adapters/orc/adapter_test.cc
##########
@@ -157,4 +217,15960 @@ TEST(TestAdapter, readIntAndStringFileMultipleStripes) {
     EXPECT_TRUE(stripe_reader->ReadNext(&record_batch).ok());
   }
 }
+
+// Arrow2ORC type converter tests
+
+TEST(TestAdapterWriteConverter, typeBool) {
+  DataType* type = boolean().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::BOOLEAN);
+}
+TEST(TestAdapterWriteConverter, typeInt8) {
+  DataType* type = int8().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::BYTE);
+}
+TEST(TestAdapterWriteConverter, typeInt16) {
+  DataType* type = int16().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::SHORT);
+}
+TEST(TestAdapterWriteConverter, typeInt32) {
+  DataType* type = int32().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::INT);
+}
+TEST(TestAdapterWriteConverter, typeInt64) {
+  DataType* type = int64().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::LONG);
+}
+TEST(TestAdapterWriteConverter, typeFloat) {
+  DataType* type = float32().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::FLOAT);
+}
+TEST(TestAdapterWriteConverter, typeDouble) {
+  DataType* type = float64().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::DOUBLE);
+}
+TEST(TestAdapterWriteConverter, typeString) {
+  DataType* type = utf8().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::STRING);
+}
+TEST(TestAdapterWriteConverter, typeLargeString) {
+  DataType* type = large_utf8().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::STRING);
+}
+TEST(TestAdapterWriteConverter, typeBinary) {
+  DataType* type = binary().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::BINARY);
+}
+TEST(TestAdapterWriteConverter, typeLargeBinary) {
+  DataType* type = large_binary().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::BINARY);
+}
+TEST(TestAdapterWriteConverter, typeFixedSizeBinary) {
+  DataType* type = fixed_size_binary(3).get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::BINARY);
+}
+TEST(TestAdapterWriteConverter, typeFixedSizeBinaryZero) {
+  DataType* type = fixed_size_binary(0).get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::BINARY);
+}
+TEST(TestAdapterWriteConverter, typeDate32) {
+  DataType* type = date32().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::DATE);
+}
+TEST(TestAdapterWriteConverter, typeDate64) {
+  DataType* type = date64().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::TIMESTAMP);
+}
+TEST(TestAdapterWriteConverter, typeTimestampSecond) {
+  DataType* type = timestamp(TimeUnit::type::SECOND).get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::TIMESTAMP);
+}
+TEST(TestAdapterWriteConverter, typeTimestampMilli) {
+  DataType* type = timestamp(TimeUnit::type::MILLI).get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::TIMESTAMP);
+}
+TEST(TestAdapterWriteConverter, typeTimestampMicro) {
+  DataType* type = timestamp(TimeUnit::type::MICRO).get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::TIMESTAMP);
+}
+TEST(TestAdapterWriteConverter, typeTimestampNano) {
+  DataType* type = timestamp(TimeUnit::type::NANO).get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::TIMESTAMP);
+}
+TEST(TestAdapterWriteConverter, typeDecimal) {
+  DataType* type = decimal(32, 5).get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::DECIMAL);
+  EXPECT_EQ(out->getPrecision(), 32);
+  EXPECT_EQ(out->getScale(), 5);
+}
+TEST(TestAdapterWriteConverter, typeList) {
+  auto sharedPtrArrowType = list(std::make_shared<Field>("a", int32()));
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::INT);
+}
+TEST(TestAdapterWriteConverter, typeLargeList) {
+  auto sharedPtrArrowType = large_list(std::make_shared<Field>("a", int32()));
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::INT);
+}
+TEST(TestAdapterWriteConverter, typeFixedSizeList) {
+  auto sharedPtrArrowType = fixed_size_list(std::make_shared<Field>("a", int32()), 3);
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::INT);
+}
+TEST(TestAdapterWriteConverter, typeFixedSizeListZero) {
+  auto sharedPtrArrowType = fixed_size_list(std::make_shared<Field>("a", int32()), 0);
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::INT);
+}
+TEST(TestAdapterWriteConverter, typeStructTrivial) {
+  std::vector<std::shared_ptr<Field>> xFields;
+  auto sharedPtrArrowType = struct_(xFields);
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 0);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::STRUCT);
+}
+TEST(TestAdapterWriteConverter, typeStructSingleton) {
+  std::vector<std::shared_ptr<Field>> xFields;
+  xFields.push_back(std::make_shared<Field>("a", utf8()));

Review comment:
       This can be condensed to:
   ```suggestion
     std::vector<std::shared_ptr<Field>> xFields{arrow::field("a", utf8())};
   ```

##########
File path: cpp/src/arrow/adapters/orc/adapter_test.cc
##########
@@ -157,4 +217,15960 @@ TEST(TestAdapter, readIntAndStringFileMultipleStripes) {
     EXPECT_TRUE(stripe_reader->ReadNext(&record_batch).ok());
   }
 }
+
+// Arrow2ORC type converter tests
+
+TEST(TestAdapterWriteConverter, typeBool) {
+  DataType* type = boolean().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::BOOLEAN);
+}
+TEST(TestAdapterWriteConverter, typeInt8) {
+  DataType* type = int8().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::BYTE);
+}
+TEST(TestAdapterWriteConverter, typeInt16) {
+  DataType* type = int16().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::SHORT);
+}
+TEST(TestAdapterWriteConverter, typeInt32) {
+  DataType* type = int32().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::INT);
+}
+TEST(TestAdapterWriteConverter, typeInt64) {
+  DataType* type = int64().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::LONG);
+}
+TEST(TestAdapterWriteConverter, typeFloat) {
+  DataType* type = float32().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::FLOAT);
+}
+TEST(TestAdapterWriteConverter, typeDouble) {
+  DataType* type = float64().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::DOUBLE);
+}
+TEST(TestAdapterWriteConverter, typeString) {
+  DataType* type = utf8().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::STRING);
+}
+TEST(TestAdapterWriteConverter, typeLargeString) {
+  DataType* type = large_utf8().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::STRING);
+}
+TEST(TestAdapterWriteConverter, typeBinary) {
+  DataType* type = binary().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::BINARY);
+}
+TEST(TestAdapterWriteConverter, typeLargeBinary) {
+  DataType* type = large_binary().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::BINARY);
+}
+TEST(TestAdapterWriteConverter, typeFixedSizeBinary) {
+  DataType* type = fixed_size_binary(3).get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::BINARY);
+}
+TEST(TestAdapterWriteConverter, typeFixedSizeBinaryZero) {
+  DataType* type = fixed_size_binary(0).get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::BINARY);
+}
+TEST(TestAdapterWriteConverter, typeDate32) {
+  DataType* type = date32().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::DATE);
+}
+TEST(TestAdapterWriteConverter, typeDate64) {
+  DataType* type = date64().get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::TIMESTAMP);
+}
+TEST(TestAdapterWriteConverter, typeTimestampSecond) {
+  DataType* type = timestamp(TimeUnit::type::SECOND).get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::TIMESTAMP);
+}
+TEST(TestAdapterWriteConverter, typeTimestampMilli) {
+  DataType* type = timestamp(TimeUnit::type::MILLI).get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::TIMESTAMP);
+}
+TEST(TestAdapterWriteConverter, typeTimestampMicro) {
+  DataType* type = timestamp(TimeUnit::type::MICRO).get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::TIMESTAMP);
+}
+TEST(TestAdapterWriteConverter, typeTimestampNano) {
+  DataType* type = timestamp(TimeUnit::type::NANO).get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::TIMESTAMP);
+}
+TEST(TestAdapterWriteConverter, typeDecimal) {
+  DataType* type = decimal(32, 5).get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::DECIMAL);
+  EXPECT_EQ(out->getPrecision(), 32);
+  EXPECT_EQ(out->getScale(), 5);
+}
+TEST(TestAdapterWriteConverter, typeList) {
+  auto sharedPtrArrowType = list(std::make_shared<Field>("a", int32()));
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::INT);
+}
+TEST(TestAdapterWriteConverter, typeLargeList) {
+  auto sharedPtrArrowType = large_list(std::make_shared<Field>("a", int32()));
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::INT);
+}
+TEST(TestAdapterWriteConverter, typeFixedSizeList) {
+  auto sharedPtrArrowType = fixed_size_list(std::make_shared<Field>("a", int32()), 3);
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::INT);
+}
+TEST(TestAdapterWriteConverter, typeFixedSizeListZero) {
+  auto sharedPtrArrowType = fixed_size_list(std::make_shared<Field>("a", int32()), 0);
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::INT);
+}
+TEST(TestAdapterWriteConverter, typeStructTrivial) {
+  std::vector<std::shared_ptr<Field>> xFields;
+  auto sharedPtrArrowType = struct_(xFields);
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 0);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::STRUCT);
+}
+TEST(TestAdapterWriteConverter, typeStructSingleton) {
+  std::vector<std::shared_ptr<Field>> xFields;
+  xFields.push_back(std::make_shared<Field>("a", utf8()));
+  auto sharedPtrArrowType = struct_(xFields);
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::STRUCT);
+  EXPECT_EQ(out->getFieldName(0), "a");
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::STRING);
+}
+TEST(TestAdapterWriteConverter, typeStruct) {
+  std::vector<std::shared_ptr<Field>> xFields;
+  xFields.push_back(std::make_shared<Field>("a", utf8()));
+  xFields.push_back(std::make_shared<Field>("b", int32()));
+  auto sharedPtrArrowType = struct_(xFields);
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 2);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::STRUCT);
+  EXPECT_EQ(out->getFieldName(0), "a");
+  EXPECT_EQ(out->getFieldName(1), "b");
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::STRING);
+  EXPECT_EQ(out->getSubtype(1)->getKind(), liborc::TypeKind::INT);
+}
+TEST(TestAdapterWriteConverter, typeMap) {
+  auto sharedPtrArrowType = map(utf8(), int32());
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 2);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::MAP);
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::STRING);
+  EXPECT_EQ(out->getSubtype(1)->getKind(), liborc::TypeKind::INT);
+}
+TEST(TestAdapterWriteConverter, typeDenseUnionTrivial) {
+  std::vector<std::shared_ptr<Field>> xFields;
+  auto sharedPtrArrowType = dense_union(xFields);
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 0);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::UNION);
+}
+TEST(TestAdapterWriteConverter, typeDenseUnionSingleton) {
+  std::vector<std::shared_ptr<Field>> xFields;
+  xFields.push_back(std::make_shared<Field>("a", utf8()));
+  auto sharedPtrArrowType = dense_union(xFields);
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::UNION);
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::STRING);
+}
+TEST(TestAdapterWriteConverter, typeDenseUnion) {
+  std::vector<std::shared_ptr<Field>> xFields;
+  xFields.push_back(std::make_shared<Field>("a", utf8()));
+  xFields.push_back(std::make_shared<Field>("b", int32()));
+  auto sharedPtrArrowType = dense_union(xFields);
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 2);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::UNION);
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::STRING);
+  EXPECT_EQ(out->getSubtype(1)->getKind(), liborc::TypeKind::INT);
+}
+TEST(TestAdapterWriteConverter, typeSparseUnionTrivial) {
+  std::vector<std::shared_ptr<Field>> xFields;
+  auto sharedPtrArrowType = sparse_union(xFields);
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 0);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::UNION);
+}
+TEST(TestAdapterWriteConverter, typeSparseUnionSingleton) {
+  std::vector<std::shared_ptr<Field>> xFields;
+  xFields.push_back(std::make_shared<Field>("b", int32()));
+  auto sharedPtrArrowType = sparse_union(xFields);
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::UNION);
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::INT);
+}
+TEST(TestAdapterWriteConverter, typeSparseUnion) {
+  std::vector<std::shared_ptr<Field>> xFields;
+  xFields.push_back(std::make_shared<Field>("a", utf8()));
+  xFields.push_back(std::make_shared<Field>("b", int32()));
+  auto sharedPtrArrowType = sparse_union(xFields);
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 2);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::UNION);
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::STRING);
+  EXPECT_EQ(out->getSubtype(1)->getKind(), liborc::TypeKind::INT);
+}
+TEST(TestAdapterWriteConverter, typeListOfList) {
+  auto sharedPtrArrowSubtype = list(std::make_shared<Field>("a", int32()));
+  auto sharedPtrArrowType = list(std::make_shared<Field>("a", sharedPtrArrowSubtype));
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(0)->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(0)->getSubtype(0)->getKind(), liborc::TypeKind::INT);
+}
+TEST(TestAdapterWriteConverter, typeListOfMap) {
+  auto sharedPtrArrowSubtype = map(utf8(), int32());
+  auto sharedPtrArrowType = list(std::make_shared<Field>("a", sharedPtrArrowSubtype));
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(0)->getSubtypeCount(), 2);
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::MAP);
+  EXPECT_EQ(out->getSubtype(0)->getSubtype(0)->getKind(), liborc::TypeKind::STRING);
+  EXPECT_EQ(out->getSubtype(0)->getSubtype(1)->getKind(), liborc::TypeKind::INT);
+}
+TEST(TestAdapterWriteConverter, typeListOfStructOfLists) {
+  auto sharedPtrArrowSubsubtype0 = list(std::make_shared<Field>("a", int8()));
+  auto sharedPtrArrowSubsubtype1 = list(std::make_shared<Field>("b", float64()));
+  auto sharedPtrArrowSubsubtype2 = list(std::make_shared<Field>("c", date32()));
+  std::vector<std::shared_ptr<Field>> xFields;
+  xFields.push_back(std::make_shared<Field>("a", sharedPtrArrowSubsubtype0));
+  xFields.push_back(std::make_shared<Field>("b", sharedPtrArrowSubsubtype1));
+  xFields.push_back(std::make_shared<Field>("c", sharedPtrArrowSubsubtype2));
+  auto sharedPtrArrowSubtype = struct_(xFields);
+  auto sharedPtrArrowType = list(std::make_shared<Field>("x", sharedPtrArrowSubtype));
+  DataType* type = sharedPtrArrowType.get();
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(type, &out));
+  EXPECT_EQ(out->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(0)->getSubtypeCount(), 3);
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::STRUCT);
+  EXPECT_EQ(out->getSubtype(0)->getSubtype(0)->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getSubtype(0)->getSubtype(0)->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(0)->getSubtype(1)->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getSubtype(0)->getSubtype(1)->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(0)->getSubtype(2)->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getSubtype(0)->getSubtype(2)->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(0)->getSubtype(0)->getSubtype(0)->getKind(),
+            liborc::TypeKind::BYTE);
+  EXPECT_EQ(out->getSubtype(0)->getSubtype(1)->getSubtype(0)->getKind(),
+            liborc::TypeKind::DOUBLE);
+  EXPECT_EQ(out->getSubtype(0)->getSubtype(2)->getSubtype(0)->getKind(),
+            liborc::TypeKind::DATE);
+}
+TEST(TestAdapterWriteConverter, schemaTrivial) {
+  std::vector<std::shared_ptr<Field>> xFields;
+  std::shared_ptr<Schema> sharedPtrSchema = std::make_shared<Schema>(xFields);
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(sharedPtrSchema.get(), &out));
+  EXPECT_EQ(out->getSubtypeCount(), 0);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::STRUCT);
+}
+TEST(TestAdapterWriteConverter, schemaSingleton) {
+  std::vector<std::shared_ptr<Field>> xFields;
+  xFields.push_back(std::make_shared<Field>("a", utf8()));
+  std::shared_ptr<Schema> sharedPtrSchema = std::make_shared<Schema>(xFields);
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(sharedPtrSchema.get(), &out));
+  EXPECT_EQ(out->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::STRUCT);
+  EXPECT_EQ(out->getFieldName(0), "a");
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::STRING);
+}
+TEST(TestAdapterWriteConverter, schemaMixed1) {
+  auto sharedPtrArrowSubsubtype0 = list(std::make_shared<Field>("a", large_utf8()));
+  auto sharedPtrArrowSubtype0 =
+      list(std::make_shared<Field>("a", sharedPtrArrowSubsubtype0));
+  auto sharedPtrArrowSubtype1 = list(std::make_shared<Field>("b", decimal(30, 4)));
+  auto sharedPtrArrowSubtype2 =
+      list(std::make_shared<Field>("c", timestamp(TimeUnit::type::MICRO)));
+  std::vector<std::shared_ptr<Field>> xFields;
+  xFields.push_back(std::make_shared<Field>("a", sharedPtrArrowSubtype0));
+  xFields.push_back(std::make_shared<Field>("b", sharedPtrArrowSubtype1));
+  xFields.push_back(std::make_shared<Field>("c", sharedPtrArrowSubtype2));
+  xFields.push_back(std::make_shared<Field>("d", boolean()));
+  xFields.push_back(std::make_shared<Field>("e", fixed_size_binary(5)));
+  std::shared_ptr<Schema> sharedPtrSchema = std::make_shared<Schema>(xFields);
+  ORC_UNIQUE_PTR<liborc::Type> out;
+  (void)(adapters::orc::GetORCType(sharedPtrSchema.get(), &out));
+  EXPECT_EQ(out->getSubtypeCount(), 5);
+  EXPECT_EQ(out->getKind(), liborc::TypeKind::STRUCT);
+  EXPECT_EQ(out->getFieldName(0), "a");
+  EXPECT_EQ(out->getFieldName(1), "b");
+  EXPECT_EQ(out->getFieldName(2), "c");
+  EXPECT_EQ(out->getFieldName(3), "d");
+  EXPECT_EQ(out->getFieldName(4), "e");
+  EXPECT_EQ(out->getSubtype(0)->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getSubtype(0)->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(1)->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getSubtype(1)->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(2)->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getSubtype(2)->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(3)->getKind(), liborc::TypeKind::BOOLEAN);
+  EXPECT_EQ(out->getSubtype(4)->getKind(), liborc::TypeKind::BINARY);
+  EXPECT_EQ(out->getSubtype(0)->getSubtype(0)->getSubtypeCount(), 1);
+  EXPECT_EQ(out->getSubtype(0)->getSubtype(0)->getKind(), liborc::TypeKind::LIST);
+  EXPECT_EQ(out->getSubtype(1)->getSubtype(0)->getKind(), liborc::TypeKind::DECIMAL);
+  EXPECT_EQ(out->getSubtype(2)->getSubtype(0)->getKind(), liborc::TypeKind::TIMESTAMP);
+  EXPECT_EQ(out->getSubtype(0)->getSubtype(0)->getSubtype(0)->getKind(),
+            liborc::TypeKind::STRING);
+}
+
+// WriteORC tests
+// TEST(TestAdapterWriteNumerical, writeBoolEmpty0) {
+//   BooleanBuilder builder;
+//   std::shared_ptr<Array> array;
+//   (void)(builder.Finish(&array));
+//   std::shared_ptr<Table> table = std::make_shared<Table>({array},{std::String("a")});
+//   MemoryOutputStreamV2 file(DEFAULT_SMALL_MEM_STREAM_SIZE);
+//   std::unique_ptr<adapters::orc::ORCFileWriter>* writer;
+//   ORCFileWriter::Open(table->schema().get(),
+//                     const std::shared_ptr<io::FileOutputStream>& file,
+//                     std::shared_ptr<liborc::WriterOptions> options,
+//                     std::shared_ptr<ArrowWriterOptions> arrow_options,
+//                     std::unique_ptr<ORCFileWriter>* writer
+//                     )
+// }
+
+// Numeric
+
+// Bool
+TEST(TestAdapterWriteNumerical, writeBoolEmpty) {
+  BooleanBuilder builder;
+  std::shared_ptr<Array> array;
+  (void)(builder.Finish(&array));
+  MemoryOutputStream mem_stream(DEFAULT_SMALL_MEM_STREAM_SIZE);
+  ORC_UNIQUE_PTR<liborc::Type> schema(
+      liborc::Type::buildTypeFromString("struct<x:boolean>"));
+  liborc::WriterOptions options;
+  ORC_UNIQUE_PTR<liborc::Writer> writer = createWriter(*schema, &mem_stream, options);
+  uint64_t batchSize = 1024;
+  ORC_UNIQUE_PTR<liborc::ColumnVectorBatch> batch = writer->createRowBatch(batchSize);
+  liborc::StructVectorBatch* root =
+      internal::checked_cast<liborc::StructVectorBatch*>(batch.get());
+  liborc::LongVectorBatch* x =
+      internal::checked_cast<liborc::LongVectorBatch*>(root->fields[0]);
+  DataType* arrowType = boolean().get();
+  int64_t arrowOffset = 0;
+  int64_t orcOffset = 0;
+  Status st = adapters::orc::FillBatch(arrowType, x, arrowOffset, orcOffset, batchSize,
+                                       array.get());
+  if (!st.ok()) {
+    FAIL() << "ORC ColumnBatch not successfully filled";
+  }
+  EXPECT_EQ(x->numElements, 0);
+  EXPECT_FALSE(x->hasNulls);
+  EXPECT_EQ(arrowOffset, 0);
+  EXPECT_EQ(orcOffset, 0);
+  writer->add(*batch);
+  writer->close();
+}
+TEST(TestAdapterWriteNumerical, writeBoolNoNulls) {
+  BooleanBuilder builder;

Review comment:
       Instead of using a builder, use `ArrayFromJSON`, e.g. `ArrayFromJSON(arrow::boolean(), "[true, false]")`.

##########
File path: cpp/src/arrow/adapters/orc/adapter.h
##########
@@ -19,14 +19,38 @@
 
 #include <cstdint>
 #include <memory>
+#include <sstream>
 #include <vector>
 
+#include "arrow/io/file.h"
 #include "arrow/io/interfaces.h"
 #include "arrow/memory_pool.h"
 #include "arrow/record_batch.h"
 #include "arrow/status.h"
 #include "arrow/type.h"
 #include "arrow/util/visibility.h"
+#include "orc/OrcFile.hh"
+
+namespace liborc = orc;
+
+#define ORC_THROW_NOT_OK(s)                   \

Review comment:
       If we have these macros in headers, we should call them `ARROW_ORC_THROW_NOT_OK`.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org