You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jira@arrow.apache.org by "Edward Nolan (Jira)" <ji...@apache.org> on 2021/05/20 15:09:00 UTC

[jira] [Created] (ARROW-12840) [C++] Unable to subclass parquet::StreamWriter when statically linking and using C++17

Edward Nolan created ARROW-12840:
------------------------------------

             Summary: [C++] Unable to subclass parquet::StreamWriter when statically linking and using C++17
                 Key: ARROW-12840
                 URL: https://issues.apache.org/jira/browse/ARROW-12840
             Project: Apache Arrow
          Issue Type: Bug
          Components: C++
         Environment: OS: Ubuntu 21.04

Linux ed-ubuntu-pc 5.11.0-16-generic #17-Ubuntu SMP Wed Apr 14 20:12:43 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux

Compiler: gcc version 9.3.0 (Ubuntu 9.3.0-23ubuntu2)

Linker: GNU ld 2.36.1
            Reporter: Edward Nolan


Git hash of Arrow version tested:

aa37d197a63a7efbc0660f9cea2f75cc08c30587

 

Steps to reproduce:

Build Arrow and Parquet statically, and install to a prefix directory, ~/arrow/cpp/release/prefix:
{code:java}
# Start from the home directory so the checkout lands at ~/arrow.
cd
git clone https://github.com/apache/arrow.git
cd arrow/cpp/
# Out-of-source build directory: ~/arrow/cpp/release.
mkdir release
cd release
# Enable Parquet and build static libraries only (shared libraries are switched off).
cmake -DARROW_PARQUET=ON -DARROW_BUILD_SHARED=OFF -DARROW_BUILD_STATIC=ON ..
make
# Stage the install under ./prefix via DESTDIR; the CMakeLists.txt in this report
# points at prefix/usr/local inside this build directory.
mkdir prefix
make DESTDIR=prefix install
{code}
Build the following main file (main.cpp):
{code:java}
#include <parquet/stream_writer.h>

// Empty tag type; streaming a Foo selects the operator<<(Foo) overload of the derived StreamWriter.
struct Foo{};

// User-side subclass of parquet::StreamWriter (same unqualified name, global namespace)
// that adds a stream-insertion overload for Foo.
struct StreamWriter : parquet::StreamWriter {
  // Inherit the base-class constructors.
  using parquet::StreamWriter::StreamWriter;

  // Writes a single `false` through the base class's Write<> member template and
  // returns *this so insertions can be chained.
  // NOTE(review): presumably instantiating Write<> in this translation unit is what
  // ODR-uses the static constexpr members named in the linker error — confirm
  // against stream_writer.h.
  StreamWriter& operator<<(Foo) {
    this->Write<parquet::BoolWriter>(false);
    return *this;
  }
};

// Constructs the derived writer from nullptr and streams one Foo followed by parquet::EndRow.
// NOTE(review): the report concerns a link-time failure; presumably this program only
// needs to link and is not meant to be run with a null argument — confirm.
int main(int, char**) {
  StreamWriter os{nullptr};
  os << Foo{} << parquet::EndRow;
  return 0;
}
{code}
Using the following CMakeLists.txt:
{code:java}
# Minimal consumer project used to reproduce the link error.
project("arrowodrbug")

# Look for the Arrow and Parquet package configs only in the staged install
# prefix (NO_DEFAULT_PATH disables the default search locations).
set(ARROW_HOME "/home/enolan/arrow/cpp/release/prefix/usr/local")
find_package(Arrow CONFIG PATHS "/home/enolan/arrow/cpp/release/prefix/usr/local/lib/cmake/arrow" NO_DEFAULT_PATH REQUIRED)
find_package(Parquet CONFIG PATHS "/home/enolan/arrow/cpp/release/prefix/usr/local/lib/cmake/arrow" NO_DEFAULT_PATH REQUIRED)

# Compile the consumer as C++17; per this report the error does not seem to
# occur with earlier C++ versions.
set(CMAKE_CXX_FLAGS "-std=c++17")

add_executable(main main.cpp)

# Link against the static Parquet and Arrow targets.
target_link_libraries(main parquet_static arrow_static)
{code}
Expected behavior:

The executable links successfully.

Actual behavior:

Linking fails with the following errors:
{code:java}
/usr/bin/ld: /home/enolan/arrow/cpp/release/prefix/usr/local/lib/libparquet.a(stream_writer.cc.o):(.rodata+0x78): multiple definition of `parquet::StreamWriter::kRepLevelZero'; CMakeFiles/main.dir/main.o:(.rodata._ZN7parquet12StreamWriter13kRepLevelZeroE[_ZN7parquet12StreamWriter13kRepLevelZeroE]+0x0): first defined here
/usr/bin/ld: /home/enolan/arrow/cpp/release/prefix/usr/local/lib/libparquet.a(stream_writer.cc.o):(.rodata+0x7a): multiple definition of `parquet::StreamWriter::kDefLevelOne'; CMakeFiles/main.dir/main.o:(.rodata._ZN7parquet12StreamWriter12kDefLevelOneE[_ZN7parquet12StreamWriter12kDefLevelOneE]+0x0): first defined here
{code}
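This issue is most likely an ODR violation involving the StreamWriter class that surfaces when the application is compiled as C++17. In C++17 a static constexpr data member is implicitly inline, so main.o emits its own definitions of kRepLevelZero and kDefLevelOne (see "first defined here" above), and these clash with the out-of-class definitions from stream_writer.cc in libparquet.a. The issue doesn't seem to occur when building the application with earlier C++ versions.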

This change seems to prevent the issue:
{code:java}
diff --git a/cpp/src/parquet/stream_writer.cc b/cpp/src/parquet/stream_writer.cc
index 253ebf1bc..ea8f2678d 100644
--- a/cpp/src/parquet/stream_writer.cc
+++ b/cpp/src/parquet/stream_writer.cc
@@ -23,10 +23,10 @@ namespace parquet {

 int64_t StreamWriter::default_row_group_size_{512 * 1024 * 1024};  // 512MB

-constexpr int16_t StreamWriter::kDefLevelZero;
-constexpr int16_t StreamWriter::kDefLevelOne;
-constexpr int16_t StreamWriter::kRepLevelZero;
-constexpr int64_t StreamWriter::kBatchSizeOne;
+int16_t StreamWriter::kDefLevelZero = 0;
+int16_t StreamWriter::kDefLevelOne = 1;
+int16_t StreamWriter::kRepLevelZero = 0;
+int64_t StreamWriter::kBatchSizeOne = 1;

 StreamWriter::FixedStringView::FixedStringView(const char* data_ptr)
     : data{data_ptr}, size{std::strlen(data_ptr)} {}
diff --git a/cpp/src/parquet/stream_writer.h b/cpp/src/parquet/stream_writer.h
index d0db850c3..b7339609e 100644
--- a/cpp/src/parquet/stream_writer.h
+++ b/cpp/src/parquet/stream_writer.h
@@ -220,10 +220,10 @@ class PARQUET_EXPORT StreamWriter {
   std::unique_ptr<RowGroupWriter, null_deleter> row_group_writer_;
   std::vector<node_ptr_type> nodes_;

-  static constexpr int16_t kDefLevelZero = 0;
-  static constexpr int16_t kDefLevelOne = 1;
-  static constexpr int16_t kRepLevelZero = 0;
-  static constexpr int64_t kBatchSizeOne = 1;
+  static int16_t kDefLevelZero;
+  static int16_t kDefLevelOne;
+  static int16_t kRepLevelZero;
+  static int64_t kBatchSizeOne;

   static int64_t default_row_group_size_;
 };
{code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)