You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/06/03 17:05:43 UTC

[arrow] branch master updated: ARROW-5365: [C++][CI] Enable ASAN/UBSAN in CI

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new cdedd85  ARROW-5365: [C++][CI] Enable ASAN/UBSAN in CI
cdedd85 is described below

commit cdedd85f9c8545ab3c98e7b9de938a28d1ec9e11
Author: Micah Kornfield <em...@gmail.com>
AuthorDate: Mon Jun 3 12:05:35 2019 -0500

    ARROW-5365: [C++][CI] Enable ASAN/UBSAN in CI
    
    This enables ASAN/UBSAN in the clang builds.  Pushing this for PR early to see if there are any comments on approach or additional checks to remove (the first two fixes where related to passing null values to memcpy).
    
    Note the UBSAN behavior should already be configured to exclude unaligned pointer errors (I'll file a separate JIRA for fixing those).
    
    Remaining for this PR:
    * [X] Try to fix as many UBSAN/ASAN errors as possible (going to time box this to a week), and then blacklist the rest with a follow-up JIRA
    
    CC @wesm @pitrou in case you have any early feedback
    
    Author: Micah Kornfield <em...@gmail.com>
    
    Closes #4347 from emkornfield/use_asan and squashes the following commits:
    
    bba5824c4 <Micah Kornfield> fix io/memory.cc asan
    19ee908d7 <Micah Kornfield> handle signed and unsigned enum loading in parquet to support windows
    dc300d4cb <Micah Kornfield> add back asan
    05467be2f <Micah Kornfield> ubsan stuff
---
 .travis.yml                            |  2 +
 ci/travis_before_script_cpp.sh         |  9 +++++
 cpp/cmake_modules/DefineOptions.cmake  |  2 +
 cpp/cmake_modules/san-config.cmake     |  3 +-
 cpp/src/arrow/array/builder_binary.h   |  9 ++++-
 cpp/src/arrow/buffer-builder.h         |  8 +++-
 cpp/src/arrow/io/memory.cc             |  8 ++--
 cpp/src/arrow/ipc/json-test.cc         |  8 +++-
 cpp/src/arrow/ipc/metadata-internal.cc | 22 +++++++----
 cpp/src/arrow/json/test-common.h       |  4 +-
 cpp/src/arrow/util/rle-encoding.h      |  2 +-
 cpp/src/arrow/util/ubsan.cc            | 28 +++++++++++++
 cpp/src/arrow/util/ubsan.h             | 53 +++++++++++++++++++++++++
 cpp/src/parquet/metadata.cc            |  2 +
 cpp/src/parquet/schema.cc              | 72 +++++++++++++++++++++++++++++-----
 cpp/src/parquet/types.cc               |  8 ++++
 cpp/src/parquet/types.h                | 10 +++--
 cpp/src/plasma/protocol.cc             | 18 ++++++---
 18 files changed, 232 insertions(+), 36 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index efeaf20..8cd6c91 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -71,6 +71,8 @@ matrix:
     - ARROW_TRAVIS_VERBOSE=1
     - ARROW_TRAVIS_USE_SYSTEM_JAVA=1
     - ARROW_BUILD_WARNING_LEVEL=CHECKIN
+    - ARROW_USE_ASAN=1
+    - ARROW_USE_UBSAN=1
     - CC="clang-7"
     - CXX="clang++-7"
     before_script:
diff --git a/ci/travis_before_script_cpp.sh b/ci/travis_before_script_cpp.sh
index 21ed9dd..ca01b55 100755
--- a/ci/travis_before_script_cpp.sh
+++ b/ci/travis_before_script_cpp.sh
@@ -121,6 +121,15 @@ if [ "$ARROW_TRAVIS_VALGRIND" == "1" ]; then
   CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_TEST_MEMCHECK=ON"
 fi
 
+if [ "$ARROW_USE_ASAN" == "1" ]; then
+  CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_USE_ASAN=ON"
+fi
+
+if [ "$ARROW_USE_UBSAN" == "1" ]; then
+  CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_USE_UBSAN=ON"
+fi
+
+
 if [ "$ARROW_TRAVIS_COVERAGE" == "1" ]; then
   CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_GENERATE_COVERAGE=ON"
 fi
diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake
index c10b6a6..b00af80 100644
--- a/cpp/cmake_modules/DefineOptions.cmake
+++ b/cpp/cmake_modules/DefineOptions.cmake
@@ -129,6 +129,8 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
 
   define_option(ARROW_USE_TSAN "Enable Thread Sanitizer checks" OFF)
 
+  define_option(ARROW_USE_UBSAN "Enable Undefined Behavior sanitizer checks" OFF)
+
   #----------------------------------------------------------------------
   set_option_category("Project component")
 
diff --git a/cpp/cmake_modules/san-config.cmake b/cpp/cmake_modules/san-config.cmake
index b2d3dac..95ef553 100644
--- a/cpp/cmake_modules/san-config.cmake
+++ b/cpp/cmake_modules/san-config.cmake
@@ -34,6 +34,7 @@ endif()
 # - disable 'vptr' because it currently crashes somewhere in boost::intrusive::list code
 # - disable 'alignment' because unaligned access is really OK on Nehalem and we do it
 #   all over the place.
+# - disable 'function' because it appears to give a false positive https://github.com/google/sanitizers/issues/911
 if(${ARROW_USE_UBSAN})
   if(NOT
      (("${COMPILER_FAMILY}" STREQUAL "clang")
@@ -45,7 +46,7 @@ if(${ARROW_USE_UBSAN})
   endif()
   set(
     CMAKE_CXX_FLAGS
-    "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr -fno-sanitize-recover=all"
+    "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr,function -fno-sanitize-recover=all"
     )
 endif()
 
diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h
index a04e308..23a9645 100644
--- a/cpp/src/arrow/array/builder_binary.h
+++ b/cpp/src/arrow/array/builder_binary.h
@@ -48,7 +48,10 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder {
   Status Append(const uint8_t* value, int32_t length) {
     ARROW_RETURN_NOT_OK(Reserve(1));
     ARROW_RETURN_NOT_OK(AppendNextOffset());
-    ARROW_RETURN_NOT_OK(value_data_builder_.Append(value, length));
+    // Safety check for UBSAN.
+    if (ARROW_PREDICT_TRUE(length > 0)) {
+      ARROW_RETURN_NOT_OK(value_data_builder_.Append(value, length));
+    }
 
     UnsafeAppendToBitmap(true);
     return Status::OK();
@@ -246,7 +249,9 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
 
   void UnsafeAppend(const uint8_t* value) {
     UnsafeAppendToBitmap(true);
-    byte_builder_.UnsafeAppend(value, byte_width_);
+    if (ARROW_PREDICT_TRUE(byte_width_ > 0)) {
+      byte_builder_.UnsafeAppend(value, byte_width_);
+    }
   }
 
   void UnsafeAppend(util::string_view value) {
diff --git a/cpp/src/arrow/buffer-builder.h b/cpp/src/arrow/buffer-builder.h
index dac2f39..f069ea4 100644
--- a/cpp/src/arrow/buffer-builder.h
+++ b/cpp/src/arrow/buffer-builder.h
@@ -29,6 +29,7 @@
 #include "arrow/status.h"
 #include "arrow/util/bit-util.h"
 #include "arrow/util/macros.h"
+#include "arrow/util/ubsan.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
@@ -42,7 +43,12 @@ namespace arrow {
 class ARROW_EXPORT BufferBuilder {
  public:
   explicit BufferBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT)
-      : pool_(pool), data_(NULLPTR), capacity_(0), size_(0) {}
+      : pool_(pool),
+        data_(/*ensure never null to make ubsan happy and avoid check penalties below*/
+              &util::internal::non_null_filler),
+
+        capacity_(0),
+        size_(0) {}
 
   /// \brief Resize the buffer to the nearest multiple of 64 bytes
   ///
diff --git a/cpp/src/arrow/io/memory.cc b/cpp/src/arrow/io/memory.cc
index 4e9c2ea..22a9f9b 100644
--- a/cpp/src/arrow/io/memory.cc
+++ b/cpp/src/arrow/io/memory.cc
@@ -100,9 +100,11 @@ Status BufferOutputStream::Write(const void* data, int64_t nbytes) {
     return Status::IOError("OutputStream is closed");
   }
   DCHECK(buffer_);
-  RETURN_NOT_OK(Reserve(nbytes));
-  memcpy(mutable_data_ + position_, data, nbytes);
-  position_ += nbytes;
+  if (ARROW_PREDICT_TRUE(nbytes > 0)) {
+    RETURN_NOT_OK(Reserve(nbytes));
+    memcpy(mutable_data_ + position_, data, nbytes);
+    position_ += nbytes;
+  }
   return Status::OK();
 }
 
diff --git a/cpp/src/arrow/ipc/json-test.cc b/cpp/src/arrow/ipc/json-test.cc
index 2a98862..1b48484 100644
--- a/cpp/src/arrow/ipc/json-test.cc
+++ b/cpp/src/arrow/ipc/json-test.cc
@@ -59,7 +59,9 @@ void TestSchemaRoundTrip(const Schema& schema) {
   std::string json_schema = sb.GetString();
 
   rj::Document d;
-  d.Parse(json_schema);
+  // Pass explicit size to avoid ASAN issues with
+  // SIMD loads in RapidJson.
+  d.Parse(json_schema.data(), json_schema.size());
 
   DictionaryMemo in_memo;
   std::shared_ptr<Schema> out;
@@ -83,7 +85,9 @@ void TestArrayRoundTrip(const Array& array) {
   std::string array_as_json = sb.GetString();
 
   rj::Document d;
-  d.Parse(array_as_json);
+  // Pass explicit size to avoid ASAN issues with
+  // SIMD loads in RapidJson.
+  d.Parse(array_as_json.data(), array_as_json.size());
 
   if (d.HasParseError()) {
     FAIL() << "JSON parsing failed";
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index 7a1e3b6..676a477 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -41,6 +41,7 @@
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/key_value_metadata.h"
 #include "arrow/util/logging.h"
+#include "arrow/util/ubsan.h"
 #include "arrow/visitor_inline.h"
 
 namespace arrow {
@@ -630,7 +631,8 @@ class FieldToFlatbufferVisitor {
       type_ids.push_back(code);
     }
 
-    auto fb_type_ids = fbb_.CreateVector(type_ids);
+    auto fb_type_ids =
+        fbb_.CreateVector(util::MakeNonNull(type_ids.data()), type_ids.size());
 
     type_offset_ = flatbuf::CreateUnion(fbb_, mode, fb_type_ids).Union();
     return Status::OK();
@@ -653,7 +655,8 @@ class FieldToFlatbufferVisitor {
   Status GetResult(const std::shared_ptr<Field>& field, FieldOffset* offset) {
     auto fb_name = fbb_.CreateString(field->name());
     RETURN_NOT_OK(VisitType(*field->type()));
-    auto fb_children = fbb_.CreateVector(children_);
+    auto fb_children =
+        fbb_.CreateVector(util::MakeNonNull(children_.data()), children_.size());
 
     DictionaryOffset dictionary = 0;
     if (field->type()->id() == Type::DICTIONARY) {
@@ -799,7 +802,7 @@ static Status WriteFieldNodes(FBB& fbb, const std::vector<FieldMetadata>& nodes,
     }
     fb_nodes.emplace_back(node.length, node.null_count);
   }
-  *out = fbb.CreateVectorOfStructs(fb_nodes);
+  *out = fbb.CreateVectorOfStructs(util::MakeNonNull(fb_nodes.data()), fb_nodes.size());
   return Status::OK();
 }
 
@@ -812,7 +815,9 @@ static Status WriteBuffers(FBB& fbb, const std::vector<BufferMetadata>& buffers,
     const BufferMetadata& buffer = buffers[i];
     fb_buffers.emplace_back(buffer.offset, buffer.length);
   }
-  *out = fbb.CreateVectorOfStructs(fb_buffers);
+  *out =
+      fbb.CreateVectorOfStructs(util::MakeNonNull(fb_buffers.data()), fb_buffers.size());
+
   return Status::OK();
 }
 
@@ -871,9 +876,11 @@ Status WriteTensorMessage(const Tensor& tensor, int64_t buffer_start_offset,
     dims.push_back(flatbuf::CreateTensorDim(fbb, tensor.shape()[i], name));
   }
 
-  auto fb_shape = fbb.CreateVector(dims);
-  auto fb_strides = fbb.CreateVector(tensor.strides());
+  auto fb_shape = fbb.CreateVector(util::MakeNonNull(dims.data()), dims.size());
 
+  flatbuffers::Offset<flatbuffers::Vector<int64_t>> fb_strides;
+  fb_strides = fbb.CreateVector(util::MakeNonNull(tensor.strides().data()),
+                                tensor.strides().size());
   int64_t body_length = tensor.size() * elem_size;
   flatbuf::Buffer buffer(buffer_start_offset, body_length);
 
@@ -1004,7 +1011,7 @@ FileBlocksToFlatbuffer(FBB& fbb, const std::vector<FileBlock>& blocks) {
     fb_blocks.emplace_back(block.offset, block.metadata_length, block.body_length);
   }
 
-  return fbb.CreateVectorOfStructs(fb_blocks);
+  return fbb.CreateVectorOfStructs(util::MakeNonNull(fb_blocks.data()), fb_blocks.size());
 }
 
 Status WriteFileFooter(const Schema& schema, const std::vector<FileBlock>& dictionaries,
@@ -1035,7 +1042,6 @@ Status WriteFileFooter(const Schema& schema, const std::vector<FileBlock>& dicti
 
   auto footer = flatbuf::CreateFooter(fbb, kCurrentMetadataVersion, fb_schema,
                                       fb_dictionaries, fb_record_batches);
-
   fbb.Finish(footer);
 
   int32_t size = fbb.GetSize();
diff --git a/cpp/src/arrow/json/test-common.h b/cpp/src/arrow/json/test-common.h
index 046c756..2905ae9 100644
--- a/cpp/src/arrow/json/test-common.h
+++ b/cpp/src/arrow/json/test-common.h
@@ -170,7 +170,9 @@ inline static Status ParseFromString(ParseOptions options, string_view src_str,
 
 static inline std::string PrettyPrint(string_view one_line) {
   rj::Document document;
-  document.Parse(one_line.data());
+
+  // Must pass size to avoid ASAN issues.
+  document.Parse(one_line.data(), one_line.size());
   rj::StringBuffer sb;
   rj::PrettyWriter<rj::StringBuffer> writer(sb);
   document.Accept(writer);
diff --git a/cpp/src/arrow/util/rle-encoding.h b/cpp/src/arrow/util/rle-encoding.h
index acefc8e..739158a 100644
--- a/cpp/src/arrow/util/rle-encoding.h
+++ b/cpp/src/arrow/util/rle-encoding.h
@@ -354,7 +354,7 @@ inline int RleDecoder::GetBatchWithDictSpaced(const T* dictionary, T* values,
   int values_read = 0;
   int remaining_nulls = null_count;
 
-  internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, batch_size);
+  arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, batch_size);
 
   while (values_read < batch_size) {
     bool is_valid = bit_reader.IsSet();
diff --git a/cpp/src/arrow/util/ubsan.cc b/cpp/src/arrow/util/ubsan.cc
new file mode 100644
index 0000000..f3952f8
--- /dev/null
+++ b/cpp/src/arrow/util/ubsan.cc
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#include "arrow/util/ubsan.h"
+
+namespace arrow {
+namespace util {
+
+namespace internal {
+
+uint8_t non_null_filler = 0xFF;
+
+}  // namespace internal
+}  // namespace util
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/ubsan.h b/cpp/src/arrow/util/ubsan.h
new file mode 100644
index 0000000..f9fcfb5
--- /dev/null
+++ b/cpp/src/arrow/util/ubsan.h
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Contains utilities for making UBSan happy.
+
+#pragma once
+
+#include <memory>
+
+#include "arrow/util/macros.h"
+
+namespace arrow {
+namespace util {
+
+namespace internal {
+
+static uint8_t non_null_filler;
+
+}  // namespace internal
+
+/// \brief Returns maybe_null if not null or a non-null pointer to an arbitrary memory
+/// that shouldn't be dereferenced.
+///
+/// Memset/Memcpy are undefinfed when a nullptr is passed as an argument use this utility
+/// method to wrap locations where this could happen.
+///
+/// Note: Flatbuffers has UBSan warnings if a zero length vector is passed.
+/// https://github.com/google/flatbuffers/pull/5355 is trying to resolve them.
+template <typename T>
+inline T* MakeNonNull(T* maybe_null) {
+  if (ARROW_PREDICT_TRUE(maybe_null != NULLPTR)) {
+    return maybe_null;
+  }
+
+  return reinterpret_cast<T*>(&internal::non_null_filler);
+}
+
+}  // namespace util
+}  // namespace arrow
diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc
index 3806349..b5266e2 100644
--- a/cpp/src/parquet/metadata.cc
+++ b/cpp/src/parquet/metadata.cc
@@ -104,6 +104,8 @@ std::shared_ptr<Statistics> MakeColumnStats(const format::ColumnMetaData& meta_d
       return MakeTypedColumnStats<ByteArrayType>(meta_data, descr);
     case Type::FIXED_LEN_BYTE_ARRAY:
       return MakeTypedColumnStats<FLBAType>(meta_data, descr);
+    case Type::UNDEFINED:
+      break;
   }
   throw ParquetException("Can't decode page statistics for selected column type");
 }
diff --git a/cpp/src/parquet/schema.cc b/cpp/src/parquet/schema.cc
index 862dfc9..8fbab85 100644
--- a/cpp/src/parquet/schema.cc
+++ b/cpp/src/parquet/schema.cc
@@ -409,6 +409,58 @@ std::unique_ptr<Node> GroupNode::FromParquet(const void* opaque_element, int nod
   return std::unique_ptr<Node>(group_node.release());
 }
 
+namespace {
+
+// If the parquet file is corrupted it is possible the type value decoded
+// will not be in the range of format::Type::type, which is undefined behavior.
+// This method prevents this by loading the value as the underlying type and checking
+// to make sure it is in range.
+template <typename ApiType>
+struct SafeLoader {
+  using ApiTypeEnum = typename ApiType::type;
+  using ApiTypeRawEnum = typename std::underlying_type<ApiTypeEnum>::type;
+
+  template <typename ThriftType>
+  inline static ApiTypeRawEnum LoadRaw(ThriftType* in) {
+    static_assert(
+        sizeof(ApiTypeEnum) >= sizeof(ThriftType),
+        "parquet type should always be the same size of larger then thrift type");
+    typename std::underlying_type<ThriftType>::type raw_value;
+    memcpy(&raw_value, in, sizeof(ThriftType));
+    return static_cast<ApiTypeRawEnum>(raw_value);
+  }
+
+  template <typename ThriftType, bool IsUnsigned = true>
+  inline static ApiTypeEnum LoadChecked(
+      typename std::enable_if<IsUnsigned, ThriftType>::type* in) {
+    auto raw_value = LoadRaw(in);
+    if (ARROW_PREDICT_FALSE(raw_value >=
+                            static_cast<ApiTypeRawEnum>(ApiType::UNDEFINED))) {
+      return ApiType::UNDEFINED;
+    }
+    return FromThrift(static_cast<ThriftType>(raw_value));
+  }
+
+  template <typename ThriftType, bool IsUnsigned = false>
+  inline static ApiTypeEnum LoadChecked(
+      typename std::enable_if<!IsUnsigned, ThriftType>::type* in) {
+    auto raw_value = LoadRaw(in);
+    if (ARROW_PREDICT_FALSE(raw_value >=
+                                static_cast<ApiTypeRawEnum>(ApiType::UNDEFINED) ||
+                            raw_value < 0)) {
+      return ApiType::UNDEFINED;
+    }
+    return FromThrift(static_cast<ThriftType>(raw_value));
+  }
+
+  template <typename ThriftType>
+  inline static ApiTypeEnum Load(ThriftType* in) {
+    return LoadChecked<ThriftType, std::is_unsigned<ApiTypeRawEnum>::value>(in);
+  }
+};
+
+}  // namespace
+
 std::unique_ptr<Node> PrimitiveNode::FromParquet(const void* opaque_element,
                                                  int node_id) {
   const format::SchemaElement* element =
@@ -417,21 +469,23 @@ std::unique_ptr<Node> PrimitiveNode::FromParquet(const void* opaque_element,
   std::unique_ptr<PrimitiveNode> primitive_node;
   if (element->__isset.logicalType) {
     // updated writer with logical type present
-    primitive_node = std::unique_ptr<PrimitiveNode>(
-        new PrimitiveNode(element->name, FromThrift(element->repetition_type),
-                          LogicalAnnotation::FromThrift(element->logicalType),
-                          FromThrift(element->type), element->type_length, node_id));
+    primitive_node = std::unique_ptr<PrimitiveNode>(new PrimitiveNode(
+        element->name, SafeLoader<Repetition>::Load(&(element->repetition_type)),
+        LogicalAnnotation::FromThrift(element->logicalType),
+        SafeLoader<Type>::Load(&(element->type)), element->type_length, node_id));
   } else if (element->__isset.converted_type) {
     // legacy writer with logical type present
     primitive_node = std::unique_ptr<PrimitiveNode>(new PrimitiveNode(
-        element->name, FromThrift(element->repetition_type), FromThrift(element->type),
-        FromThrift(element->converted_type), element->type_length, element->precision,
-        element->scale, node_id));
+        element->name, SafeLoader<Repetition>::Load(&(element->repetition_type)),
+        SafeLoader<Type>::Load(&(element->type)),
+        SafeLoader<LogicalType>::Load(&(element->converted_type)), element->type_length,
+        element->precision, element->scale, node_id));
   } else {
     // logical type not present
     primitive_node = std::unique_ptr<PrimitiveNode>(new PrimitiveNode(
-        element->name, FromThrift(element->repetition_type), NoAnnotation::Make(),
-        FromThrift(element->type), element->type_length, node_id));
+        element->name, SafeLoader<Repetition>::Load(&(element->repetition_type)),
+        NoAnnotation::Make(), SafeLoader<Type>::Load(&(element->type)),
+        element->type_length, node_id));
   }
 
   // Return as unique_ptr to the base type
diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc
index 62083c3..db48b24 100644
--- a/cpp/src/parquet/types.cc
+++ b/cpp/src/parquet/types.cc
@@ -95,6 +95,7 @@ std::string FormatStatValue(Type::type parquet_type, const std::string& val) {
     case Type::FIXED_LEN_BYTE_ARRAY: {
       return val;
     }
+    case Type::UNDEFINED:
     default:
       break;
   }
@@ -132,6 +133,7 @@ std::string FormatStatValue(Type::type parquet_type, const char* val) {
       result << val;
       break;
     }
+    case Type::UNDEFINED:
     default:
       break;
   }
@@ -200,6 +202,7 @@ std::string TypeToString(Type::type t) {
       return "BYTE_ARRAY";
     case Type::FIXED_LEN_BYTE_ARRAY:
       return "FIXED_LEN_BYTE_ARRAY";
+    case Type::UNDEFINED:
     default:
       return "UNKNOWN";
   }
@@ -253,6 +256,7 @@ std::string LogicalTypeToString(LogicalType::type t) {
       return "BSON";
     case LogicalType::INTERVAL:
       return "INTERVAL";
+    case LogicalType::UNDEFINED:
     default:
       return "UNKNOWN";
   }
@@ -276,6 +280,7 @@ int GetTypeByteSize(Type::type parquet_type) {
       return type_traits<ByteArrayType::type_num>::value_byte_size;
     case Type::FIXED_LEN_BYTE_ARRAY:
       return type_traits<FLBAType::type_num>::value_byte_size;
+    case Type::UNDEFINED:
     default:
       return 0;
   }
@@ -295,6 +300,7 @@ SortOrder::type DefaultSortOrder(Type::type primitive) {
     case Type::FIXED_LEN_BYTE_ARRAY:
       return SortOrder::UNSIGNED;
     case Type::INT96:
+    case Type::UNDEFINED:
       return SortOrder::UNKNOWN;
   }
   return SortOrder::UNKNOWN;
@@ -330,6 +336,7 @@ SortOrder::type GetSortOrder(LogicalType::type converted, Type::type primitive)
     case LogicalType::INTERVAL:
     case LogicalType::NONE:  // required instead of default
     case LogicalType::NA:    // required instead of default
+    case LogicalType::UNDEFINED:
       return SortOrder::UNKNOWN;
   }
   return SortOrder::UNKNOWN;
@@ -400,6 +407,7 @@ std::shared_ptr<const LogicalAnnotation> LogicalAnnotation::FromConvertedType(
     case LogicalType::NONE:
       return NoAnnotation::Make();
     case LogicalType::NA:
+    case LogicalType::UNDEFINED:
       return UnknownAnnotation::Make();
   }
   return UnknownAnnotation::Make();
diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h
index a109ae1..779ea6b 100644
--- a/cpp/src/parquet/types.h
+++ b/cpp/src/parquet/types.h
@@ -61,7 +61,9 @@ struct Type {
     FLOAT = 4,
     DOUBLE = 5,
     BYTE_ARRAY = 6,
-    FIXED_LEN_BYTE_ARRAY = 7
+    FIXED_LEN_BYTE_ARRAY = 7,
+    // Should always be last element.
+    UNDEFINED = 8
   };
 };
 
@@ -91,7 +93,9 @@ struct LogicalType {
     JSON,
     BSON,
     INTERVAL,
-    NA = 25
+    NA = 25,
+    // Should always be last element.
+    UNDEFINED = 26
   };
 };
 
@@ -103,7 +107,7 @@ class LogicalType;
 
 // Mirrors parquet::FieldRepetitionType
 struct Repetition {
-  enum type { REQUIRED = 0, OPTIONAL = 1, REPEATED = 2 };
+  enum type { REQUIRED = 0, OPTIONAL = 1, REPEATED = 2, /*Always last*/ UNDEFINED = 3 };
 };
 
 // Reference:
diff --git a/cpp/src/plasma/protocol.cc b/cpp/src/plasma/protocol.cc
index a878647..cf8a7f2 100644
--- a/cpp/src/plasma/protocol.cc
+++ b/cpp/src/plasma/protocol.cc
@@ -28,6 +28,7 @@
 #ifdef PLASMA_CUDA
 #include "arrow/gpu/cuda_api.h"
 #endif
+#include "arrow/util/ubsan.h"
 
 namespace fb = plasma::flatbuf;
 
@@ -49,7 +50,7 @@ ToFlatbuffer(flatbuffers::FlatBufferBuilder* fbb, const ObjectID* object_ids,
   for (int64_t i = 0; i < num_objects; i++) {
     results.push_back(fbb->CreateString(object_ids[i].binary()));
   }
-  return fbb->CreateVector(results);
+  return fbb->CreateVector(arrow::util::MakeNonNull(results.data()), results.size());
 }
 
 Status PlasmaReceive(int sock, MessageType message_type, std::vector<uint8_t>* buffer) {
@@ -341,7 +342,9 @@ Status SendDeleteReply(int sock, const std::vector<ObjectID>& object_ids,
   auto message = fb::CreatePlasmaDeleteReply(
       fbb, static_cast<int32_t>(object_ids.size()),
       ToFlatbuffer(&fbb, &object_ids[0], object_ids.size()),
-      fbb.CreateVector(reinterpret_cast<const int32_t*>(&errors[0]), object_ids.size()));
+      fbb.CreateVector(
+          arrow::util::MakeNonNull(reinterpret_cast<const int32_t*>(errors.data())),
+          object_ids.size()));
   return PlasmaSend(sock, MessageType::PlasmaDeleteReply, &fbb, message);
 }
 
@@ -421,7 +424,9 @@ Status SendListReply(int sock, const ObjectTable& objects) {
                                      entry.second->construct_duration, digest);
     object_infos.push_back(info);
   }
-  auto message = fb::CreatePlasmaListReply(fbb, fbb.CreateVector(object_infos));
+  auto message = fb::CreatePlasmaListReply(
+      fbb, fbb.CreateVector(arrow::util::MakeNonNull(object_infos.data()),
+                            object_infos.size()));
   return PlasmaSend(sock, MessageType::PlasmaListReply, &fbb, message);
 }
 
@@ -538,6 +543,7 @@ Status SendGetReply(int sock, ObjectID object_ids[],
     if (object.device_num != 0) {
       std::shared_ptr<arrow::Buffer> handle;
       RETURN_NOT_OK(object.ipc_handle->Serialize(arrow::default_memory_pool(), &handle));
+      handles.push_back(fb::CreateCudaHandle(fbb, fbb.CreateVector(handle)));
       handles.push_back(
           fb::CreateCudaHandle(fbb, fbb.CreateVector(handle->data(), handle->size())));
     }
@@ -545,8 +551,10 @@ Status SendGetReply(int sock, ObjectID object_ids[],
   }
   auto message = fb::CreatePlasmaGetReply(
       fbb, ToFlatbuffer(&fbb, object_ids, num_objects),
-      fbb.CreateVectorOfStructs(objects.data(), num_objects), fbb.CreateVector(store_fds),
-      fbb.CreateVector(mmap_sizes), fbb.CreateVector(handles));
+      fbb.CreateVectorOfStructs(arrow::util::MakeNonNull(objects.data()), num_objects),
+      fbb.CreateVector(arrow::util::MakeNonNull(store_fds.data()), store_fds.size()),
+      fbb.CreateVector(arrow::util::MakeNonNull(mmap_sizes.data()), mmap_sizes.size()),
+      fbb.CreateVector(arrow::util::MakeNonNull(handles.data()), handles.size()));
   return PlasmaSend(sock, MessageType::PlasmaGetReply, &fbb, message);
 }