You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2023/05/28 07:10:45 UTC

[arrow] branch main updated: GH-35598: [MATLAB] Add a public `Valid` property to the MATLAB `arrow.array.<Array>` classes to query Null values (i.e. validity bitmap support) (#35655)

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 05fe0d2583 GH-35598: [MATLAB] Add a public `Valid` property to the MATLAB `arrow.array.<Array>` classes to query Null values (i.e. validity bitmap support) (#35655)
05fe0d2583 is described below

commit 05fe0d25834fd1629d71ceb51f0281b44a511f94
Author: Kevin Gurney <kg...@mathworks.com>
AuthorDate: Sun May 28 03:10:39 2023 -0400

    GH-35598: [MATLAB] Add a public `Valid` property to the MATLAB `arrow.array.<Array>` classes to query Null values (i.e. validity bitmap support) (#35655)
    
    ### Rationale for this change
    
    Currently, the `arrow.array.<Array>` classes do not support querying the Null values (i.e. validity bitmap) on an Arrow array. Support for encoding Null values is an important part of the Arrow memory format, so the MATLAB Interface to Arrow should support it.
    
    There are likely multiple different APIs that the MATLAB interface should have to support Null values robustly. However, to focus on incremental delivery, we can start by adding a public `Valid` property to the `arrow.array.<Array>` classes, which would return a `logical` array indicating which elements of the given array are valid (i.e. not null).
    
    ### What changes are included in this PR?
    
    1. Added a new public property `Valid` to the `arrow.array.Array` superclass.
    2. Implemented basic null value handling for `arrow.array.Float64Array` (i.e. treat `NaN` values in the input MATLAB array as null values in the corresponding `arrow.array.Float64Array`).
    3. Implemented null value substitution (i.e. substitute null values with `NaN`) for `Float64Array` in the `toMATLAB` and `double` conversion methods.
    
    Example of creating an `arrow.array.Float64Array` from a MATLAB `double` array containing `NaN` values:
    
    ```matlab
    >> matlabArray = [1, 2, NaN, 4, NaN]'
    
    matlabArray =
    
         1
         2
       NaN
         4
       NaN
    
    >> arrowArray = arrow.array.Float64Array(matlabArray)
    
    arrowArray =
    
    [
      1,
      2,
      null,
      4,
      null
    ]
    
    >> arrowArray.Valid
    
    ans =
    
      5×1 logical array
    
       1
       1
       0
       1
       0
    
    >> all(~isnan(matlabArray) == arrowArray.Valid)
    
    ans =
    
      logical
    
       1
    ```
    
    ### Are these changes tested?
    
    Yes, we have added the following test points for the `Valid` property of `arrow.array.Float64Array`:
    
    1. `ValidBasic`
    2. `ValidNoNulls`
    3. `ValidAllNulls`
    4. `ValidEmpty`
    
    ### Are there any user-facing changes?
    
    Yes.
    
    There is now a public property `Valid` on the `arrow.array.Float64Array` class, which is a MATLAB `logical` array encoding the null values in the underlying Arrow array, where `true` indicates that an element is valid (i.e. not null) and `false` indicates that an element is invalid (i.e. null).
    
    ### Future Directions
    
    1. Implement more null value related methods like `isvalid`, `isnull`, `packagedValidityBitmap`, etc.
    2. Add null value (i.e. `Valid` property) support to the rest of the `arrow.array.Array` subclasses.
    
    ### Notes
    
    1. Thank you to @ sgilmore10 for your help with this pull request!
    
    Lead-authored-by: Kevin Gurney <kg...@mathworks.com>
    Co-authored-by: sgilmore10 <74...@users.noreply.github.com>
    Co-authored-by: Kevin Gurney <ke...@gmail.com>
    Co-authored-by: Sarah Gilmore <sg...@mathworks.com>
    Co-authored-by: Sutou Kouhei <ko...@cozmixng.org>
    Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
 matlab/src/cpp/arrow/matlab/array/proxy/array.cc   | 24 +++++++++
 matlab/src/cpp/arrow/matlab/array/proxy/array.h    |  2 +
 .../cpp/arrow/matlab/array/proxy/numeric_array.h   | 22 ++++++--
 .../matlab/bit/bit_pack_matlab_logical_array.cc    | 63 ++++++++++++++++++++++
 .../bit_pack_matlab_logical_array.h}               | 29 +++-------
 .../arrow/matlab/bit/bit_unpack_arrow_buffer.cc    | 41 ++++++++++++++
 .../array.h => bit/bit_unpack_arrow_buffer.h}      | 25 ++-------
 matlab/src/matlab/+arrow/+array/Array.m            |  5 ++
 matlab/src/matlab/+arrow/+array/Float64Array.m     | 15 +++++-
 matlab/test/arrow/array/tFloat64Array.m            | 43 +++++++++++++++
 matlab/tools/cmake/BuildMatlabArrowInterface.cmake |  7 ++-
 11 files changed, 225 insertions(+), 51 deletions(-)

diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc
index f003a11525..fc1d66ae24 100644
--- a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc
@@ -17,6 +17,8 @@
 
 #include "arrow/matlab/array/proxy/array.h"
 
+#include "arrow/matlab/bit/bit_unpack_arrow_buffer.h"
+
 namespace arrow::matlab::array::proxy {
 
     Array::Array(const libmexclass::proxy::FunctionArguments& constructor_arguments) {
@@ -25,6 +27,7 @@ namespace arrow::matlab::array::proxy {
         REGISTER_METHOD(Array, toString);
         REGISTER_METHOD(Array, toMATLAB);
         REGISTER_METHOD(Array, length);
+        REGISTER_METHOD(Array, valid);
     }
 
     void Array::toString(libmexclass::proxy::method::Context& context) {
@@ -40,4 +43,25 @@ namespace arrow::matlab::array::proxy {
         auto length_mda = factory.createScalar(array->length());
         context.outputs[0] = length_mda;
     }
+
+    void Array::valid(libmexclass::proxy::method::Context& context) {
+        auto array_length = static_cast<size_t>(array->length());
+        
+        // If the Arrow array has no null values, then return a MATLAB
+        // logical array that is all "true" for the validity bitmap.
+        if (array->null_count() == 0) {
+            ::matlab::data::ArrayFactory factory;
+            auto validity_buffer = factory.createBuffer<bool>(array_length);
+            auto validity_buffer_ptr = validity_buffer.get();
+            std::fill(validity_buffer_ptr, validity_buffer_ptr + array_length, true);
+            auto valid_elements_mda = factory.createArrayFromBuffer<bool>({array_length, 1}, std::move(validity_buffer));
+            context.outputs[0] = valid_elements_mda;
+            return;
+        }
+
+        auto validity_bitmap = array->null_bitmap();
+        auto valid_elements_mda = arrow::matlab::bit::bitUnpackArrowBuffer(validity_bitmap, array_length);
+        context.outputs[0] = valid_elements_mda;
+    }
+
 }
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.h b/matlab/src/cpp/arrow/matlab/array/proxy/array.h
index a0ef0a94f3..0a69f6fcad 100644
--- a/matlab/src/cpp/arrow/matlab/array/proxy/array.h
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.h
@@ -35,6 +35,8 @@ class Array : public libmexclass::proxy::Proxy {
 
         void length(libmexclass::proxy::method::Context& context);
 
+        void valid(libmexclass::proxy::method::Context& context);
+
         virtual void toMATLAB(libmexclass::proxy::method::Context& context) = 0;
 
         std::shared_ptr<arrow::Array> array;
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h
index 2f2a9925c0..ad2242a755 100644
--- a/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h
@@ -26,11 +26,19 @@
 #include "arrow/type_traits.h"
 
 #include "arrow/matlab/array/proxy/array.h"
+#include "arrow/matlab/bit/bit_pack_matlab_logical_array.h"
 
 #include "libmexclass/proxy/Proxy.h"
 
 namespace arrow::matlab::array::proxy {
 
+namespace {
+const uint8_t* getUnpackedValidityBitmap(const ::matlab::data::TypedArray<bool>& valid_elements) {
+    const auto valid_elements_iterator(valid_elements.cbegin());
+    return reinterpret_cast<const uint8_t*>(valid_elements_iterator.operator->());
+}
+} // anonymous namespace
+
 template<typename CType>
 class NumericArray : public arrow::matlab::array::proxy::Array {
     public:
@@ -43,6 +51,8 @@ class NumericArray : public arrow::matlab::array::proxy::Array {
             const ::matlab::data::TypedArray<CType> numeric_mda = constructor_arguments[0];
             const ::matlab::data::TypedArray<bool> make_copy = constructor_arguments[1];
 
+            const auto has_validity_bitmap = constructor_arguments.getNumberOfElements() > 2;
+
             // Get raw pointer of mxArray
             auto it(numeric_mda.cbegin());
             auto dt = it.operator->();
@@ -50,8 +60,11 @@ class NumericArray : public arrow::matlab::array::proxy::Array {
             const auto make_deep_copy = make_copy[0];
 
             if (make_deep_copy) {
+                // Get the unpacked validity bitmap (if it exists)
+                auto unpacked_validity_bitmap = has_validity_bitmap ? getUnpackedValidityBitmap(constructor_arguments[2]) : nullptr;
+
                 BuilderType builder;
-                auto st = builder.AppendValues(dt, numeric_mda.getNumberOfElements());
+                auto st = builder.AppendValues(dt, numeric_mda.getNumberOfElements(), unpacked_validity_bitmap);
 
                 // TODO: handle error case
                 if (st.ok()) {
@@ -68,12 +81,11 @@ class NumericArray : public arrow::matlab::array::proxy::Array {
                 auto data_buffer = std::make_shared<arrow::Buffer>(reinterpret_cast<const uint8_t*>(dt),
                                                               sizeof(CType) * numeric_mda.getNumberOfElements());
 
-                // TODO: Implement null support
-                std::shared_ptr<arrow::Buffer> null_buffer = nullptr;
+                // Pack the validity bitmap values.
+                auto packed_validity_bitmap = has_validity_bitmap ? arrow::matlab::bit::bitPackMatlabLogicalArray(constructor_arguments[2]).ValueOrDie() : nullptr;
 
-                auto array_data = arrow::ArrayData::Make(data_type, length, {null_buffer, data_buffer});
+                auto array_data = arrow::ArrayData::Make(data_type, length, {packed_validity_bitmap, data_buffer});
                 array = arrow::MakeArray(array_data);
-
             }
         }
 
diff --git a/matlab/src/cpp/arrow/matlab/bit/bit_pack_matlab_logical_array.cc b/matlab/src/cpp/arrow/matlab/bit/bit_pack_matlab_logical_array.cc
new file mode 100644
index 0000000000..45c6e39347
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/bit/bit_pack_matlab_logical_array.cc
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cmath> // std::ceil
+
+#include <arrow/util/bit_util.h>
+#include <arrow/util/bitmap_generate.h>
+
+#include "arrow/matlab/bit/bit_pack_matlab_logical_array.h"
+
+namespace arrow::matlab::bit {
+
+    // Calculate the number of bytes required in the bit-packed validity buffer.
+    int64_t bitPackedLength(int64_t num_elements) {
+        // Since MATLAB logical values are encoded using a full byte (8 bits),
+        // we can divide the number of elements in the logical array by 8 to get
+        // the bit packed length.
+        return static_cast<int64_t>(std::ceil(num_elements / 8.0));
+    }
+
+    // Pack an unpacked MATLAB logical array into a bit-packed arrow::Buffer.
+    arrow::Result<std::shared_ptr<arrow::Buffer>> bitPackMatlabLogicalArray(const ::matlab::data::TypedArray<bool> matlab_logical_array) {
+        // Validate that the input arrow::Buffer has sufficient size to store a full bit-packed
+        // representation of the input MATLAB logical array.
+        const auto unpacked_buffer_length = matlab_logical_array.getNumberOfElements();
+
+        // Compute the bit packed length from the unpacked length.
+        const auto packed_buffer_length = bitPackedLength(unpacked_buffer_length);
+
+        ARROW_ASSIGN_OR_RAISE(auto packed_validity_bitmap_buffer,  arrow::AllocateResizableBuffer(packed_buffer_length));
+
+        // Get pointers to the internal uint8_t arrays behind arrow::Buffer and mxArray
+        // Get raw bool array pointer from MATLAB logical array.
+        // Get an iterator to the raw bool data behind the MATLAB logical array.
+        auto unpacked_bool_data_iterator = matlab_logical_array.cbegin();
+
+        // Iterate over the mxLogical array and write bit-packed bools to the arrow::Buffer.
+        // Call into a loop-unrolled Arrow utility for better performance when bit-packing.
+        auto generator = [&]() -> bool { return *(unpacked_bool_data_iterator++); };
+        const int64_t start_offset = 0;
+
+        auto mutable_data = packed_validity_bitmap_buffer->mutable_data();
+
+        arrow::internal::GenerateBitsUnrolled(mutable_data, start_offset, unpacked_buffer_length, generator);
+
+        return packed_validity_bitmap_buffer;
+    }
+
+}
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.h b/matlab/src/cpp/arrow/matlab/bit/bit_pack_matlab_logical_array.h
similarity index 59%
copy from matlab/src/cpp/arrow/matlab/array/proxy/array.h
copy to matlab/src/cpp/arrow/matlab/bit/bit_pack_matlab_logical_array.h
index a0ef0a94f3..cceb22a2f3 100644
--- a/matlab/src/cpp/arrow/matlab/array/proxy/array.h
+++ b/matlab/src/cpp/arrow/matlab/bit/bit_pack_matlab_logical_array.h
@@ -17,27 +17,14 @@
 
 #pragma once
 
-#include "arrow/array.h"
+#include <arrow/buffer.h>
+#include <arrow/result.h>
 
-#include "libmexclass/proxy/Proxy.h"
-
-namespace arrow::matlab::array::proxy {
-
-class Array : public libmexclass::proxy::Proxy {
-    public:
-        Array(const libmexclass::proxy::FunctionArguments& constructor_arguments);
-    
-        virtual ~Array() {}
-
-    protected:
-
-        void toString(libmexclass::proxy::method::Context& context);
-
-        void length(libmexclass::proxy::method::Context& context);
-
-        virtual void toMATLAB(libmexclass::proxy::method::Context& context) = 0;
-
-        std::shared_ptr<arrow::Array> array;
-};
+#include "MatlabDataArray.hpp"
 
+namespace arrow::matlab::bit {
+    // Calculate the number of bytes required in the bit-packed validity buffer.
+    int64_t bitPackedLength(int64_t num_elements);
+    // Pack an unpacked MATLAB logical array into a bit-packed arrow::Buffer.
+    arrow::Result<std::shared_ptr<arrow::Buffer>> bitPackMatlabLogicalArray(const ::matlab::data::TypedArray<bool> matlab_logical_array);
 }
diff --git a/matlab/src/cpp/arrow/matlab/bit/bit_unpack_arrow_buffer.cc b/matlab/src/cpp/arrow/matlab/bit/bit_unpack_arrow_buffer.cc
new file mode 100644
index 0000000000..a83cda8aca
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/bit/bit_unpack_arrow_buffer.cc
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/matlab/bit/bit_unpack_arrow_buffer.h"
+
+#include "arrow/util/bitmap_visit.h"
+
+namespace arrow::matlab::bit {
+    ::matlab::data::TypedArray<bool> bitUnpackArrowBuffer(const std::shared_ptr<arrow::Buffer>& packed_buffer, int64_t length) {
+        const auto packed_buffer_ptr = packed_buffer->data();
+
+        ::matlab::data::ArrayFactory factory;
+        
+        const auto array_length = static_cast<size_t>(length);
+        
+        auto unpacked_buffer = factory.createBuffer<bool>(array_length);
+        auto unpacked_buffer_ptr = unpacked_buffer.get();
+        auto visitFcn = [&](const bool is_valid) { *unpacked_buffer_ptr++ = is_valid; };
+
+        const int64_t start_offset = 0;
+        arrow::internal::VisitBitsUnrolled(packed_buffer_ptr, start_offset, length, visitFcn);
+
+        ::matlab::data::TypedArray<bool> unpacked_matlab_logical_Array = factory.createArrayFromBuffer({array_length, 1}, std::move(unpacked_buffer));
+
+        return unpacked_matlab_logical_Array;
+    }
+}
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.h b/matlab/src/cpp/arrow/matlab/bit/bit_unpack_arrow_buffer.h
similarity index 59%
copy from matlab/src/cpp/arrow/matlab/array/proxy/array.h
copy to matlab/src/cpp/arrow/matlab/bit/bit_unpack_arrow_buffer.h
index a0ef0a94f3..9b88cb16de 100644
--- a/matlab/src/cpp/arrow/matlab/array/proxy/array.h
+++ b/matlab/src/cpp/arrow/matlab/bit/bit_unpack_arrow_buffer.h
@@ -17,27 +17,10 @@
 
 #pragma once
 
-#include "arrow/array.h"
+#include "arrow/buffer.h"
 
-#include "libmexclass/proxy/Proxy.h"
-
-namespace arrow::matlab::array::proxy {
-
-class Array : public libmexclass::proxy::Proxy {
-    public:
-        Array(const libmexclass::proxy::FunctionArguments& constructor_arguments);
-    
-        virtual ~Array() {}
-
-    protected:
-
-        void toString(libmexclass::proxy::method::Context& context);
-
-        void length(libmexclass::proxy::method::Context& context);
-
-        virtual void toMATLAB(libmexclass::proxy::method::Context& context) = 0;
-
-        std::shared_ptr<arrow::Array> array;
-};
+#include "MatlabDataArray.hpp"
 
+namespace arrow::matlab::bit {
+    ::matlab::data::TypedArray<bool> bitUnpackArrowBuffer(const std::shared_ptr<arrow::Buffer>& packed_buffer, int64_t length);
 }
diff --git a/matlab/src/matlab/+arrow/+array/Array.m b/matlab/src/matlab/+arrow/+array/Array.m
index a1778b17c2..c13c85167a 100644
--- a/matlab/src/matlab/+arrow/+array/Array.m
+++ b/matlab/src/matlab/+arrow/+array/Array.m
@@ -24,6 +24,7 @@ classdef (Abstract) Array < matlab.mixin.CustomDisplay & ...
 
     properties (Dependent)
         Length
+        Valid % Validity bitmap
     end
     
     methods
@@ -35,6 +36,10 @@ classdef (Abstract) Array < matlab.mixin.CustomDisplay & ...
             numElements = obj.Proxy.length();
         end
 
+        function validElements = get.Valid(obj)
+            validElements = obj.Proxy.valid();
+        end
+
         function matlabArray = toMATLAB(obj)
             matlabArray = obj.Proxy.toMATLAB();
         end
diff --git a/matlab/src/matlab/+arrow/+array/Float64Array.m b/matlab/src/matlab/+arrow/+array/Float64Array.m
index 0b74f5a455..841bbcc6e7 100644
--- a/matlab/src/matlab/+arrow/+array/Float64Array.m
+++ b/matlab/src/matlab/+arrow/+array/Float64Array.m
@@ -18,6 +18,7 @@ classdef Float64Array < arrow.array.Array
 
     properties (Hidden, SetAccess=private)
         MatlabArray
+        NullSubstitionValue = NaN;
     end
 
     methods
@@ -29,13 +30,23 @@ classdef Float64Array < arrow.array.Array
 
             validateattributes(data, "double", ["2d", "nonsparse", "real"]);
             if ~isempty(data), validateattributes(data, "double", "vector"); end
-            obj@arrow.array.Array("Name", "arrow.array.proxy.Float64Array", "ConstructorArguments", {data, opts.DeepCopy});
+            % Extract missing (i.e. null) values.
+            % TODO: Determine a more robust approach to handling "detection" of null values.
+            %       For example - add a name-value pair to allow clients to choose which values
+            %       should be considered null (if any).
+            validElements = ~isnan(data);
+            obj@arrow.array.Array("Name", "arrow.array.proxy.Float64Array", "ConstructorArguments", {data, opts.DeepCopy, validElements});
             % Store a reference to the array if not doing a deep copy
             if (~opts.DeepCopy), obj.MatlabArray = data; end
         end
 
         function data = double(obj)
-            data = obj.Proxy.toMATLAB();
+            data = obj.toMATLAB();
+        end
+        
+        function matlabArray = toMATLAB(obj)
+            matlabArray = obj.Proxy.toMATLAB();
+            matlabArray(~obj.Valid) = obj.NullSubstitionValue;
         end
     end
 end
diff --git a/matlab/test/arrow/array/tFloat64Array.m b/matlab/test/arrow/array/tFloat64Array.m
index 6bd84d8f67..b166fd3195 100755
--- a/matlab/test/arrow/array/tFloat64Array.m
+++ b/matlab/test/arrow/array/tFloat64Array.m
@@ -37,5 +37,48 @@ classdef tFloat64Array < hNumericArray
             fcn = @() arrow.array.Float64Array(sparse(ones([10 1])), DeepCopy=MakeDeepCopy);
             testCase.verifyError(fcn, "MATLAB:expectedNonsparse");
         end
+
+        function ValidBasic(testCase, MakeDeepCopy)
+            % Create a MATLAB array with one null value (i.e. one NaN).
+            matlabArray = [1, NaN, 3]';
+            arrowArray = arrow.array.Float64Array(matlabArray, DeepCopy=MakeDeepCopy);
+            expectedValid = [true, false, true]';
+            testCase.verifyEqual(arrowArray.Valid, expectedValid);
+        end
+
+        function ValidNoNulls(testCase, MakeDeepCopy)
+            % Create a MATLAB array with no null values (i.e. no NaNs).
+            matlabArray = [1, 2, 3]';
+            arrowArray = arrow.array.Float64Array(matlabArray, DeepCopy=MakeDeepCopy);
+            expectedValid = [true, true, true]';
+            testCase.verifyEqual(arrowArray.Valid, expectedValid);
+        end
+
+        function ValidAllNulls(testCase, MakeDeepCopy)
+            % Create a MATLAB array with all null values (i.e. all NaNs).
+            matlabArray = [NaN, NaN, NaN]';
+            arrowArray = arrow.array.Float64Array(matlabArray, DeepCopy=MakeDeepCopy);
+            expectedValid = [false, false, false]';
+            testCase.verifyEqual(arrowArray.Valid, expectedValid);
+        end
+
+        function ValidEmpty(testCase, MakeDeepCopy)
+            % Create an empty 0x0 MATLAB array.
+            matlabArray = double.empty(0, 0);
+            arrowArray = arrow.array.Float64Array(matlabArray, DeepCopy=MakeDeepCopy);
+            expectedValid = logical.empty(0, 1);
+            testCase.verifyEqual(arrowArray.Valid, expectedValid);
+
+            % Create an empty 0x1 MATLAB array.
+            matlabArray = double.empty(0, 1);
+            arrowArray = arrow.array.Float64Array(matlabArray, DeepCopy=MakeDeepCopy);
+            testCase.verifyEqual(arrowArray.Valid, expectedValid);
+
+            % Create an empty 1x0 MATLAB array.
+            matlabArray = double.empty(1, 0);
+            arrowArray = arrow.array.Float64Array(matlabArray, DeepCopy=MakeDeepCopy);
+            testCase.verifyEqual(arrowArray.Valid, expectedValid);
+        end
+        
     end
 end
diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
index 92ed955ed4..0dda3fb770 100644
--- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
+++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
@@ -33,8 +33,11 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_SOURCE_SUBDIR "libmexclass/cpp
 
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_LIBRARY_NAME arrowproxy)
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_LIBRARY_ROOT_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp")
-set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy")
-set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/array.cc")
+set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy"
+                                                      "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit")
+set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/array.cc"
+                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/bit_pack_matlab_logical_array.cc"
+                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/bit_unpack_arrow_buffer.cc")
 
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy")
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy/factory.cc")