You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2023/06/13 21:52:51 UTC

[arrow] branch main updated: GH-36040: [MATLAB] Add `arrow.array.BooleanArray` class (#36041)

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 4653918cf2 GH-36040: [MATLAB] Add `arrow.array.BooleanArray` class (#36041)
4653918cf2 is described below

commit 4653918cf23067e540e05e71799e8004fab8c7a2
Author: Kevin Gurney <ke...@gmail.com>
AuthorDate: Tue Jun 13 17:52:41 2023 -0400

    GH-36040: [MATLAB] Add `arrow.array.BooleanArray` class (#36041)
    
    ### Rationale for this change
    
    Now that the MATLAB interface supports validity bitmaps and bit packing/unpacking (#35598), we can add support for a `BooleanArray` class. This is a follow-up to the work on the `NumericArray` classes.
    
    `BooleanArray` maps to the MATLAB [`logical`](https://www.mathworks.com/help/matlab/logical-operations.html) type when calling `toMATLAB`.
    
    ### What changes are included in this PR?
    
    1. Added a new `arrow.array.BooleanArray` class that can be converted to/from a MATLAB `logical` array.
    
    **Example**:
    
    ```matlab
    >> matlabArray = logical([true, false, true])'
    
    matlabArray =
    
      3x1 logical array
    
       1
       0
       1
    
    >> arrowArray = arrow.array.BooleanArray(matlabArray)
    
    arrowArray =
    
    [
      true,
      false,
      true
    ]
    
    >> convertedArrowArray = toMATLAB(arrowArray)
    
    convertedArrowArray =
    
      3x1 logical array
    
       1
       0
       1
    
    ```
    
    ### Are these changes tested?
    
    Yes.
    
    1. Added a new `tBooleanArray.m` test class which follows the existing pattern for the `NumericArray` test classes.
    
    ### Are there any user-facing changes?
    
    Yes.
    
    1. Added a new user-facing `arrow.array.BooleanArray` class.
    
    ### Notes
    
    1. Thank you @ sgilmore10 for your help with this pull request!
    * Closes: #36040
    
    Lead-authored-by: Kevin Gurney <kg...@mathworks.com>
    Co-authored-by: Kevin Gurney <ke...@gmail.com>
    Co-authored-by: Sutou Kouhei <ko...@cozmixng.org>
    Co-authored-by: Sarah Gilmore <sg...@mathworks.com>
    Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
 .../cpp/arrow/matlab/array/proxy/boolean_array.cc  |  55 ++++++++
 .../cpp/arrow/matlab/array/proxy/boolean_array.h   |  40 ++++++
 matlab/src/cpp/arrow/matlab/proxy/factory.cc       |   6 +-
 matlab/src/matlab/+arrow/+array/BooleanArray.m     |  44 +++++++
 matlab/test/arrow/array/tBooleanArray.m            | 144 +++++++++++++++++++++
 matlab/tools/cmake/BuildMatlabArrowInterface.cmake |   1 +
 6 files changed, 288 insertions(+), 2 deletions(-)

diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc
new file mode 100644
index 0000000000..def8a53e80
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/matlab/array/proxy/boolean_array.h"
+
+#include "arrow/matlab/error/error.h"
+#include "arrow/matlab/bit/bit_pack_matlab_logical_array.h"
+#include "arrow/matlab/bit/bit_unpack_arrow_buffer.h"
+
+namespace arrow::matlab::array::proxy {
+
+        libmexclass::proxy::MakeResult BooleanArray::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) {
+            // Builds a BooleanArray proxy from two MATLAB logical arrays: the data values (argument 0) and the validity bitmap (argument 1).
+            const ::matlab::data::TypedArray<bool> logical_mda = constructor_arguments[0];
+            const ::matlab::data::TypedArray<bool> validity_bitmap_mda = constructor_arguments[1];
+
+            // Bit-pack the logical data values. NOTE(review): a failure here is reported with the validity bitmap error ID -- confirm this is intended.
+            auto maybe_packed_logical_buffer = arrow::matlab::bit::bitPackMatlabLogicalArray(logical_mda);
+            MATLAB_ERROR_IF_NOT_OK(maybe_packed_logical_buffer.status(), error::BITPACK_VALIDITY_BITMAP_ERROR_ID);
+
+            // Bit-pack the validity bitmap values.
+            auto maybe_validity_bitmap_buffer = arrow::matlab::bit::bitPackMatlabLogicalArray(validity_bitmap_mda);
+            MATLAB_ERROR_IF_NOT_OK(maybe_validity_bitmap_buffer.status(), error::BITPACK_VALIDITY_BITMAP_ERROR_ID);
+
+            const auto data_type = arrow::boolean();
+            const auto array_length = logical_mda.getNumberOfElements();
+            const auto validity_bitmap_buffer = *maybe_validity_bitmap_buffer;
+            const auto data_buffer = *maybe_packed_logical_buffer;
+            // Buffers are handed to ArrayData::Make in the order {validity bitmap, data}; the length comes from the data array.
+            auto array_data = arrow::ArrayData::Make(data_type, array_length, {validity_bitmap_buffer, data_buffer});
+            return std::make_shared<arrow::matlab::array::proxy::BooleanArray>(arrow::MakeArray(array_data));
+        }
+
+        void BooleanArray::toMATLAB(libmexclass::proxy::method::Context& context) {
+            auto packed_values = std::static_pointer_cast<arrow::BooleanArray>(array)->values();  // values buffer of the wrapped array
+            auto num_values = array->length();
+            auto unpacked_mda = arrow::matlab::bit::bitUnpackArrowBuffer(packed_values, num_values);
+            context.outputs[0] = unpacked_mda;  // unpacked result becomes the first output
+        }
+
+}
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h
new file mode 100644
index 0000000000..6966d1090e
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/matlab/array/proxy/array.h"
+
+#include "libmexclass/proxy/Proxy.h"
+
+namespace arrow::matlab::array::proxy {
+
+    // Proxy class wrapping an arrow::Array of booleans for the MATLAB interface.
+    class BooleanArray : public arrow::matlab::array::proxy::Array {
+        public:
+            // Stores the given array in the inherited `array` member.
+            BooleanArray(const std::shared_ptr<arrow::Array> logical_array)
+                : arrow::matlab::array::proxy::Array() { array = logical_array; }
+
+            // Factory that builds the proxy from the constructor arguments passed in.
+            static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments);
+
+        protected:
+            void toMATLAB(libmexclass::proxy::method::Context& context) override;
+    };
+
+}
diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc
index e159c0ea37..be489b820d 100644
--- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc
+++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc
@@ -15,11 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include "arrow/matlab/array/proxy/boolean_array.h"
 #include "arrow/matlab/array/proxy/numeric_array.h"
+#include "arrow/matlab/error/error.h"
 
 #include "factory.h"
-#include "arrow/matlab/error/error.h"
-#include <iostream>
 
 namespace arrow::matlab::proxy {
 
@@ -37,6 +37,8 @@ libmexclass::proxy::MakeResult Factory::make_proxy(const ClassName& class_name,
     REGISTER_PROXY(arrow.array.proxy.Int16Array  , arrow::matlab::array::proxy::NumericArray<int16_t>);
     REGISTER_PROXY(arrow.array.proxy.Int32Array  , arrow::matlab::array::proxy::NumericArray<int32_t>);
     REGISTER_PROXY(arrow.array.proxy.Int64Array  , arrow::matlab::array::proxy::NumericArray<int64_t>);
+    // Register MATLAB Proxy class for boolean arrays
+    REGISTER_PROXY(arrow.array.proxy.BooleanArray, arrow::matlab::array::proxy::BooleanArray);
 
     return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, "Did not find matching C++ proxy for " + class_name};
 };
diff --git a/matlab/src/matlab/+arrow/+array/BooleanArray.m b/matlab/src/matlab/+arrow/+array/BooleanArray.m
new file mode 100644
index 0000000000..52fae56e2d
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+array/BooleanArray.m
@@ -0,0 +1,44 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements.  See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License.  You may obtain a copy of the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied.  See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef BooleanArray < arrow.array.Array
+% arrow.array.BooleanArray  Arrow array of boolean values that converts
+% to and from a MATLAB logical array.
+    properties (Hidden, SetAccess=private)
+        NullSubstitionValue = false; % fills null slots in toMATLAB; NOTE(review): name looks misspelled ("Substitution")
+    end
+
+    methods
+        function obj = BooleanArray(data, opts)
+            arguments
+                data
+                opts.InferNulls(1,1) logical = true
+                opts.Valid
+            end
+            arrow.args.validateTypeAndShape(data, "logical");
+            validMask = arrow.args.parseValidElements(data, opts);
+            obj@arrow.array.Array("Name", "arrow.array.proxy.BooleanArray", "ConstructorArguments", {data, validMask});
+        end
+
+        function data = logical(obj)
+            data = toMATLAB(obj);
+        end
+
+        function matlabArray = toMATLAB(obj)
+            matlabArray = obj.Proxy.toMATLAB();
+            matlabArray(~obj.Valid) = obj.NullSubstitionValue;
+        end
+    end
+end
diff --git a/matlab/test/arrow/array/tBooleanArray.m b/matlab/test/arrow/array/tBooleanArray.m
new file mode 100644
index 0000000000..2e8719a1e7
--- /dev/null
+++ b/matlab/test/arrow/array/tBooleanArray.m
@@ -0,0 +1,144 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements.  See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License.  You may obtain a copy of the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied.  See the License for the specific language governing
+% permissions and limitations under the License.
+    
+classdef tBooleanArray < matlab.unittest.TestCase
+% Test class for arrow.array.BooleanArray: construction, conversion to MATLAB logical arrays, the Valid name-value argument, and input validation errors.
+
+    properties
+        ArrowArrayClassName(1, 1) string = "arrow.array.BooleanArray"
+        ArrowArrayConstructor = @arrow.array.BooleanArray
+        MatlabArrayFcn = @logical
+        MatlabConversionFcn = @logical
+        NullSubstitutionValue(1, 1) = false
+    end
+
+    methods(TestClassSetup)
+        function verifyOnMatlabPath(tc)
+        % Verify the arrow array class is on the MATLAB Search Path.
+            tc.assertTrue(~isempty(which(tc.ArrowArrayClassName)), ...
+                """" + tc.ArrowArrayClassName + """ must be on the MATLAB path. " + ...
+                "Use ""addpath"" to add folders to the MATLAB path.");
+        end
+    end
+
+    methods(Test)
+        function BasicTest(tc)
+            A = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([true false true]));
+            className = string(class(A));
+            tc.verifyEqual(className, tc.ArrowArrayClassName);
+        end
+
+        function ToMATLAB(tc)
+        % Tests toMATLAB; every expected result below is a column vector
+            % Create array from a scalar
+            A1 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(true));
+            data = toMATLAB(A1);
+            tc.verifyEqual(data, tc.MatlabArrayFcn(true));
+
+            % Create array from a vector
+            A2 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([true false true]));
+            data = toMATLAB(A2);
+            tc.verifyEqual(data, tc.MatlabArrayFcn([true false true]'));
+
+            % Create a BooleanArray from an empty 0x0 logical array
+            A3 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(logical.empty(0, 0)));
+            data = toMATLAB(A3);
+            tc.verifyEqual(data, tc.MatlabArrayFcn(reshape([], 0, 1)));
+
+            % Create a BooleanArray from an empty 0x1 logical vector
+            A4 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(logical.empty(0, 1)));
+            data = toMATLAB(A4);
+            tc.verifyEqual(data, tc.MatlabArrayFcn(reshape([], 0, 1)));
+
+            % Create a BooleanArray from an empty 1x0 logical vector
+            A5 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(logical.empty(1, 0)));
+            data = toMATLAB(A5);
+            tc.verifyEqual(data, tc.MatlabArrayFcn(reshape([], 0, 1)));
+        end
+
+        function MatlabConversion(tc)
+        % Tests the type-specific conversion method (i.e. logical)
+            % Create array from a scalar
+            A1 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(true));
+            data = tc.MatlabConversionFcn(A1);
+            tc.verifyEqual(data, tc.MatlabArrayFcn(true));
+
+            % Create array from a vector
+            A2 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([true false true]));
+            data = tc.MatlabConversionFcn(A2);
+            tc.verifyEqual(data, tc.MatlabArrayFcn([true false true]'));
+
+            % Create a BooleanArray from an empty 0x0 logical array
+            A3 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(logical.empty(0, 0)));
+            data = tc.MatlabConversionFcn(A3);
+            tc.verifyEqual(data, tc.MatlabArrayFcn(reshape([], 0, 1)));
+
+            % Create a BooleanArray from an empty 0x1 logical vector
+            A4 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(logical.empty(0, 1)));
+            data = tc.MatlabConversionFcn(A4);
+            tc.verifyEqual(data, tc.MatlabArrayFcn(reshape([], 0, 1)));
+
+            % Create a BooleanArray from an empty 1x0 logical vector
+            A5 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(logical.empty(1, 0)));
+            data = tc.MatlabConversionFcn(A5);
+            tc.verifyEqual(data, tc.MatlabArrayFcn(reshape([], 0, 1)));
+        end
+
+        function LogicalValidNVPair(tc)
+            % Verify the expected elements are treated as null when Valid
+            % is provided as a logical array
+            data = tc.MatlabArrayFcn([true false true]');
+            arrowArray = tc.ArrowArrayConstructor(data, Valid=[false true true]);
+
+            expectedData = data;
+            expectedData(1) = tc.NullSubstitutionValue;
+            tc.verifyEqual(tc.MatlabConversionFcn(arrowArray), expectedData);
+            tc.verifyEqual(toMATLAB(arrowArray), expectedData);
+            tc.verifyEqual(arrowArray.Valid, [false; true; true]);
+        end
+
+        function NumericValidNVPair(tc)
+            % Verify the expected elements are treated as null when Valid
+            % is provided as an array of indices
+            data = tc.MatlabArrayFcn([true false true]');
+            arrowArray = tc.ArrowArrayConstructor(data, Valid=[1, 2]);
+
+            expectedData = data;
+            expectedData(3) = tc.NullSubstitutionValue;
+            tc.verifyEqual(tc.MatlabConversionFcn(arrowArray), expectedData);
+            tc.verifyEqual(toMATLAB(arrowArray), expectedData);
+            tc.verifyEqual(arrowArray.Valid, [true; true; false]);
+        end
+
+        function ErrorIfNonVector(tc)
+            data = tc.MatlabArrayFcn([true false true false true false true false true]);
+            data = reshape(data, 3, 1, 3);
+            fcn = @() tc.ArrowArrayConstructor(tc.MatlabArrayFcn(data));
+            tc.verifyError(fcn, "MATLAB:expectedVector");
+        end
+
+        function ErrorIfEmptyArrayIsNotTwoDimensional(tc)
+            data = tc.MatlabArrayFcn(reshape(logical.empty(0, 0), [1 0 0]));
+            fcn = @() tc.ArrowArrayConstructor(data);
+            tc.verifyError(fcn, "MATLAB:expected2D");
+        end
+
+        function ErrorIfSparseArray(tc)
+            data = tc.MatlabArrayFcn(sparse([true false true]));
+            fcn = @() tc.ArrowArrayConstructor(data);
+            tc.verifyError(fcn, "MATLAB:expectedNonsparse");
+        end
+    end
+end
diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
index 1f4ab05b06..01bd602afc 100644
--- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
+++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
@@ -38,6 +38,7 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/c
                                                       "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit"
                                                       "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/error")
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/array.cc"
+                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/boolean_array.cc"
                                                   "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/bit_pack_matlab_logical_array.cc"
                                                   "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/bit_unpack_arrow_buffer.cc")