You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2023/06/13 21:52:51 UTC
[arrow] branch main updated: GH-36040: [MATLAB] Add `arrow.array.BooleanArray` class (#36041)
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 4653918cf2 GH-36040: [MATLAB] Add `arrow.array.BooleanArray` class (#36041)
4653918cf2 is described below
commit 4653918cf23067e540e05e71799e8004fab8c7a2
Author: Kevin Gurney <ke...@gmail.com>
AuthorDate: Tue Jun 13 17:52:41 2023 -0400
GH-36040: [MATLAB] Add `arrow.array.BooleanArray` class (#36041)
### Rationale for this change
Now that the MATLAB interface supports validity bitmaps and bit packing/unpacking (#35598), we can add support for a `BooleanArray` class. This is a follow-up to the work on the `NumericArray` classes.
`BooleanArray` maps to the MATLAB [`logical`](https://www.mathworks.com/help/matlab/logical-operations.html) type when calling `toMATLAB`.
### What changes are included in this PR?
1. Added a new `arrow.array.BooleanArray` class that can be converted to/from a MATLAB `logical` array.
**Example**:
```matlab
>> matlabArray = logical([true, false, true])'
matlabArray =
3x1 logical array
1
0
1
>> arrowArray = arrow.array.BooleanArray(matlabArray)
arrowArray =
[
true,
false,
true
]
>> convertedArrowArray = toMATLAB(arrowArray)
convertedArrowArray =
3x1 logical array
1
0
1
```
### Are these changes tested?
Yes.
1. Added a new `tBooleanArray.m` test class which follows the existing pattern for the `NumericArray` test classes.
### Are there any user-facing changes?
Yes.
1. Added a new user-facing `arrow.array.BooleanArray` class.
### Notes
1. Thank you @ sgilmore10 for your help with this pull request!
* Closes: #36040
Lead-authored-by: Kevin Gurney <kg...@mathworks.com>
Co-authored-by: Kevin Gurney <ke...@gmail.com>
Co-authored-by: Sutou Kouhei <ko...@cozmixng.org>
Co-authored-by: Sarah Gilmore <sg...@mathworks.com>
Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
.../cpp/arrow/matlab/array/proxy/boolean_array.cc | 55 ++++++++
.../cpp/arrow/matlab/array/proxy/boolean_array.h | 40 ++++++
matlab/src/cpp/arrow/matlab/proxy/factory.cc | 6 +-
matlab/src/matlab/+arrow/+array/BooleanArray.m | 44 +++++++
matlab/test/arrow/array/tBooleanArray.m | 144 +++++++++++++++++++++
matlab/tools/cmake/BuildMatlabArrowInterface.cmake | 1 +
6 files changed, 288 insertions(+), 2 deletions(-)
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc
new file mode 100644
index 0000000000..def8a53e80
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/matlab/array/proxy/boolean_array.h"
+
+#include "arrow/matlab/error/error.h"
+#include "arrow/matlab/bit/bit_pack_matlab_logical_array.h"
+#include "arrow/matlab/bit/bit_unpack_arrow_buffer.h"
+
+namespace arrow::matlab::array::proxy {
+
+ libmexclass::proxy::MakeResult BooleanArray::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) {
+     // Constructor arguments: [0] holds the logical values, [1] holds the validity bitmap.
+     const ::matlab::data::TypedArray<bool> values_mda = constructor_arguments[0];
+     const ::matlab::data::TypedArray<bool> valid_mda = constructor_arguments[1];
+
+     // Bit-pack the logical values; the resulting status is checked by MATLAB_ERROR_IF_NOT_OK.
+     auto maybe_values_buffer = arrow::matlab::bit::bitPackMatlabLogicalArray(values_mda);
+     MATLAB_ERROR_IF_NOT_OK(maybe_values_buffer.status(), error::BITPACK_VALIDITY_BITMAP_ERROR_ID);
+
+     // Bit-pack the validity bitmap the same way.
+     auto maybe_valid_buffer = arrow::matlab::bit::bitPackMatlabLogicalArray(valid_mda);
+     MATLAB_ERROR_IF_NOT_OK(maybe_valid_buffer.status(), error::BITPACK_VALIDITY_BITMAP_ERROR_ID);
+
+     // The ArrayData buffers are passed in the order {validity bitmap, values}.
+     const auto num_elements = values_mda.getNumberOfElements();
+     const auto valid_buffer = *maybe_valid_buffer;
+     const auto values_buffer = *maybe_values_buffer;
+
+     auto boolean_data = arrow::ArrayData::Make(arrow::boolean(), num_elements, {valid_buffer, values_buffer});
+     return std::make_shared<arrow::matlab::array::proxy::BooleanArray>(arrow::MakeArray(boolean_data));
+ }
+
+ void BooleanArray::toMATLAB(libmexclass::proxy::method::Context& context) {
+     // Unpack the bit-packed values buffer and hand the result back as output 0.
+     const auto boolean_array = std::static_pointer_cast<arrow::BooleanArray>(array);
+     auto packed_values = boolean_array->values();
+     context.outputs[0] = arrow::matlab::bit::bitUnpackArrowBuffer(packed_values, array->length());
+ }
+
+}
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h
new file mode 100644
index 0000000000..6966d1090e
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/matlab/array/proxy/array.h"
+
+#include "libmexclass/proxy/Proxy.h"
+
+namespace arrow::matlab::array::proxy {
+
+ class BooleanArray : public arrow::matlab::array::proxy::Array {
+     public:
+         // Wraps an existing Arrow array by storing it in the inherited `array` member.
+         BooleanArray(const std::shared_ptr<arrow::Array> logical_array) : arrow::matlab::array::proxy::Array() {
+             this->array = logical_array;
+         }
+         // Factory taking the proxy constructor arguments; declared here, defined out of line.
+         static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments);
+
+     protected:
+         // Overrides the base-class toMATLAB handler; declared here, defined out of line.
+         void toMATLAB(libmexclass::proxy::method::Context& context) override;
+ };
+
+}
diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc
index e159c0ea37..be489b820d 100644
--- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc
+++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc
@@ -15,11 +15,11 @@
// specific language governing permissions and limitations
// under the License.
+#include "arrow/matlab/array/proxy/boolean_array.h"
#include "arrow/matlab/array/proxy/numeric_array.h"
+#include "arrow/matlab/error/error.h"
#include "factory.h"
-#include "arrow/matlab/error/error.h"
-#include <iostream>
namespace arrow::matlab::proxy {
@@ -37,6 +37,8 @@ libmexclass::proxy::MakeResult Factory::make_proxy(const ClassName& class_name,
REGISTER_PROXY(arrow.array.proxy.Int16Array , arrow::matlab::array::proxy::NumericArray<int16_t>);
REGISTER_PROXY(arrow.array.proxy.Int32Array , arrow::matlab::array::proxy::NumericArray<int32_t>);
REGISTER_PROXY(arrow.array.proxy.Int64Array , arrow::matlab::array::proxy::NumericArray<int64_t>);
+ // Register MATLAB Proxy class for boolean arrays
+ REGISTER_PROXY(arrow.array.proxy.BooleanArray, arrow::matlab::array::proxy::BooleanArray);
return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, "Did not find matching C++ proxy for " + class_name};
};
diff --git a/matlab/src/matlab/+arrow/+array/BooleanArray.m b/matlab/src/matlab/+arrow/+array/BooleanArray.m
new file mode 100644
index 0000000000..52fae56e2d
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+array/BooleanArray.m
@@ -0,0 +1,44 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef BooleanArray < arrow.array.Array
+% arrow.array.BooleanArray Arrow array of Boolean values backed by a C++ proxy.
+
+    properties (Hidden, SetAccess=private)
+        NullSubstitionValue = false;
+    end
+    methods
+        function obj = BooleanArray(data, opts)
+            arguments
+                data
+                opts.InferNulls(1,1) logical = true
+                opts.Valid
+            end
+            % Check the input, then pass {data, valid elements} to the proxy constructor.
+            arrow.args.validateTypeAndShape(data, "logical");
+            proxyArgs = {data, arrow.args.parseValidElements(data, opts)};
+            obj@arrow.array.Array("Name", "arrow.array.proxy.BooleanArray", "ConstructorArguments", proxyArgs);
+        end
+        function data = logical(obj)
+            % Type-specific conversion; forwards to toMATLAB.
+            data = toMATLAB(obj);
+        end
+        function matlabArray = toMATLAB(obj)
+            % Fetch the values from the proxy, then overwrite elements that are not Valid.
+            matlabArray = obj.Proxy.toMATLAB();
+            matlabArray(~obj.Valid) = obj.NullSubstitionValue;
+        end
+    end
+end
diff --git a/matlab/test/arrow/array/tBooleanArray.m b/matlab/test/arrow/array/tBooleanArray.m
new file mode 100644
index 0000000000..2e8719a1e7
--- /dev/null
+++ b/matlab/test/arrow/array/tBooleanArray.m
@@ -0,0 +1,144 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tBooleanArray < matlab.unittest.TestCase
+% Test class for arrow.array.BooleanArray
+
+ properties
+ ArrowArrayClassName(1, 1) string = "arrow.array.BooleanArray" % class name the tests compare against
+ ArrowArrayConstructor = @arrow.array.BooleanArray % builds the Arrow array under test
+ MatlabArrayFcn = @logical % builds the MATLAB input and expected data
+ MatlabConversionFcn = @logical % type-specific conversion applied to the Arrow array
+ NullSubstitutionValue(1, 1) = false % value the tests expect at null positions
+ end
+
+ methods(TestClassSetup)
+ function verifyOnMatlabPath(tc)
+     % Assert (rather than verify) that the arrow array class resolves on the MATLAB Search Path.
+     tc.assertTrue(~isempty(which(tc.ArrowArrayClassName)), ...
+         """" + tc.ArrowArrayClassName + """ must be on the MATLAB path. " + ...
+         "Use ""addpath"" to add folders to the MATLAB path.");
+ end
+ end
+
+ methods(Test)
+ function BasicTest(tc)
+     % Constructing from a logical vector yields an object of the expected class.
+     arrowArray = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([true false true]));
+     tc.verifyEqual(string(class(arrowArray)), tc.ArrowArrayClassName);
+ end
+
+ function ToMATLAB(tc)
+     % Verify toMATLAB returns the array contents as a logical column vector.
+
+     % Scalar input
+     A1 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(true));
+     tc.verifyEqual(toMATLAB(A1), tc.MatlabArrayFcn(true));
+
+     % Row vector input is returned as a column vector
+     A2 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([true false true]));
+     tc.verifyEqual(toMATLAB(A2), tc.MatlabArrayFcn([true false true]'));
+
+     % Every empty input below is expected to come back as an empty 0x1 array
+     emptyColumn = tc.MatlabArrayFcn(reshape([], 0, 1));
+
+     % Empty 0x0 logical input
+     A3 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(logical.empty(0, 0)));
+     tc.verifyEqual(toMATLAB(A3), emptyColumn);
+
+     % Empty 0x1 logical input
+     A4 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(logical.empty(0, 1)));
+     tc.verifyEqual(toMATLAB(A4), emptyColumn);
+
+     % Empty 1x0 logical input
+     A5 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(logical.empty(1, 0)));
+     tc.verifyEqual(toMATLAB(A5), emptyColumn);
+ end
+
+ function MatlabConversion(tc)
+     % Tests the type-specific conversion method (i.e. logical).
+     % The result must be a logical column vector holding the array
+     % contents.
+
+     % Scalar input
+     A1 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(true));
+     tc.verifyEqual(tc.MatlabConversionFcn(A1), tc.MatlabArrayFcn(true));
+
+     % Row vector input is returned as a column vector
+     A2 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([true false true]));
+     tc.verifyEqual(tc.MatlabConversionFcn(A2), tc.MatlabArrayFcn([true false true]'));
+
+     % Every empty input below is expected to come back as an empty 0x1 array
+     emptyColumn = tc.MatlabArrayFcn(reshape([], 0, 1));
+
+     % Empty 0x0 logical input
+     A3 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(logical.empty(0, 0)));
+     tc.verifyEqual(tc.MatlabConversionFcn(A3), emptyColumn);
+
+     % Empty 0x1 logical input
+     A4 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(logical.empty(0, 1)));
+     tc.verifyEqual(tc.MatlabConversionFcn(A4), emptyColumn);
+
+     % Empty 1x0 logical input
+     A5 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(logical.empty(1, 0)));
+     tc.verifyEqual(tc.MatlabConversionFcn(A5), emptyColumn);
+ end
+
+ function LogicalValidNVPair(tc)
+     % When Valid is given as a logical array, the elements flagged
+     % false are treated as null (here: element 1).
+     values = tc.MatlabArrayFcn([true false true]');
+     A = tc.ArrowArrayConstructor(values, Valid=[false true true]);
+
+     expected = values;
+     expected(1) = tc.NullSubstitutionValue;
+     tc.verifyEqual(tc.MatlabConversionFcn(A), expected);
+     tc.verifyEqual(toMATLAB(A), expected);
+     tc.verifyEqual(A.Valid, [false; true; true]);
+ end
+
+ function NumericValidNVPair(tc)
+     % When Valid is given as an array of indices, the elements not
+     % listed are treated as null (here: element 3).
+     values = tc.MatlabArrayFcn([true false true]');
+     A = tc.ArrowArrayConstructor(values, Valid=[1, 2]);
+
+     expected = values;
+     expected(3) = tc.NullSubstitutionValue;
+     tc.verifyEqual(tc.MatlabConversionFcn(A), expected);
+     tc.verifyEqual(toMATLAB(A), expected);
+     tc.verifyEqual(A.Valid, [true; true; false]);
+ end
+
+ function ErrorIfNonVector(tc)
+     % A 3x1x3 logical array is not a vector, so construction must error.
+     cube = reshape(tc.MatlabArrayFcn([true false true false true false true false true]), 3, 1, 3);
+     construct = @() tc.ArrowArrayConstructor(tc.MatlabArrayFcn(cube));
+     tc.verifyError(construct, "MATLAB:expectedVector");
+ end
+
+ function ErrorIfEmptyArrayIsNotTwoDimensional(tc)
+     % An empty 1x0x0 logical array is not two-dimensional, so construction must error.
+     emptyCube = tc.MatlabArrayFcn(logical.empty(1, 0, 0));
+     tc.verifyError(@() tc.ArrowArrayConstructor(emptyCube), "MATLAB:expected2D");
+ end
+
+ function ErrorIfSparseArray(tc)
+     % Sparse logical input must be rejected by the constructor.
+     sparseInput = tc.MatlabArrayFcn(sparse([true false true]));
+     tc.verifyError(@() tc.ArrowArrayConstructor(sparseInput), "MATLAB:expectedNonsparse");
+ end
+ end
+end
diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
index 1f4ab05b06..01bd602afc 100644
--- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
+++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
@@ -38,6 +38,7 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/c
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit"
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/error")
set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/array.cc"
+ "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/boolean_array.cc"
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/bit_pack_matlab_logical_array.cc"
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/bit_unpack_arrow_buffer.cc")