You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2023/05/09 15:34:07 UTC
[arrow] branch main updated: GH-35411: [MATLAB] Create a templated C++ Proxy Class for Numeric Arrays (#35479)
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new b372242b68 GH-35411: [MATLAB] Create a templated C++ Proxy Class for Numeric Arrays (#35479)
b372242b68 is described below
commit b372242b686d0cc6eace5bd912dbf460edfc36e1
Author: sgilmore10 <74...@users.noreply.github.com>
AuthorDate: Tue May 9 11:33:59 2023 -0400
GH-35411: [MATLAB] Create a templated C++ Proxy Class for Numeric Arrays (#35479)
### Rationale for this change
This pull request is a followup to #34563. To facilitate the implementation of future array types, we would like to first create a C++ template class for numeric arrays. We also want to start adding basic tests for the array functionality in the MATLAB interface.
### What changes are included in this PR?
1. Added a C++ template Class called `NumericArray` templated on `CType`.
2. Re-implemented the `Float64Array` C++ proxy class in terms of the new template class, i.e. `NumericArray<double>`.
3. Added a method called `double()` on the MATLAB Float64Array class to convert the arrow.[Type]Array to a MATLAB `double` array.
4. Added basic tests for round-tripping float64 arrays.
5. Created a base C++ proxy `Array` class that all proxy array classes will inherit from.
6. Renamed `Print()` to `ToString()` and made it return a string instead of printing to the screen.
### Are these changes tested?
Yes, we added automated test cases to the test class `tFloat64Array.m`. In addition, we manually qualified these changes on macOS.
### Are there any user-facing changes?
Yes, the `Print()` method is no longer public and there is now a method called `double()` on `arrow.array.Float64Array`.
Included below is a simple example of using the `double()` method:
```matlab
>> arrowArray = arrow.array.Float64Array([1, 2, 3])
arrowArray =
[
1,
2,
3
]
>> matlabArray = double(arrowArray)
matlabArray =
1
2
3
>> class(arrowArray)
ans =
'arrow.array.Float64Array'
>> class(matlabArray)
ans =
'double'
```
### Future Directions
1. Support the rest of the numeric types.
2. Add an abstract MATLAB base class called `arrow.array.Array`.
3. Continue building out the methods (e.g. `length()`)
4. Support `null` values (validity bitmap).
5. Handle converting non-ascii characters from `UTF-8` to `UTF-16`.
6. Handle errors in the C++ layer.
* Closes: #35411
Lead-authored-by: Sarah Gilmore <sg...@mathworks.com>
Co-authored-by: sgilmore10 <74...@users.noreply.github.com>
Co-authored-by: Kevin Gurney <kg...@mathworks.com>
Co-authored-by: Sutou Kouhei <ko...@cozmixng.org>
Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
.../array/proxy/{float64_array.cc => array.cc} | 22 +++--
.../array/proxy/{float64_array.cc => array.h} | 26 ++++--
.../cpp/arrow/matlab/array/proxy/float64_array.h | 60 -------------
.../cpp/arrow/matlab/array/proxy/numeric_array.h | 97 ++++++++++++++++++++++
matlab/src/cpp/arrow/matlab/proxy/factory.cc | 4 +-
matlab/src/matlab/+arrow/+array/Float64Array.m | 27 ++++--
matlab/test/arrow/array/tFloat64Array.m | 81 ++++++++++++++++--
matlab/tools/cmake/BuildMatlabArrowInterface.cmake | 3 +-
8 files changed, 231 insertions(+), 89 deletions(-)
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/float64_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc
similarity index 61%
copy from matlab/src/cpp/arrow/matlab/array/proxy/float64_array.cc
copy to matlab/src/cpp/arrow/matlab/array/proxy/array.cc
index 4f25912b9e..865dd694b7 100644
--- a/matlab/src/cpp/arrow/matlab/array/proxy/float64_array.cc
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc
@@ -15,11 +15,23 @@
// specific language governing permissions and limitations
// under the License.
-#include "float64_array.h"
+#include "arrow/matlab/array/proxy/array.h"
namespace arrow::matlab::array::proxy {
-void Float64Array::Print(libmexclass::proxy::method::Context& context) {
- // TODO: Return an MDA string representation of the Arrow array.
- std::cout << array->ToString() << std::endl;
+
+ Array::Array(const libmexclass::proxy::FunctionArguments& constructor_arguments) {
+
+ // Register Proxy methods.
+ REGISTER_METHOD(Array, ToString);
+ REGISTER_METHOD(Array, ToMatlab);
+
+ }
+
+ void Array::ToString(libmexclass::proxy::method::Context& context) {
+ ::matlab::data::ArrayFactory factory;
+
+ // TODO: handle non-ascii characters
+ auto str_mda = factory.createScalar(array->ToString());
+ context.outputs[0] = str_mda;
+ }
}
-} // namespace arrow::matlab::array::proxy
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/float64_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/array.h
similarity index 64%
rename from matlab/src/cpp/arrow/matlab/array/proxy/float64_array.cc
rename to matlab/src/cpp/arrow/matlab/array/proxy/array.h
index 4f25912b9e..8a16630348 100644
--- a/matlab/src/cpp/arrow/matlab/array/proxy/float64_array.cc
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.h
@@ -15,11 +15,27 @@
// specific language governing permissions and limitations
// under the License.
-#include "float64_array.h"
+#pragma once
+
+#include "arrow/array.h"
+
+#include "libmexclass/proxy/Proxy.h"
namespace arrow::matlab::array::proxy {
-void Float64Array::Print(libmexclass::proxy::method::Context& context) {
- // TODO: Return an MDA string representation of the Arrow array.
- std::cout << array->ToString() << std::endl;
+
+class Array : public libmexclass::proxy::Proxy {
+ public:
+ Array(const libmexclass::proxy::FunctionArguments& constructor_arguments);
+
+ virtual ~Array() {}
+
+ protected:
+
+ void ToString(libmexclass::proxy::method::Context& context);
+
+ virtual void ToMatlab(libmexclass::proxy::method::Context& context) = 0;
+
+ std::shared_ptr<arrow::Array> array;
+};
+
}
-} // namespace arrow::matlab::array::proxy
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/float64_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/float64_array.h
deleted file mode 100644
index 3d6e449326..0000000000
--- a/matlab/src/cpp/arrow/matlab/array/proxy/float64_array.h
+++ /dev/null
@@ -1,60 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "arrow/array.h"
-#include "arrow/builder.h"
-
-#include "libmexclass/proxy/Proxy.h"
-
-namespace arrow::matlab::array::proxy {
-class Float64Array : public libmexclass::proxy::Proxy {
- public:
- Float64Array(const libmexclass::proxy::FunctionArguments& constructor_arguments) {
- // Get the mxArray from constructor arguments
- const ::matlab::data::TypedArray<double> double_mda = constructor_arguments[0];
-
- // Get raw pointer of mxArray
- auto it(double_mda.cbegin());
- auto dt = it.operator->();
-
- // Pass pointer to Arrow array constructor that takes a buffer
- // Do not make a copy when creating arrow::Buffer
- std::shared_ptr<arrow::Buffer> buffer(
- new arrow::Buffer(reinterpret_cast<const uint8_t*>(dt),
- sizeof(double) * double_mda.getNumberOfElements()));
-
- // Construct arrow::NumericArray specialization using arrow::Buffer.
- // pass in nulls information...we could compute and provide the number of nulls here too
- std::shared_ptr<arrow::Array> array_wrapper(
- new arrow::NumericArray<arrow::DoubleType>(double_mda.getNumberOfElements(), buffer,
- nullptr, // TODO: fill validity bitmap with data
- -1));
-
- array = array_wrapper;
-
- // Register Proxy methods.
- REGISTER_METHOD(Float64Array, Print);
- }
- private:
- void Print(libmexclass::proxy::method::Context& context);
-
- // "Raw" arrow::Array
- std::shared_ptr<arrow::Array> array;
-};
-} // namespace arrow::matlab::array::proxy
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h
new file mode 100644
index 0000000000..f7f19a60b0
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h
@@ -0,0 +1,97 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+
+#include "arrow/array.h"
+#include "arrow/array/data.h"
+#include "arrow/array/util.h"
+
+#include "arrow/builder.h"
+#include "arrow/type_traits.h"
+
+#include "arrow/matlab/array/proxy/array.h"
+
+#include "libmexclass/proxy/Proxy.h"
+
+namespace arrow::matlab::array::proxy {
+
+template<typename CType>
+class NumericArray : public arrow::matlab::array::proxy::Array {
+ public:
+ NumericArray(const libmexclass::proxy::FunctionArguments& constructor_arguments)
+ : arrow::matlab::array::proxy::Array(constructor_arguments) {
+ using ArrowType = typename arrow::CTypeTraits<CType>::ArrowType;
+ using BuilderType = typename arrow::CTypeTraits<CType>::BuilderType;
+
+ // Get the mxArray from constructor arguments
+ const ::matlab::data::TypedArray<CType> numeric_mda = constructor_arguments[0];
+ const ::matlab::data::TypedArray<bool> make_copy = constructor_arguments[1];
+
+ // Get raw pointer of mxArray
+ auto it(numeric_mda.cbegin());
+ auto dt = it.operator->();
+
+ const auto make_deep_copy = make_copy[0];
+
+ if (make_deep_copy) {
+ BuilderType builder;
+ auto st = builder.AppendValues(dt, numeric_mda.getNumberOfElements());
+
+ // TODO: handle error case
+ if (st.ok()) {
+ auto maybe_array = builder.Finish();
+ if (maybe_array.ok()) {
+ array = *maybe_array;
+ }
+ }
+ } else {
+ const auto data_type = arrow::CTypeTraits<CType>::type_singleton();
+ const auto length = static_cast<int64_t>(numeric_mda.getNumberOfElements()); // cast size_t to int64_t
+
+ // Do not make a copy when creating arrow::Buffer
+ auto data_buffer = std::make_shared<arrow::Buffer>(reinterpret_cast<const uint8_t*>(dt),
+ sizeof(CType) * numeric_mda.getNumberOfElements());
+
+ // TODO: Implement null support
+ std::shared_ptr<arrow::Buffer> null_buffer = nullptr;
+
+ auto array_data = arrow::ArrayData::Make(data_type, length, {null_buffer, data_buffer});
+ array = arrow::MakeArray(array_data);
+
+ }
+ }
+
+ protected:
+ void ToMatlab(libmexclass::proxy::method::Context& context) override {
+ using ArrowArrayType = typename arrow::CTypeTraits<CType>::ArrayType;
+
+ const auto num_elements = static_cast<size_t>(array->length());
+ const auto numeric_array = std::static_pointer_cast<ArrowArrayType>(array);
+ const CType* const data_begin = numeric_array->raw_values();
+ const CType* const data_end = data_begin + num_elements;
+
+ ::matlab::data::ArrayFactory factory;
+
+ // Constructs a TypedArray from the raw values. Makes a copy.
+ ::matlab::data::TypedArray<CType> result = factory.createArray({num_elements, 1}, data_begin, data_end);
+ context.outputs[0] = result;
+ }
+};
+
+}
diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc
index 9bf7ec2dc9..c767d7dc37 100644
--- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc
+++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-#include "arrow/matlab/array/proxy/float64_array.h"
+#include "arrow/matlab/array/proxy/numeric_array.h"
#include "factory.h"
@@ -26,7 +26,7 @@ namespace arrow::matlab::proxy {
std::shared_ptr<Proxy> Factory::make_proxy(const ClassName& class_name, const FunctionArguments& constructor_arguments) {
// Register MATLAB Proxy classes with corresponding C++ Proxy classes.
- REGISTER_PROXY(arrow.array.proxy.Float64Array, arrow::matlab::array::proxy::Float64Array);
+ REGISTER_PROXY(arrow.array.proxy.Float64Array, arrow::matlab::array::proxy::NumericArray<double>);
// TODO: Decide what to do in the case that there isn't a Proxy match.
std::cout << "Did not find a matching C++ proxy for: " + class_name << std::endl;
diff --git a/matlab/src/matlab/+arrow/+array/Float64Array.m b/matlab/src/matlab/+arrow/+array/Float64Array.m
index 71d56e6cc0..c48ee45574 100644
--- a/matlab/src/matlab/+arrow/+array/Float64Array.m
+++ b/matlab/src/matlab/+arrow/+array/Float64Array.m
@@ -20,25 +20,38 @@ classdef Float64Array < matlab.mixin.CustomDisplay
Proxy
end
- properties (Access=private)
+ properties (Hidden, SetAccess=private)
MatlabArray
end
methods
- function obj = Float64Array(matlabArray)
- obj.MatlabArray = matlabArray;
- obj.Proxy = libmexclass.proxy.Proxy("Name", "arrow.array.proxy.Float64Array", "ConstructorArguments", {obj.MatlabArray});
+ function obj = Float64Array(data, opts)
+ arguments
+ data
+ opts.DeepCopy = false
+ end
+
+ validateattributes(data, "double", ["2d", "nonsparse", "real"]);
+ if ~isempty(data), validateattributes(data, "double", "vector"); end
+ % Store a reference to the array if not doing a deep copy
+ if (~opts.DeepCopy), obj.MatlabArray = data; end
+ obj.Proxy = libmexclass.proxy.Proxy("Name", "arrow.array.proxy.Float64Array", "ConstructorArguments", {data, opts.DeepCopy});
end
- function Print(obj)
- obj.Proxy.Print();
+ function data = double(obj)
+ data = obj.Proxy.ToMatlab();
end
end
methods (Access=protected)
function displayScalarObject(obj)
- obj.Print();
+ disp(obj.ToString());
end
end
+ methods (Access=private)
+ function str = ToString(obj)
+ str = obj.Proxy.ToString();
+ end
+ end
end
diff --git a/matlab/test/arrow/array/tFloat64Array.m b/matlab/test/arrow/array/tFloat64Array.m
index f104ad4a7d..7e1a878c42 100755
--- a/matlab/test/arrow/array/tFloat64Array.m
+++ b/matlab/test/arrow/array/tFloat64Array.m
@@ -16,6 +16,10 @@ classdef tFloat64Array < matlab.unittest.TestCase
% implied. See the License for the specific language governing
% permissions and limitations under the License.
+ properties (TestParameter)
+ MakeDeepCopy = {true false}
+ end
+
methods(TestClassSetup)
function verifyOnMatlabPath(testCase)
% arrow.array.Float64Array must be on the MATLAB path.
@@ -24,18 +28,77 @@ classdef tFloat64Array < matlab.unittest.TestCase
end
end
- methods(TestMethodSetup)
- function setupTempWorkingDirectory(testCase)
- import matlab.unittest.fixtures.WorkingFolderFixture;
- testCase.applyFixture(WorkingFolderFixture);
- end
- end
-
methods(Test)
- function Basic(testCase)
- A = arrow.array.Float64Array([1, 2, 3]);
+ function Basic(testCase, MakeDeepCopy)
+ A = arrow.array.Float64Array([1, 2, 3], DeepCopy=MakeDeepCopy);
className = string(class(A));
testCase.verifyEqual(className, "arrow.array.Float64Array");
end
+
+ function ShallowCopy(testCase)
+ % By default, Float64Array does not create a deep copy on
+ % construction when constructed from a MATLAB array. Instead,
+ % it stores a shallow copy of the array keep the memory alive.
+ A = arrow.array.Float64Array([1, 2, 3]);
+ testCase.verifyEqual(A.MatlabArray, [1 2 3]);
+ testCase.verifyEqual(double(A), [1 2 3]');
+
+ A = arrow.array.Float64Array([1, 2, 3], DeepCopy=false);
+ testCase.verifyEqual(A.MatlabArray, [1 2 3]);
+ testCase.verifyEqual(double(A), [1 2 3]');
+ end
+
+ function DeepCopy(testCase)
+ % Verify Float64Array does not store shallow copy of the MATLAB
+ % array if DeepCopy=true was supplied.
+ A = arrow.array.Float64Array([1, 2, 3], DeepCopy=true);
+ testCase.verifyEqual(A.MatlabArray, []);
+ testCase.verifyEqual(double(A), [1 2 3]');
+ end
+
+ function Double(testCase, MakeDeepCopy)
+ % Create a Float64Array from a scalar double
+ A1 = arrow.array.Float64Array(100, DeepCopy=MakeDeepCopy);
+ data = double(A1);
+ testCase.verifyEqual(data, 100);
+
+ % Create a Float64Array from a double vector
+ A2 = arrow.array.Float64Array([1 2 3], DeepCopy=MakeDeepCopy);
+ data = double(A2);
+ testCase.verifyEqual(data, [1 2 3]');
+
+ % Create a Float64Array from an empty double vector
+ A3 = arrow.array.Float64Array([], DeepCopy=MakeDeepCopy);
+ data = double(A3);
+ testCase.verifyEqual(data, double.empty(0, 1));
+ end
+
+ function MinValue(testCase, MakeDeepCopy)
+ A1 = arrow.array.Float64Array(realmin, DeepCopy=MakeDeepCopy);
+ data = double(A1);
+ testCase.verifyEqual(data, realmin);
+ end
+
+ function MaxValue(testCase, MakeDeepCopy)
+ A1 = arrow.array.Float64Array(realmax, DeepCopy=MakeDeepCopy);
+ data = double(A1);
+ testCase.verifyEqual(data, realmax);
+ end
+
+ function InfValues(testCase, MakeDeepCopy)
+ A1 = arrow.array.Float64Array([Inf -Inf], DeepCopy=MakeDeepCopy);
+ data = double(A1);
+ testCase.verifyEqual(data, [Inf -Inf]');
+ end
+
+ function ErrorIfComplex(testCase, MakeDeepCopy)
+ fcn = @() arrow.array.Float64Array([10 + 1i, 4], DeepCopy=MakeDeepCopy);
+ testCase.verifyError(fcn, "MATLAB:expectedReal");
+ end
+
+ function ErrorIfSparse(testCase, MakeDeepCopy)
+ fcn = @() arrow.array.Float64Array(sparse(ones([10 1])), DeepCopy=MakeDeepCopy);
+ testCase.verifyError(fcn, "MATLAB:expectedNonsparse");
+ end
end
end
diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
index 59c48c0235..92ed955ed4 100644
--- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
+++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
@@ -34,7 +34,8 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_SOURCE_SUBDIR "libmexclass/cpp
set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_LIBRARY_NAME arrowproxy)
set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_LIBRARY_ROOT_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp")
set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy")
-set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/float64_array.cc")
+set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/array.cc")
+
set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy")
set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy/factory.cc")
set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_LIBRARY_INCLUDE_DIRS ${MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_LIBRARY_ROOT_INCLUDE_DIR}