Posted to commits@arrow.apache.org by ap...@apache.org on 2019/01/28 18:28:21 UTC
[arrow] branch master updated: ARROW-4320: [C++] Add tests for non-contiguous tensors
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 442ced0 ARROW-4320: [C++] Add tests for non-contiguous tensors
442ced0 is described below
commit 442ced05fd89d1ec6c4a40ff68b0fb50dfa3296a
Author: Kenta Murata <mr...@mrkn.jp>
AuthorDate: Mon Jan 28 19:28:13 2019 +0100
ARROW-4320: [C++] Add tests for non-contiguous tensors
I would like to add some test cases for tensors with non-contiguous strides.
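(For context, a minimal sketch of what "non-contiguous strides" means here; this is illustration only, not part of the commit, and the helper name NonContiguousExample is made up. For an int64 tensor of shape {2, 3, 4}, contiguous row-major strides are {3*4*8, 4*8, 8} = {96, 32, 8} bytes; the tests below double these to {192, 64, 16}, so every logical element is followed by one padding slot in the wrapped buffer.)

    #include <cstdint>
    #include <memory>
    #include <vector>

    #include "arrow/buffer.h"
    #include "arrow/tensor.h"

    // Illustration only: build a 2x3x4 int64 tensor whose strides skip every
    // other slot of the wrapped buffer, matching the layout used in the tests.
    void NonContiguousExample() {
      std::vector<int64_t> shape = {2, 3, 4};
      std::vector<int64_t> strides = {192, 64, 16};  // 2x the contiguous strides
      std::vector<int64_t> values(2 * 3 * 4 * 2, 0);  // odd slots stay 0 (padding)
      for (size_t i = 0; i < values.size(); i += 2) {
        values[i] = static_cast<int64_t>(i / 2 + 1);
      }
      std::shared_ptr<arrow::Buffer> buffer = arrow::Buffer::Wrap(values);
      arrow::Tensor tensor(arrow::int64(), buffer, shape, strides);
      // For this layout is_contiguous(), is_row_major() and is_column_major()
      // all return false, which is exactly the case the new tests cover.
    }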
Author: Kenta Murata <mr...@mrkn.jp>
Author: Antoine Pitrou <an...@python.org>
Closes #3453 from mrkn/add_strided_numeric_tensor_tests and squashes the following commits:
58417945 <Antoine Pitrou> Remove ARROW_EXPORT from template function definition.
53179ea0 <Kenta Murata> Fix compilation error
f153e0b3 <Kenta Murata> make format
8cfac940 <Kenta Murata> Refactoring: extract AssertNumericDataEqual
20c8728c <Kenta Murata> Refactoring: extract AssertCOOIndex function
c9767f78 <Kenta Murata> Add assertions to examine the strides layout
4cfef06c <Kenta Murata> Add a new test of csr sparse matrix creation from non-contiguous tensor
d9f32f1c <Kenta Murata> Add a new test of coo sparse tensor creation from non-contiguous tensor
0c1573c1 <Kenta Murata> Fix NumericTensor tests
---
cpp/src/arrow/sparse_tensor-test.cc | 159 ++++++++++++++++++++----------------
cpp/src/arrow/tensor-test.cc | 48 +++++++----
cpp/src/arrow/test-util.h | 9 ++
3 files changed, 131 insertions(+), 85 deletions(-)
diff --git a/cpp/src/arrow/sparse_tensor-test.cc b/cpp/src/arrow/sparse_tensor-test.cc
index d48f2d0..ed51f03 100644
--- a/cpp/src/arrow/sparse_tensor-test.cc
+++ b/cpp/src/arrow/sparse_tensor-test.cc
@@ -38,6 +38,15 @@ static inline void CheckSparseIndexFormatType(SparseTensorFormat::type expected,
ASSERT_EQ(expected, sparse_tensor.sparse_index()->format_id());
}
+static inline void AssertCOOIndex(
+ const std::shared_ptr<SparseCOOIndex::CoordsTensor>& sidx, const int64_t nth,
+ const std::vector<int64_t>& expected_values) {
+ int64_t n = static_cast<int64_t>(expected_values.size());
+ for (int64_t i = 0; i < n; ++i) {
+ ASSERT_EQ(expected_values[i], sidx->Value({nth, i}));
+ }
+}
+
TEST(TestSparseCOOTensor, CreationEmptyTensor) {
std::vector<int64_t> shape = {2, 3, 4};
SparseTensorImpl<SparseCOOIndex> st1(int64(), shape);
@@ -84,13 +93,8 @@ TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
ASSERT_EQ("", st1.dim_name(1));
ASSERT_EQ("", st1.dim_name(2));
- const int64_t* ptr = reinterpret_cast<const int64_t*>(st1.raw_data());
- for (int i = 0; i < 6; ++i) {
- ASSERT_EQ(i + 1, ptr[i]);
- }
- for (int i = 0; i < 6; ++i) {
- ASSERT_EQ(i + 11, ptr[i + 6]);
- }
+ const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
+ AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index());
ASSERT_EQ(std::string("SparseCOOIndex"), si.ToString());
@@ -99,30 +103,11 @@ TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
ASSERT_TRUE(sidx->is_column_major());
- // (0, 0, 0) -> 1
- ASSERT_EQ(0, sidx->Value({0, 0}));
- ASSERT_EQ(0, sidx->Value({0, 1}));
- ASSERT_EQ(0, sidx->Value({0, 2}));
-
- // (0, 0, 2) -> 2
- ASSERT_EQ(0, sidx->Value({1, 0}));
- ASSERT_EQ(0, sidx->Value({1, 1}));
- ASSERT_EQ(2, sidx->Value({1, 2}));
-
- // (0, 1, 1) -> 3
- ASSERT_EQ(0, sidx->Value({2, 0}));
- ASSERT_EQ(1, sidx->Value({2, 1}));
- ASSERT_EQ(1, sidx->Value({2, 2}));
-
- // (1, 2, 1) -> 15
- ASSERT_EQ(1, sidx->Value({10, 0}));
- ASSERT_EQ(2, sidx->Value({10, 1}));
- ASSERT_EQ(1, sidx->Value({10, 2}));
-
- // (1, 2, 3) -> 16
- ASSERT_EQ(1, sidx->Value({11, 0}));
- ASSERT_EQ(2, sidx->Value({11, 1}));
- ASSERT_EQ(3, sidx->Value({11, 2}));
+ AssertCOOIndex(sidx, 0, {0, 0, 0});
+ AssertCOOIndex(sidx, 1, {0, 0, 2});
+ AssertCOOIndex(sidx, 2, {0, 1, 1});
+ AssertCOOIndex(sidx, 10, {1, 2, 1});
+ AssertCOOIndex(sidx, 11, {1, 2, 3});
}
TEST(TestSparseCOOTensor, CreationFromTensor) {
@@ -147,43 +132,47 @@ TEST(TestSparseCOOTensor, CreationFromTensor) {
ASSERT_EQ("", st1.dim_name(1));
ASSERT_EQ("", st1.dim_name(2));
- const int64_t* ptr = reinterpret_cast<const int64_t*>(st1.raw_data());
- for (int i = 0; i < 6; ++i) {
- ASSERT_EQ(i + 1, ptr[i]);
- }
- for (int i = 0; i < 6; ++i) {
- ASSERT_EQ(i + 11, ptr[i + 6]);
- }
+ const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
+ AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index());
std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices();
ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
ASSERT_TRUE(sidx->is_column_major());
- // (0, 0, 0) -> 1
- ASSERT_EQ(0, sidx->Value({0, 0}));
- ASSERT_EQ(0, sidx->Value({0, 1}));
- ASSERT_EQ(0, sidx->Value({0, 2}));
-
- // (0, 0, 2) -> 2
- ASSERT_EQ(0, sidx->Value({1, 0}));
- ASSERT_EQ(0, sidx->Value({1, 1}));
- ASSERT_EQ(2, sidx->Value({1, 2}));
-
- // (0, 1, 1) -> 3
- ASSERT_EQ(0, sidx->Value({2, 0}));
- ASSERT_EQ(1, sidx->Value({2, 1}));
- ASSERT_EQ(1, sidx->Value({2, 2}));
-
- // (1, 2, 1) -> 15
- ASSERT_EQ(1, sidx->Value({10, 0}));
- ASSERT_EQ(2, sidx->Value({10, 1}));
- ASSERT_EQ(1, sidx->Value({10, 2}));
-
- // (1, 2, 3) -> 16
- ASSERT_EQ(1, sidx->Value({11, 0}));
- ASSERT_EQ(2, sidx->Value({11, 1}));
- ASSERT_EQ(3, sidx->Value({11, 2}));
+ AssertCOOIndex(sidx, 0, {0, 0, 0});
+ AssertCOOIndex(sidx, 1, {0, 0, 2});
+ AssertCOOIndex(sidx, 2, {0, 1, 1});
+ AssertCOOIndex(sidx, 10, {1, 2, 1});
+ AssertCOOIndex(sidx, 11, {1, 2, 3});
+}
+
+TEST(TestSparseCOOTensor, CreationFromNonContiguousTensor) {
+ std::vector<int64_t> shape = {2, 3, 4};
+ std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0,
+ 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0,
+ 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0};
+ std::vector<int64_t> strides = {192, 64, 16};
+ std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
+ Tensor tensor(int64(), buffer, shape, strides);
+ SparseTensorImpl<SparseCOOIndex> st(tensor);
+
+ ASSERT_EQ(12, st.non_zero_length());
+ ASSERT_TRUE(st.is_mutable());
+
+ const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data());
+ AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
+
+ const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st.sparse_index());
+ std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices();
+ ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
+ ASSERT_TRUE(sidx->is_column_major());
+
+ AssertCOOIndex(sidx, 0, {0, 0, 0});
+ AssertCOOIndex(sidx, 1, {0, 0, 2});
+ AssertCOOIndex(sidx, 2, {0, 1, 1});
+ AssertCOOIndex(sidx, 10, {1, 2, 1});
+ AssertCOOIndex(sidx, 11, {1, 2, 3});
}
TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
@@ -211,16 +200,10 @@ TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
ASSERT_EQ("", st1.dim_name(1));
ASSERT_EQ("", st1.dim_name(2));
- const int64_t* ptr = reinterpret_cast<const int64_t*>(st1.raw_data());
- for (int i = 0; i < 6; ++i) {
- ASSERT_EQ(i + 1, ptr[i]);
- }
- for (int i = 0; i < 6; ++i) {
- ASSERT_EQ(i + 11, ptr[i + 6]);
- }
+ const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
+ AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st1.sparse_index());
-
ASSERT_EQ(std::string("SparseCSRIndex"), si.ToString());
ASSERT_EQ(1, si.indptr()->ndim());
ASSERT_EQ(1, si.indices()->ndim());
@@ -241,4 +224,40 @@ TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values);
}
+TEST(TestSparseCSRMatrix, CreationFromNonContiguousTensor) {
+ std::vector<int64_t> shape = {6, 4};
+ std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0,
+ 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0,
+ 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0};
+ std::vector<int64_t> strides = {64, 16};
+ std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
+ Tensor tensor(int64(), buffer, shape, strides);
+ SparseTensorImpl<SparseCSRIndex> st(tensor);
+
+ ASSERT_EQ(12, st.non_zero_length());
+ ASSERT_TRUE(st.is_mutable());
+
+ const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data());
+ AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
+
+ const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st.sparse_index());
+ ASSERT_EQ(1, si.indptr()->ndim());
+ ASSERT_EQ(1, si.indices()->ndim());
+
+ const int64_t* indptr_begin = reinterpret_cast<const int64_t*>(si.indptr()->raw_data());
+ std::vector<int64_t> indptr_values(indptr_begin,
+ indptr_begin + si.indptr()->shape()[0]);
+
+ ASSERT_EQ(7, indptr_values.size());
+ ASSERT_EQ(std::vector<int64_t>({0, 2, 4, 6, 8, 10, 12}), indptr_values);
+
+ const int64_t* indices_begin =
+ reinterpret_cast<const int64_t*>(si.indices()->raw_data());
+ std::vector<int64_t> indices_values(indices_begin,
+ indices_begin + si.indices()->shape()[0]);
+
+ ASSERT_EQ(12, indices_values.size());
+ ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values);
+}
+
} // namespace arrow
diff --git a/cpp/src/arrow/tensor-test.cc b/cpp/src/arrow/tensor-test.cc
index a437e6d..af20aed 100644
--- a/cpp/src/arrow/tensor-test.cc
+++ b/cpp/src/arrow/tensor-test.cc
@@ -104,13 +104,16 @@ TEST(TestTensor, ZeroDimensionalTensor) {
ASSERT_EQ(t.strides().size(), 1);
}
-TEST(TestNumericTensor, ElementAccess) {
+TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
std::vector<int64_t> shape = {3, 4};
std::vector<int64_t> values_i64 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
std::shared_ptr<Buffer> buffer_i64(Buffer::Wrap(values_i64));
NumericTensor<Int64Type> t_i64(buffer_i64, shape);
+ ASSERT_TRUE(t_i64.is_row_major());
+ ASSERT_FALSE(t_i64.is_column_major());
+ ASSERT_TRUE(t_i64.is_contiguous());
ASSERT_EQ(1, t_i64.Value({0, 0}));
ASSERT_EQ(5, t_i64.Value({1, 0}));
ASSERT_EQ(6, t_i64.Value({1, 1}));
@@ -121,22 +124,27 @@ TEST(TestNumericTensor, ElementAccess) {
std::shared_ptr<Buffer> buffer_f32(Buffer::Wrap(values_f32));
NumericTensor<FloatType> t_f32(buffer_f32, shape);
+ ASSERT_TRUE(t_f32.is_row_major());
+ ASSERT_FALSE(t_f32.is_column_major());
+ ASSERT_TRUE(t_f32.is_contiguous());
ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
ASSERT_EQ(5.1f, t_f32.Value({1, 0}));
ASSERT_EQ(6.1f, t_f32.Value({1, 1}));
ASSERT_EQ(11.1f, t_f32.Value({2, 2}));
}
-TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
+TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) {
std::vector<int64_t> shape = {3, 4};
const int64_t i64_size = sizeof(int64_t);
- std::vector<int64_t> values_i64 = {1, 2, 3, 4, 0, 0, 5, 6, 7,
- 8, 0, 0, 9, 10, 11, 12, 0, 0};
- std::vector<int64_t> strides_i64 = {i64_size * 6, i64_size};
+ std::vector<int64_t> values_i64 = {1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12};
+ std::vector<int64_t> strides_i64 = {i64_size, i64_size * 3};
std::shared_ptr<Buffer> buffer_i64(Buffer::Wrap(values_i64));
NumericTensor<Int64Type> t_i64(buffer_i64, shape, strides_i64);
+ ASSERT_TRUE(t_i64.is_column_major());
+ ASSERT_FALSE(t_i64.is_row_major());
+ ASSERT_TRUE(t_i64.is_contiguous());
ASSERT_EQ(1, t_i64.Value({0, 0}));
ASSERT_EQ(2, t_i64.Value({0, 1}));
ASSERT_EQ(4, t_i64.Value({0, 3}));
@@ -145,13 +153,15 @@ TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
ASSERT_EQ(11, t_i64.Value({2, 2}));
const int64_t f32_size = sizeof(float);
- std::vector<float> values_f32 = {1.1f, 2.1f, 3.1f, 4.1f, 0.0f, 0.0f,
- 5.1f, 6.1f, 7.1f, 8.1f, 0.0f, 0.0f,
- 9.1f, 10.1f, 11.1f, 12.1f, 0.0f, 0.0f};
- std::vector<int64_t> strides_f32 = {f32_size * 6, f32_size};
+ std::vector<float> values_f32 = {1.1f, 5.1f, 9.1f, 2.1f, 6.1f, 10.1f,
+ 3.1f, 7.1f, 11.1f, 4.1f, 8.1f, 12.1f};
+ std::vector<int64_t> strides_f32 = {f32_size, f32_size * 3};
std::shared_ptr<Buffer> buffer_f32(Buffer::Wrap(values_f32));
NumericTensor<FloatType> t_f32(buffer_f32, shape, strides_f32);
+ ASSERT_TRUE(t_f32.is_column_major());
+ ASSERT_FALSE(t_f32.is_row_major());
+ ASSERT_TRUE(t_f32.is_contiguous());
ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
ASSERT_EQ(2.1f, t_f32.Value({0, 1}));
ASSERT_EQ(4.1f, t_f32.Value({0, 3}));
@@ -160,15 +170,19 @@ TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
ASSERT_EQ(11.1f, t_f32.Value({2, 2}));
}
-TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) {
+TEST(TestNumericTensor, ElementAccessWithNonContiguousStrides) {
std::vector<int64_t> shape = {3, 4};
const int64_t i64_size = sizeof(int64_t);
- std::vector<int64_t> values_i64 = {1, 5, 9, 0, 2, 6, 10, 0, 3, 7, 11, 0, 4, 8, 12, 0};
- std::vector<int64_t> strides_i64 = {i64_size, i64_size * 4};
+ std::vector<int64_t> values_i64 = {1, 2, 3, 4, 0, 0, 5, 6, 7,
+ 8, 0, 0, 9, 10, 11, 12, 0, 0};
+ std::vector<int64_t> strides_i64 = {i64_size * 6, i64_size};
std::shared_ptr<Buffer> buffer_i64(Buffer::Wrap(values_i64));
NumericTensor<Int64Type> t_i64(buffer_i64, shape, strides_i64);
+ ASSERT_FALSE(t_i64.is_contiguous());
+ ASSERT_FALSE(t_i64.is_row_major());
+ ASSERT_FALSE(t_i64.is_column_major());
ASSERT_EQ(1, t_i64.Value({0, 0}));
ASSERT_EQ(2, t_i64.Value({0, 1}));
ASSERT_EQ(4, t_i64.Value({0, 3}));
@@ -177,12 +191,16 @@ TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) {
ASSERT_EQ(11, t_i64.Value({2, 2}));
const int64_t f32_size = sizeof(float);
- std::vector<float> values_f32 = {1.1f, 5.1f, 9.1f, 0.0f, 2.1f, 6.1f, 10.1f, 0.0f,
- 3.1f, 7.1f, 11.1f, 0.0f, 4.1f, 8.1f, 12.1f, 0.0f};
- std::vector<int64_t> strides_f32 = {f32_size, f32_size * 4};
+ std::vector<float> values_f32 = {1.1f, 2.1f, 3.1f, 4.1f, 0.0f, 0.0f,
+ 5.1f, 6.1f, 7.1f, 8.1f, 0.0f, 0.0f,
+ 9.1f, 10.1f, 11.1f, 12.1f, 0.0f, 0.0f};
+ std::vector<int64_t> strides_f32 = {f32_size * 6, f32_size};
std::shared_ptr<Buffer> buffer_f32(Buffer::Wrap(values_f32));
NumericTensor<FloatType> t_f32(buffer_f32, shape, strides_f32);
+ ASSERT_FALSE(t_f32.is_contiguous());
+ ASSERT_FALSE(t_f32.is_row_major());
+ ASSERT_FALSE(t_f32.is_column_major());
ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
ASSERT_EQ(2.1f, t_f32.Value({0, 1}));
ASSERT_EQ(4.1f, t_f32.Value({0, 3}));
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index aa7c73e..713ff38 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -202,6 +202,15 @@ ARROW_EXPORT void PrintColumn(const Column& col, std::stringstream* ss);
ARROW_EXPORT void AssertTablesEqual(const Table& expected, const Table& actual,
bool same_chunk_layout = true);
+template <typename C_TYPE>
+void AssertNumericDataEqual(const C_TYPE* raw_data,
+ const std::vector<C_TYPE>& expected_values) {
+ for (auto expected : expected_values) {
+ ASSERT_EQ(expected, *raw_data);
+ ++raw_data;
+ }
+}
+
ARROW_EXPORT void CompareBatch(const RecordBatch& left, const RecordBatch& right);
// Check if the padding of the buffers of the array is zero.
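
(Two asides on the diff above. First, the squashed commit "Remove ARROW_EXPORT from template function definition" follows from how templates link: a function template defined in a header is instantiated in every translation unit that uses it, so no symbol needs to be exported from the shared library and the annotation can be dropped. Second, a sketch of how the new helper is called, mirroring the call sites in sparse_tensor-test.cc above:)

    // Sketch (values taken from the tests above): C_TYPE is deduced from the
    // pointer argument, so the braced list converts to std::vector<int64_t>.
    const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
    AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});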