You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2019/02/18 15:04:27 UTC
[arrow] branch master updated: ARROW-4265: [C++] Automatic
conversion between Table and std::vector<std::tuple<..>>
This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 240c469 ARROW-4265: [C++] Automatic conversion between Table and std::vector<std::tuple<..>>
240c469 is described below
commit 240c46959ac631d646d98ac57aaa719a63a9dc97
Author: Korn, Uwe <Uw...@blue-yonder.com>
AuthorDate: Mon Feb 18 16:04:17 2019 +0100
ARROW-4265: [C++] Automatic conversion between Table and std::vector<std::tuple<..>>
This enables conversions between `std::vector<std::tuple<…>>`-like ranges and `arrow::Table`.
tuple to Table:
```cpp
std::vector<std::tuple<double, std::string>> rows = ..
std::shared_ptr<Table> table;
if (!arrow::stl::TableFromTupleRange(
arrow::default_memory_pool(),
rows, names, &table).ok()
) {
// Error handling code should go here.
}
```
Table to tuple:
```cpp
// An important aspect here is that the table columns need to be in the
// same order as the columns will later appear in the tuple. As the tuple
// is unnamed, matching is done on positions.
std::shared_ptr<Table> table = ..
// The range needs to be pre-allocated to the respective amount of rows.
// This allows us to pass in an arbitrary range object, not only
// `std::vector`.
std::vector<std::tuple<double, std::string>> rows(2);
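// Columns whose type differs from the tuple element type are cast using
// the given cast options and function context.
arrow::compute::FunctionContext ctx;
arrow::compute::CastOptions cast_options;
if (!arrow::stl::TupleRangeFromTable(*table, cast_options, &ctx, &rows).ok()) {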
// Error handling code should go here.
}
```
Author: Korn, Uwe <Uw...@blue-yonder.com>
Author: Uwe L. Korn <uw...@xhochy.com>
Closes #3404 from xhochy/stl-extension and squashes the following commits:
4856260a <Korn, Uwe> Cast to size_t to compare on equal signedness
aaeacfd1 <Uwe L. Korn> docker-compose run clang-format
386e5bc9 <Korn, Uwe> Check size of target
8b472da8 <Korn, Uwe> Update documentation
1a3743e0 <Korn, Uwe> Allow building shared libs without tests
9a08a3eb <Korn, Uwe> Use full path to checked_cast
e037507c <Korn, Uwe> Use ArrayFromJSON
1ab23f87 <Korn, Uwe> Move to type_singleton
30e66f9e <Korn, Uwe> Add additional STL conversions
---
cpp/CMakeLists.txt | 2 +-
cpp/src/arrow/stl-test.cc | 169 ++++++++++++--
cpp/src/arrow/stl.h | 255 ++++++++++++++++++++-
cpp/src/arrow/type.cc | 12 +
cpp/src/arrow/type.h | 5 +
dev/lint/Dockerfile | 3 +-
docs/source/cpp/{ => examples}/index.rst | 17 +-
.../row_columnar_conversion.rst} | 7 +-
.../source/cpp/examples/tuple_range_conversion.rst | 106 +++++++++
docs/source/cpp/index.rst | 2 +-
10 files changed, 541 insertions(+), 37 deletions(-)
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 7d1fee0..ab9ff39 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -812,7 +812,7 @@ pass ARROW_BUILD_SHARED=on")
# Use shared linking for unit tests if it's available
set(ARROW_TEST_LINK_LIBS ${ARROW_TEST_SHARED_LINK_LIBS})
set(ARROW_EXAMPLE_LINK_LIBS arrow_shared)
-else()
+elseif(ARROW_BUILD_TESTS)
if(NOT ARROW_BUILD_STATIC)
message(FATAL_ERROR "If using static linkage for unit tests, must also \
pass ARROW_BUILD_STATIC=on")
diff --git a/cpp/src/arrow/stl-test.cc b/cpp/src/arrow/stl-test.cc
index 6c33098..04c4cf4 100644
--- a/cpp/src/arrow/stl-test.cc
+++ b/cpp/src/arrow/stl-test.cc
@@ -23,8 +23,13 @@
#include <gtest/gtest.h>
#include "arrow/stl.h"
+#include "arrow/table.h"
+#include "arrow/testing/gtest_util.h"
#include "arrow/type.h"
+using primitive_types_tuple = std::tuple<int8_t, int16_t, int32_t, int64_t, uint8_t,
+ uint16_t, uint32_t, uint64_t, bool, std::string>;
+
namespace arrow {
namespace stl {
@@ -36,12 +41,9 @@ TEST(TestSchemaFromTuple, PrimitiveTypesVector) {
field("column7", uint32(), false), field("column8", uint64(), false),
field("column9", boolean(), false), field("column10", utf8(), false)});
- std::shared_ptr<Schema> schema =
- SchemaFromTuple<std::tuple<int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
- uint32_t, uint64_t, bool, std::string>>::
- MakeSchema(std::vector<std::string>({"column1", "column2", "column3", "column4",
- "column5", "column6", "column7", "column8",
- "column9", "column10"}));
+ std::shared_ptr<Schema> schema = SchemaFromTuple<primitive_types_tuple>::MakeSchema(
+ std::vector<std::string>({"column1", "column2", "column3", "column4", "column5",
+ "column6", "column7", "column8", "column9", "column10"}));
ASSERT_TRUE(expected_schema.Equals(*schema));
}
@@ -53,13 +55,9 @@ TEST(TestSchemaFromTuple, PrimitiveTypesTuple) {
field("column7", uint32(), false), field("column8", uint64(), false),
field("column9", boolean(), false), field("column10", utf8(), false)});
- std::shared_ptr<Schema> schema = SchemaFromTuple<
- std::tuple<int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t,
- bool, std::string>>::MakeSchema(std::make_tuple("column1", "column2",
- "column3", "column4",
- "column5", "column6",
- "column7", "column8",
- "column9", "column10"));
+ std::shared_ptr<Schema> schema = SchemaFromTuple<primitive_types_tuple>::MakeSchema(
+ std::make_tuple("column1", "column2", "column3", "column4", "column5", "column6",
+ "column7", "column8", "column9", "column10"));
ASSERT_TRUE(expected_schema.Equals(*schema));
}
@@ -80,5 +78,150 @@ TEST(TestSchemaFromTuple, NestedList) {
ASSERT_TRUE(expected_schema.Equals(*schema));
}
+TEST(TestTableFromTupleVector, PrimitiveTypes) {
+ std::vector<std::string> names{"column1", "column2", "column3", "column4", "column5",
+ "column6", "column7", "column8", "column9", "column10"};
+ std::vector<primitive_types_tuple> rows{
+ primitive_types_tuple(-1, -2, -3, -4, 1, 2, 3, 4, true, "Tests"),
+ primitive_types_tuple(-10, -20, -30, -40, 10, 20, 30, 40, false, "Other")};
+ std::shared_ptr<Table> table;
+ ASSERT_OK(TableFromTupleRange(default_memory_pool(), rows, names, &table));
+
+ std::shared_ptr<Schema> expected_schema =
+ schema({field("column1", int8(), false), field("column2", int16(), false),
+ field("column3", int32(), false), field("column4", int64(), false),
+ field("column5", uint8(), false), field("column6", uint16(), false),
+ field("column7", uint32(), false), field("column8", uint64(), false),
+ field("column9", boolean(), false), field("column10", utf8(), false)});
+
+ // Construct expected arrays
+ std::shared_ptr<Array> int8_array = ArrayFromJSON(int8(), "[-1, -10]");
+ std::shared_ptr<Array> int16_array = ArrayFromJSON(int16(), "[-2, -20]");
+ std::shared_ptr<Array> int32_array = ArrayFromJSON(int32(), "[-3, -30]");
+ std::shared_ptr<Array> int64_array = ArrayFromJSON(int64(), "[-4, -40]");
+ std::shared_ptr<Array> uint8_array = ArrayFromJSON(uint8(), "[1, 10]");
+ std::shared_ptr<Array> uint16_array = ArrayFromJSON(uint16(), "[2, 20]");
+ std::shared_ptr<Array> uint32_array = ArrayFromJSON(uint32(), "[3, 30]");
+ std::shared_ptr<Array> uint64_array = ArrayFromJSON(uint64(), "[4, 40]");
+ std::shared_ptr<Array> bool_array = ArrayFromJSON(boolean(), "[true, false]");
+ std::shared_ptr<Array> string_array = ArrayFromJSON(utf8(), R"(["Tests", "Other"])");
+ auto expected_table =
+ Table::Make(expected_schema,
+ {int8_array, int16_array, int32_array, int64_array, uint8_array,
+ uint16_array, uint32_array, uint64_array, bool_array, string_array});
+
+ ASSERT_TRUE(expected_table->Equals(*table));
+}
+
+TEST(TestTableFromTupleVector, ListType) {
+ using tuple_type = std::tuple<std::vector<int64_t>>;
+
+ auto expected_schema =
+ std::shared_ptr<Schema>(new Schema({field("column1", list(int64()), false)}));
+ std::shared_ptr<Array> expected_array =
+ ArrayFromJSON(list(int64()), "[[1, 1, 2, 34], [2, -4]]");
+ std::shared_ptr<Table> expected_table = Table::Make(expected_schema, {expected_array});
+
+ std::vector<tuple_type> rows{tuple_type(std::vector<int64_t>{1, 1, 2, 34}),
+ tuple_type(std::vector<int64_t>{2, -4})};
+ std::vector<std::string> names{"column1"};
+
+ std::shared_ptr<Table> table;
+ ASSERT_OK(TableFromTupleRange(default_memory_pool(), rows, names, &table));
+ ASSERT_TRUE(expected_table->Equals(*table));
+}
+
+TEST(TestTupleVectorFromTable, PrimitiveTypes) {
+ compute::FunctionContext ctx;
+ compute::CastOptions cast_options;
+
+ std::vector<primitive_types_tuple> expected_rows{
+ primitive_types_tuple(-1, -2, -3, -4, 1, 2, 3, 4, true, "Tests"),
+ primitive_types_tuple(-10, -20, -30, -40, 10, 20, 30, 40, false, "Other")};
+
+ std::shared_ptr<Schema> schema = std::shared_ptr<Schema>(
+ new Schema({field("column1", int8(), false), field("column2", int16(), false),
+ field("column3", int32(), false), field("column4", int64(), false),
+ field("column5", uint8(), false), field("column6", uint16(), false),
+ field("column7", uint32(), false), field("column8", uint64(), false),
+ field("column9", boolean(), false), field("column10", utf8(), false)}));
+
+ // Construct expected arrays
+ std::shared_ptr<Array> int8_array;
+ ArrayFromVector<Int8Type, int8_t>({-1, -10}, &int8_array);
+ std::shared_ptr<Array> int16_array;
+ ArrayFromVector<Int16Type, int16_t>({-2, -20}, &int16_array);
+ std::shared_ptr<Array> int32_array;
+ ArrayFromVector<Int32Type, int32_t>({-3, -30}, &int32_array);
+ std::shared_ptr<Array> int64_array;
+ ArrayFromVector<Int64Type, int64_t>({-4, -40}, &int64_array);
+ std::shared_ptr<Array> uint8_array;
+ ArrayFromVector<UInt8Type, uint8_t>({1, 10}, &uint8_array);
+ std::shared_ptr<Array> uint16_array;
+ ArrayFromVector<UInt16Type, uint16_t>({2, 20}, &uint16_array);
+ std::shared_ptr<Array> uint32_array;
+ ArrayFromVector<UInt32Type, uint32_t>({3, 30}, &uint32_array);
+ std::shared_ptr<Array> uint64_array;
+ ArrayFromVector<UInt64Type, uint64_t>({4, 40}, &uint64_array);
+ std::shared_ptr<Array> bool_array;
+ ArrayFromVector<BooleanType, bool>({true, false}, &bool_array);
+ std::shared_ptr<Array> string_array;
+ ArrayFromVector<StringType, std::string>({"Tests", "Other"}, &string_array);
+ auto table = Table::Make(
+ schema, {int8_array, int16_array, int32_array, int64_array, uint8_array,
+ uint16_array, uint32_array, uint64_array, bool_array, string_array});
+
+ std::vector<primitive_types_tuple> rows(2);
+ ASSERT_OK(TupleRangeFromTable(*table, cast_options, &ctx, &rows));
+ ASSERT_EQ(rows, expected_rows);
+
+ // The number of rows must match
+ std::vector<primitive_types_tuple> too_few_rows(1);
+ ASSERT_RAISES(Invalid, TupleRangeFromTable(*table, cast_options, &ctx, &too_few_rows));
+
+ // The number of columns must match
+ std::shared_ptr<Table> corrupt_table;
+ ASSERT_OK(table->RemoveColumn(0, &corrupt_table));
+ ASSERT_RAISES(Invalid, TupleRangeFromTable(*corrupt_table, cast_options, &ctx, &rows));
+}
+
+TEST(TestTupleVectorFromTable, ListType) {
+ using tuple_type = std::tuple<std::vector<int64_t>>;
+
+ compute::FunctionContext ctx;
+ compute::CastOptions cast_options;
+ auto expected_schema =
+ std::shared_ptr<Schema>(new Schema({field("column1", list(int64()), false)}));
+ std::shared_ptr<Array> expected_array =
+ ArrayFromJSON(list(int64()), "[[1, 1, 2, 34], [2, -4]]");
+ std::shared_ptr<Table> table = Table::Make(expected_schema, {expected_array});
+
+ std::vector<tuple_type> expected_rows{tuple_type(std::vector<int64_t>{1, 1, 2, 34}),
+ tuple_type(std::vector<int64_t>{2, -4})};
+
+ std::vector<tuple_type> rows(2);
+ ASSERT_OK(TupleRangeFromTable(*table, cast_options, &ctx, &rows));
+ ASSERT_EQ(rows, expected_rows);
+}
+
+TEST(TestTupleVectorFromTable, CastingNeeded) {
+ using tuple_type = std::tuple<std::vector<int64_t>>;
+
+ compute::FunctionContext ctx;
+ compute::CastOptions cast_options;
+ auto expected_schema =
+ std::shared_ptr<Schema>(new Schema({field("column1", list(int16()), false)}));
+ std::shared_ptr<Array> expected_array =
+ ArrayFromJSON(list(int16()), "[[1, 1, 2, 34], [2, -4]]");
+ std::shared_ptr<Table> table = Table::Make(expected_schema, {expected_array});
+
+ std::vector<tuple_type> expected_rows{tuple_type(std::vector<int64_t>{1, 1, 2, 34}),
+ tuple_type(std::vector<int64_t>{2, -4})};
+
+ std::vector<tuple_type> rows(2);
+ ASSERT_OK(TupleRangeFromTable(*table, cast_options, &ctx, &rows));
+ ASSERT_EQ(rows, expected_rows);
+}
+
} // namespace stl
} // namespace arrow
diff --git a/cpp/src/arrow/stl.h b/cpp/src/arrow/stl.h
index def496b..d641e39 100644
--- a/cpp/src/arrow/stl.h
+++ b/cpp/src/arrow/stl.h
@@ -23,8 +23,12 @@
#include <tuple>
#include <vector>
+#include "arrow/builder.h"
+#include "arrow/compute/api.h"
+#include "arrow/table.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
namespace arrow {
@@ -32,6 +36,81 @@ class Schema;
namespace stl {
+/// Traits meta class to map standard C/C++ types to equivalent Arrow types.
+template <typename T>
+struct ConversionTraits {};
+
+#define ARROW_STL_CONVERSION(c_type, ArrowType_) \
+ template <> \
+ struct ConversionTraits<c_type> : public CTypeTraits<c_type> { \
+ static Status AppendRow(typename TypeTraits<ArrowType_>::BuilderType& builder, \
+ c_type cell) { \
+ return builder.Append(cell); \
+ } \
+ static c_type GetEntry(const typename TypeTraits<ArrowType_>::ArrayType& array, \
+ size_t j) { \
+ return array.Value(j); \
+ } \
+ constexpr static bool nullable = false; \
+ };
+
+ARROW_STL_CONVERSION(bool, BooleanType)
+ARROW_STL_CONVERSION(int8_t, Int8Type)
+ARROW_STL_CONVERSION(int16_t, Int16Type)
+ARROW_STL_CONVERSION(int32_t, Int32Type)
+ARROW_STL_CONVERSION(int64_t, Int64Type)
+ARROW_STL_CONVERSION(uint8_t, UInt8Type)
+ARROW_STL_CONVERSION(uint16_t, UInt16Type)
+ARROW_STL_CONVERSION(uint32_t, UInt32Type)
+ARROW_STL_CONVERSION(uint64_t, UInt64Type)
+ARROW_STL_CONVERSION(float, FloatType)
+ARROW_STL_CONVERSION(double, DoubleType)
+
+template <>
+struct ConversionTraits<std::string> : public CTypeTraits<std::string> {
+ static Status AppendRow(StringBuilder& builder, const std::string& cell) {
+ return builder.Append(cell);
+ }
+ static std::string GetEntry(const StringArray& array, size_t j) {
+ return array.GetString(j);
+ }
+ constexpr static bool nullable = false;
+};
+
+template <typename value_c_type>
+struct ConversionTraits<std::vector<value_c_type>>
+ : public CTypeTraits<std::vector<value_c_type>> {
+ static Status AppendRow(ListBuilder& builder, std::vector<value_c_type> cell) {
+ using ElementBuilderType = typename TypeTraits<
+ typename ConversionTraits<value_c_type>::ArrowType>::BuilderType;
+ ARROW_RETURN_NOT_OK(builder.Append());
+ ElementBuilderType& value_builder =
+ ::arrow::internal::checked_cast<ElementBuilderType&>(*builder.value_builder());
+ for (auto const& value : cell) {
+ ARROW_RETURN_NOT_OK(
+ ConversionTraits<value_c_type>::AppendRow(value_builder, value));
+ }
+ return Status::OK();
+ }
+
+ static std::vector<value_c_type> GetEntry(const ListArray& array, size_t j) {
+ using ElementArrayType = typename TypeTraits<
+ typename ConversionTraits<value_c_type>::ArrowType>::ArrayType;
+
+ const ElementArrayType& value_array =
+ ::arrow::internal::checked_cast<const ElementArrayType&>(*array.values());
+
+ std::vector<value_c_type> vec(array.value_length(j));
+ for (int64_t i = 0; i < array.value_length(j); i++) {
+ vec[i] = ConversionTraits<value_c_type>::GetEntry(value_array,
+ array.value_offset(j) + i);
+ }
+ return vec;
+ }
+
+ constexpr static bool nullable = false;
+};
+
/// Build an arrow::Schema based upon the types defined in a std::tuple-like structure.
///
/// While the type information is available at compile-time, we still need to add the
@@ -76,10 +155,12 @@ struct SchemaFromTuple {
template <typename NamesTuple>
static std::vector<std::shared_ptr<Field>> MakeSchemaRecursionT(
const NamesTuple& names) {
+ using std::get;
+
std::vector<std::shared_ptr<Field>> ret =
SchemaFromTuple<Tuple, N - 1>::MakeSchemaRecursionT(names);
- std::shared_ptr<DataType> type = CTypeTraits<Element>::type_singleton();
- ret.push_back(field(std::get<N - 1>(names), type, false /* nullable */));
+ std::shared_ptr<DataType> type = ConversionTraits<Element>::type_singleton();
+ ret.push_back(field(get<N - 1>(names), type, ConversionTraits<Element>::nullable));
return ret;
}
@@ -116,7 +197,175 @@ struct SchemaFromTuple<Tuple, 0> {
return ret;
}
};
-/// @endcond
+
+namespace internal {
+template <typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
+struct CreateBuildersRecursive {
+ static Status Make(MemoryPool* pool,
+ std::vector<std::unique_ptr<ArrayBuilder>>* builders) {
+ using Element = typename std::tuple_element<N - 1, Tuple>::type;
+ std::shared_ptr<DataType> type = ConversionTraits<Element>::type_singleton();
+ ARROW_RETURN_NOT_OK(MakeBuilder(pool, type, &builders->at(N - 1)));
+
+ return CreateBuildersRecursive<Tuple, N - 1>::Make(pool, builders);
+ }
+};
+
+template <typename Tuple>
+struct CreateBuildersRecursive<Tuple, 0> {
+ static Status Make(MemoryPool*, std::vector<std::unique_ptr<ArrayBuilder>>*) {
+ return Status::OK();
+ }
+};
+
+template <typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
+struct RowIterator {
+ static Status Append(const std::vector<std::unique_ptr<ArrayBuilder>>& builders,
+ const Tuple& row) {
+ using std::get;
+ using Element = typename std::tuple_element<N - 1, Tuple>::type;
+ using BuilderType =
+ typename TypeTraits<typename ConversionTraits<Element>::ArrowType>::BuilderType;
+
+ BuilderType& builder =
+ ::arrow::internal::checked_cast<BuilderType&>(*builders[N - 1]);
+ ARROW_RETURN_NOT_OK(ConversionTraits<Element>::AppendRow(builder, get<N - 1>(row)));
+
+ return RowIterator<Tuple, N - 1>::Append(builders, row);
+ }
+};
+
+template <typename Tuple>
+struct RowIterator<Tuple, 0> {
+ static Status Append(const std::vector<std::unique_ptr<ArrayBuilder>>& builders,
+ const Tuple& row) {
+ return Status::OK();
+ }
+};
+
+template <typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
+struct EnsureColumnTypes {
+ static Status Cast(const Table& table, std::shared_ptr<Table>* table_owner,
+ const compute::CastOptions& cast_options,
+ compute::FunctionContext* ctx,
+ std::reference_wrapper<const ::arrow::Table>* result) {
+ using Element = typename std::tuple_element<N - 1, Tuple>::type;
+ std::shared_ptr<DataType> expected_type = ConversionTraits<Element>::type_singleton();
+
+ if (!table.schema()->field(N - 1)->type()->Equals(*expected_type)) {
+ compute::Datum casted;
+ ARROW_RETURN_NOT_OK(compute::Cast(ctx, compute::Datum(table.column(N - 1)->data()),
+ expected_type, cast_options, &casted));
+ std::shared_ptr<Column> new_column = std::make_shared<Column>(
+ table.schema()->field(N - 1)->WithType(expected_type), casted.chunked_array());
+ ARROW_RETURN_NOT_OK(table.SetColumn(N - 1, new_column, table_owner));
+ *result = **table_owner;
+ }
+
+ return EnsureColumnTypes<Tuple, N - 1>::Cast(result->get(), table_owner, cast_options,
+ ctx, result);
+ }
+};
+
+template <typename Tuple>
+struct EnsureColumnTypes<Tuple, 0> {
+ static Status Cast(const Table& table, std::shared_ptr<Table>* table_ownder,
+ const compute::CastOptions& cast_options,
+ compute::FunctionContext* ctx,
+ std::reference_wrapper<const ::arrow::Table>* result) {
+ return Status::OK();
+ }
+};
+
+template <typename Range, typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
+struct TupleSetter {
+ static void Fill(const Table& table, Range* rows) {
+ using std::get;
+ using Element = typename std::tuple_element<N - 1, Tuple>::type;
+ using ArrayType =
+ typename TypeTraits<typename ConversionTraits<Element>::ArrowType>::ArrayType;
+
+ auto iter = rows->begin();
+ const ChunkedArray& chunked_array = *table.column(N - 1)->data();
+ for (int i = 0; i < chunked_array.num_chunks(); i++) {
+ const ArrayType& array =
+ ::arrow::internal::checked_cast<const ArrayType&>(*chunked_array.chunk(i));
+ for (int64_t j = 0; j < array.length(); j++) {
+ get<N - 1>(*iter++) = ConversionTraits<Element>::GetEntry(array, j);
+ }
+ }
+
+ return TupleSetter<Range, Tuple, N - 1>::Fill(table, rows);
+ }
+};
+
+template <typename Range, typename Tuple>
+struct TupleSetter<Range, Tuple, 0> {
+ static void Fill(const Table& table, Range* rows) {}
+};
+
+} // namespace internal
+
+template <typename Range>
+Status TableFromTupleRange(MemoryPool* pool, const Range& rows,
+ const std::vector<std::string>& names,
+ std::shared_ptr<Table>* table) {
+ using row_type = typename std::iterator_traits<decltype(std::begin(rows))>::value_type;
+ constexpr std::size_t n_columns = std::tuple_size<row_type>::value;
+
+ std::shared_ptr<Schema> schema = SchemaFromTuple<row_type>::MakeSchema(names);
+
+ std::vector<std::unique_ptr<ArrayBuilder>> builders(n_columns);
+ ARROW_RETURN_NOT_OK(internal::CreateBuildersRecursive<row_type>::Make(pool, &builders));
+
+ for (auto const& row : rows) {
+ ARROW_RETURN_NOT_OK(internal::RowIterator<row_type>::Append(builders, row));
+ }
+
+ std::vector<std::shared_ptr<Array>> arrays;
+ for (auto const& builder : builders) {
+ std::shared_ptr<Array> array;
+ ARROW_RETURN_NOT_OK(builder->Finish(&array));
+ arrays.emplace_back(array);
+ }
+
+ *table = Table::Make(schema, arrays);
+
+ return Status::OK();
+}
+
+template <typename Range>
+Status TupleRangeFromTable(const Table& table, const compute::CastOptions& cast_options,
+ compute::FunctionContext* ctx, Range* rows) {
+ using row_type = typename std::decay<decltype(*std::begin(*rows))>::type;
+ constexpr std::size_t n_columns = std::tuple_size<row_type>::value;
+
+ if (table.schema()->num_fields() != n_columns) {
+ std::stringstream ss;
+ ss << "Number of columns in the table does not match the width of the target: ";
+ ss << table.schema()->num_fields() << " != " << n_columns;
+ return Status::Invalid(ss.str());
+ }
+
+ // TODO: Use std::size with C++17
+ if (rows->size() != static_cast<size_t>(table.num_rows())) {
+ std::stringstream ss;
+ ss << "Number of rows in the table does not match the size of the target: ";
+ ss << table.num_rows() << " != " << rows->size();
+ return Status::Invalid(ss.str());
+ }
+
+ // Check that all columns have the correct type, otherwise cast them.
+ std::shared_ptr<Table> table_owner;
+ std::reference_wrapper<const ::arrow::Table> current_table(table);
+
+ ARROW_RETURN_NOT_OK(internal::EnsureColumnTypes<row_type>::Cast(
+ table, &table_owner, cast_options, ctx, ¤t_table));
+
+ internal::TupleSetter<Range, row_type>::Fill(current_table.get(), rows);
+
+ return Status::OK();
+}
} // namespace stl
} // namespace arrow
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 79ad7c1..2024899 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -50,6 +50,10 @@ std::shared_ptr<Field> Field::RemoveMetadata() const {
return std::make_shared<Field>(name_, type_, nullable_);
}
+std::shared_ptr<Field> Field::WithType(const std::shared_ptr<DataType>& type) const {
+ return std::make_shared<Field>(name_, type, nullable_, metadata_);
+}
+
std::vector<std::shared_ptr<Field>> Field::Flatten() const {
std::vector<std::shared_ptr<Field>> flattened;
if (type_->id() == Type::STRUCT) {
@@ -455,6 +459,14 @@ std::string Schema::ToString() const {
return buffer.str();
}
+std::vector<std::string> Schema::field_names() const {
+ std::vector<std::string> names;
+ for (auto& field : fields_) {
+ names.push_back(field->name());
+ }
+ return names;
+}
+
std::shared_ptr<Schema> schema(const std::vector<std::shared_ptr<Field>>& fields,
const std::shared_ptr<const KeyValueMetadata>& metadata) {
return std::make_shared<Schema>(fields, metadata);
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 3d01b5a..4f61f2d 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -263,6 +263,9 @@ class ARROW_EXPORT Field {
/// \brief Return a copy of this field without any metadata attached to it
std::shared_ptr<Field> RemoveMetadata() const;
+ /// \brief Return a copy of this field with the replaced type.
+ std::shared_ptr<Field> WithType(const std::shared_ptr<DataType>& type) const;
+
std::vector<std::shared_ptr<Field>> Flatten() const;
bool Equals(const Field& other, bool check_metadata = true) const;
@@ -828,6 +831,8 @@ class ARROW_EXPORT Schema {
const std::vector<std::shared_ptr<Field>>& fields() const { return fields_; }
+ std::vector<std::string> field_names() const;
+
/// \brief The custom key-value metadata, if any
///
/// \return metadata may be null
diff --git a/dev/lint/Dockerfile b/dev/lint/Dockerfile
index 9a547c8..ae229d5 100644
--- a/dev/lint/Dockerfile
+++ b/dev/lint/Dockerfile
@@ -17,8 +17,7 @@
FROM arrow:python-3.6
-RUN apt-get install -y -q gnupg && \
- apt-get update && \
+RUN apt-get update && \
apt-get install -y -q \
clang-7 \
libclang-7-dev \
diff --git a/docs/source/cpp/index.rst b/docs/source/cpp/examples/index.rst
similarity index 79%
copy from docs/source/cpp/index.rst
copy to docs/source/cpp/examples/index.rst
index 1d70e6a..464c77f 100644
--- a/docs/source/cpp/index.rst
+++ b/docs/source/cpp/examples/index.rst
@@ -15,18 +15,11 @@
.. specific language governing permissions and limitations
.. under the License.
-C++ Implementation
-==================
+Examples
+========
.. toctree::
- :maxdepth: 2
+ :maxdepth: 1
- getting_started
- examples
- api
-
-.. TODO add "topics" chapter
-.. - nested arrays
-.. - dictionary encoding
-
-.. TODO add "building" or "development" chapter
+ row_columnar_conversion
+ std::tuple-like ranges to Arrow <tuple_range_conversion>
diff --git a/docs/source/cpp/examples.rst b/docs/source/cpp/examples/row_columnar_conversion.rst
similarity index 89%
rename from docs/source/cpp/examples.rst
rename to docs/source/cpp/examples/row_columnar_conversion.rst
index 5f4372f..02fd61b 100644
--- a/docs/source/cpp/examples.rst
+++ b/docs/source/cpp/examples/row_columnar_conversion.rst
@@ -18,13 +18,10 @@
.. default-domain:: cpp
.. highlight:: cpp
-Examples
-========
-
Row to columnar conversion
---------------------------
+==========================
The following example converts an array of structs to a :class:`arrow::Table`
instance, and then converts it back to the original array of structs.
-.. literalinclude:: ../../../cpp/examples/arrow/row-wise-conversion-example.cc
+.. literalinclude:: ../../../../cpp/examples/arrow/row-wise-conversion-example.cc
diff --git a/docs/source/cpp/examples/tuple_range_conversion.rst b/docs/source/cpp/examples/tuple_range_conversion.rst
new file mode 100644
index 0000000..64ba237
--- /dev/null
+++ b/docs/source/cpp/examples/tuple_range_conversion.rst
@@ -0,0 +1,106 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. default-domain:: cpp
+.. highlight:: cpp
+
+Conversion of ranges of ``std::tuple``-like objects to ``Table`` instances
+==========================================================================
+
+While the row to columnar conversion example shows a quite manual approach to
+converting rows into columns, Arrow also provides some template logic to convert
+ranges of ``std::tuple<..>``-like objects to tables.
+
+In the simplest case, you only need to provide the input data and the column
+names; the type conversion is then inferred at compile time.
+
+.. code::
+
+ std::vector<std::tuple<double, std::string>> rows = ..
+ std::shared_ptr<Table> table;
+
+ if (!arrow::stl::TableFromTupleRange(
+ arrow::default_memory_pool(),
+ rows, names, &table).ok()
+ ) {
+ // Error handling code should go here.
+ }
+
+In reverse, you can use ``TupleRangeFromTable`` to fill an already
+pre-allocated range with the data from a ``Table`` instance.
+
+.. code::
+
+ // An important aspect here is that the table columns need to be in the
+ // same order as the columns will later appear in the tuple. As the tuple
+ // is unnamed, matching is done on positions.
+ std::shared_ptr<Table> table = ..
+
+ // The range needs to be pre-allocated to the respective amount of rows. This
+ // allows us to pass in an arbitrary range object, not only `std::vector`.
+ std::vector<std::tuple<double, std::string>> rows(2);
+ arrow::compute::FunctionContext ctx;  // used when columns need to be cast
+ if (!arrow::stl::TupleRangeFromTable(*table, arrow::compute::CastOptions(), &ctx, &rows).ok()) {
+ // Error handling code should go here.
+ }
+
+Arrow itself already supports some C(++) data types for this conversion. If you
+want to support additional data types, you need to implement a specialization
+of ``arrow::stl::ConversionTraits<T>`` and the more general
+``arrow::CTypeTraits<T>``.
+
+
+.. code::
+
+ namespace arrow {
+
+ template<>
+ struct CTypeTraits<boost::posix_time::ptime> {
+ using ArrowType = ::arrow::TimestampType;
+
+ static std::shared_ptr<::arrow::DataType> type_singleton() {
+ return ::arrow::timestamp(::arrow::TimeUnit::MICRO);
+ }
+ };
+
+ }
+
+ namespace arrow { namespace stl {
+
+ template <>
+ struct ConversionTraits<boost::posix_time::ptime> : public CTypeTraits<boost::posix_time::ptime> {
+ constexpr static bool nullable = false;
+
+ // This is the specialization to load a scalar value into an Arrow builder.
+ static Status AppendRow(
+ typename TypeTraits<TimestampType>::BuilderType& builder,
+ boost::posix_time::ptime cell) {
+ boost::posix_time::ptime const epoch({1970, 1, 1}, {0, 0, 0, 0});
+ return builder.Append((cell - epoch).total_microseconds());
+ }
+
+ // Specify how we can fill the tuple from the values stored in the Arrow
+ // array.
+ static boost::posix_time::ptime GetEntry(
+ const TimestampArray& array, size_t j) {
+ boost::posix_time::ptime const epoch({1970, 1, 1}, {0, 0, 0, 0});
+ return epoch + boost::posix_time::microseconds(array.Value(j));
+ }
+ };
+
+ }}
+
diff --git a/docs/source/cpp/index.rst b/docs/source/cpp/index.rst
index 1d70e6a..b3f6e4c 100644
--- a/docs/source/cpp/index.rst
+++ b/docs/source/cpp/index.rst
@@ -22,7 +22,7 @@ C++ Implementation
:maxdepth: 2
getting_started
- examples
+ Examples <examples/index>
api
.. TODO add "topics" chapter