You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/03/02 19:41:49 UTC
arrow git commit: ARROW-576: [C++] Complete file/stream implementation for union types
Repository: arrow
Updated Branches:
refs/heads/master 2c3bd9311 -> 0637e05d5
ARROW-576: [C++] Complete file/stream implementation for union types
Author: Wes McKinney <we...@twosigma.com>
Closes #356 from wesm/ARROW-576 and squashes the following commits:
e239ba1 [Wes McKinney] Fix miniconda links
12fde46 [Wes McKinney] Complete metadata roundtrip for unions
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/0637e05d
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/0637e05d
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/0637e05d
Branch: refs/heads/master
Commit: 0637e05d59f20363a9103ffad5712f981314c4df
Parents: 2c3bd93
Author: Wes McKinney <we...@twosigma.com>
Authored: Thu Mar 2 14:41:29 2017 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Thu Mar 2 14:41:29 2017 -0500
----------------------------------------------------------------------
ci/travis_install_conda.sh | 4 +-
cpp/src/arrow/ipc/ipc-file-test.cc | 2 +-
cpp/src/arrow/ipc/metadata-internal.cc | 101 ++++++++++++++++++----------
3 files changed, 67 insertions(+), 40 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/0637e05d/ci/travis_install_conda.sh
----------------------------------------------------------------------
diff --git a/ci/travis_install_conda.sh b/ci/travis_install_conda.sh
index ffa017c..9c13b1b 100644
--- a/ci/travis_install_conda.sh
+++ b/ci/travis_install_conda.sh
@@ -15,9 +15,9 @@
set -e
if [ $TRAVIS_OS_NAME == "linux" ]; then
- MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh"
+ MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
else
- MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh"
+ MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh"
fi
wget -O miniconda.sh $MINICONDA_URL
http://git-wip-us.apache.org/repos/asf/arrow/blob/0637e05d/cpp/src/arrow/ipc/ipc-file-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/ipc-file-test.cc b/cpp/src/arrow/ipc/ipc-file-test.cc
index e58f2cf..0c95c8e 100644
--- a/cpp/src/arrow/ipc/ipc-file-test.cc
+++ b/cpp/src/arrow/ipc/ipc-file-test.cc
@@ -180,7 +180,7 @@ TEST_P(TestStreamFormat, RoundTrip) {
#define BATCH_CASES() \
::testing::Values(&MakeIntRecordBatch, &MakeListRecordBatch, &MakeNonNullRecordBatch, \
&MakeZeroLengthRecordBatch, &MakeDeeplyNestedList, &MakeStringTypesRecordBatch, \
- &MakeStruct, &MakeDictionary);
+ &MakeStruct, &MakeUnion, &MakeDictionary);
INSTANTIATE_TEST_CASE_P(FileRoundTripTests, TestFileFormat, BATCH_CASES());
INSTANTIATE_TEST_CASE_P(StreamRoundTripTests, TestStreamFormat, BATCH_CASES());
http://git-wip-us.apache.org/repos/asf/arrow/blob/0637e05d/cpp/src/arrow/ipc/metadata-internal.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index 1cc4a23..17a3a5f 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -78,43 +78,6 @@ static Status FloatFromFlatuffer(
return Status::OK();
}
-static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
- const std::vector<std::shared_ptr<Field>>& children, std::shared_ptr<DataType>* out) {
- switch (type) {
- case flatbuf::Type_NONE:
- return Status::Invalid("Type metadata cannot be none");
- case flatbuf::Type_Int:
- return IntFromFlatbuffer(static_cast<const flatbuf::Int*>(type_data), out);
- case flatbuf::Type_FloatingPoint:
- return FloatFromFlatuffer(
- static_cast<const flatbuf::FloatingPoint*>(type_data), out);
- case flatbuf::Type_Binary:
- *out = binary();
- return Status::OK();
- case flatbuf::Type_Utf8:
- *out = utf8();
- return Status::OK();
- case flatbuf::Type_Bool:
- *out = boolean();
- return Status::OK();
- case flatbuf::Type_Decimal:
- case flatbuf::Type_Timestamp:
- case flatbuf::Type_List:
- if (children.size() != 1) {
- return Status::Invalid("List must have exactly 1 child field");
- }
- *out = std::make_shared<ListType>(children[0]);
- return Status::OK();
- case flatbuf::Type_Struct_:
- *out = std::make_shared<StructType>(children);
- return Status::OK();
- case flatbuf::Type_Union:
- return Status::NotImplemented("Type is not implemented");
- default:
- return Status::Invalid("Unrecognized type");
- }
-}
-
// Forward declaration
static Status FieldToFlatbuffer(FBB& fbb, const std::shared_ptr<Field>& field,
DictionaryMemo* dictionary_memo, FieldOffset* offset);
@@ -153,6 +116,32 @@ static Status StructToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type
return Status::OK();
}
+// ----------------------------------------------------------------------
+// Union implementation
+
+static Status UnionFromFlatbuffer(const flatbuf::Union* union_data,
+ const std::vector<std::shared_ptr<Field>>& children, std::shared_ptr<DataType>* out) {
+ UnionMode mode = union_data->mode() == flatbuf::UnionMode_Sparse ? UnionMode::SPARSE
+ : UnionMode::DENSE;
+
+ std::vector<uint8_t> type_codes;
+
+ const flatbuffers::Vector<int32_t>* fb_type_ids = union_data->typeIds();
+ if (fb_type_ids == nullptr) {
+ for (uint8_t i = 0; i < children.size(); ++i) {
+ type_codes.push_back(i);
+ }
+ } else {
+ for (int32_t id : (*fb_type_ids)) {
+ // TODO(wesm): can these values exceed 255?
+ type_codes.push_back(static_cast<uint8_t>(id));
+ }
+ }
+
+ *out = union_(children, type_codes, mode);
+ return Status::OK();
+}
+
static Status UnionToFlatBuffer(FBB& fbb, const std::shared_ptr<DataType>& type,
std::vector<FieldOffset>* out_children, DictionaryMemo* dictionary_memo,
Offset* offset) {
@@ -181,6 +170,44 @@ static Status UnionToFlatBuffer(FBB& fbb, const std::shared_ptr<DataType>& type,
*offset = IntToFlatbuffer(fbb, BIT_WIDTH, IS_SIGNED); \
break;
+static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
+ const std::vector<std::shared_ptr<Field>>& children, std::shared_ptr<DataType>* out) {
+ switch (type) {
+ case flatbuf::Type_NONE:
+ return Status::Invalid("Type metadata cannot be none");
+ case flatbuf::Type_Int:
+ return IntFromFlatbuffer(static_cast<const flatbuf::Int*>(type_data), out);
+ case flatbuf::Type_FloatingPoint:
+ return FloatFromFlatuffer(
+ static_cast<const flatbuf::FloatingPoint*>(type_data), out);
+ case flatbuf::Type_Binary:
+ *out = binary();
+ return Status::OK();
+ case flatbuf::Type_Utf8:
+ *out = utf8();
+ return Status::OK();
+ case flatbuf::Type_Bool:
+ *out = boolean();
+ return Status::OK();
+ case flatbuf::Type_Decimal:
+ case flatbuf::Type_Timestamp:
+ case flatbuf::Type_List:
+ if (children.size() != 1) {
+ return Status::Invalid("List must have exactly 1 child field");
+ }
+ *out = std::make_shared<ListType>(children[0]);
+ return Status::OK();
+ case flatbuf::Type_Struct_:
+ *out = std::make_shared<StructType>(children);
+ return Status::OK();
+ case flatbuf::Type_Union:
+ return UnionFromFlatbuffer(
+ static_cast<const flatbuf::Union*>(type_data), children, out);
+ default:
+ return Status::Invalid("Unrecognized type");
+ }
+}
+
// TODO(wesm): Convert this to visitor pattern
static Status TypeToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type,
std::vector<FieldOffset>* children, std::vector<VectorLayoutOffset>* layout,