You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/03/02 19:41:49 UTC

arrow git commit: ARROW-576: [C++] Complete file/stream implementation for union types

Repository: arrow
Updated Branches:
  refs/heads/master 2c3bd9311 -> 0637e05d5


ARROW-576: [C++] Complete file/stream implementation for union types

Author: Wes McKinney <we...@twosigma.com>

Closes #356 from wesm/ARROW-576 and squashes the following commits:

e239ba1 [Wes McKinney] Fix miniconda links
12fde46 [Wes McKinney] Complete metadata roundtrip for unions


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/0637e05d
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/0637e05d
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/0637e05d

Branch: refs/heads/master
Commit: 0637e05d59f20363a9103ffad5712f981314c4df
Parents: 2c3bd93
Author: Wes McKinney <we...@twosigma.com>
Authored: Thu Mar 2 14:41:29 2017 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Thu Mar 2 14:41:29 2017 -0500

----------------------------------------------------------------------
 ci/travis_install_conda.sh             |   4 +-
 cpp/src/arrow/ipc/ipc-file-test.cc     |   2 +-
 cpp/src/arrow/ipc/metadata-internal.cc | 101 ++++++++++++++++++----------
 3 files changed, 67 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/0637e05d/ci/travis_install_conda.sh
----------------------------------------------------------------------
diff --git a/ci/travis_install_conda.sh b/ci/travis_install_conda.sh
index ffa017c..9c13b1b 100644
--- a/ci/travis_install_conda.sh
+++ b/ci/travis_install_conda.sh
@@ -15,9 +15,9 @@
 set -e
 
 if [ $TRAVIS_OS_NAME == "linux" ]; then
-  MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh"
+  MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh"
 else
-  MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh"
+  MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh"
 fi
 
 wget -O miniconda.sh $MINICONDA_URL

http://git-wip-us.apache.org/repos/asf/arrow/blob/0637e05d/cpp/src/arrow/ipc/ipc-file-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/ipc-file-test.cc b/cpp/src/arrow/ipc/ipc-file-test.cc
index e58f2cf..0c95c8e 100644
--- a/cpp/src/arrow/ipc/ipc-file-test.cc
+++ b/cpp/src/arrow/ipc/ipc-file-test.cc
@@ -180,7 +180,7 @@ TEST_P(TestStreamFormat, RoundTrip) {
 #define BATCH_CASES()                                                                   \
   ::testing::Values(&MakeIntRecordBatch, &MakeListRecordBatch, &MakeNonNullRecordBatch, \
       &MakeZeroLengthRecordBatch, &MakeDeeplyNestedList, &MakeStringTypesRecordBatch,   \
-      &MakeStruct, &MakeDictionary);
+      &MakeStruct, &MakeUnion, &MakeDictionary);
 
 INSTANTIATE_TEST_CASE_P(FileRoundTripTests, TestFileFormat, BATCH_CASES());
 INSTANTIATE_TEST_CASE_P(StreamRoundTripTests, TestStreamFormat, BATCH_CASES());

http://git-wip-us.apache.org/repos/asf/arrow/blob/0637e05d/cpp/src/arrow/ipc/metadata-internal.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index 1cc4a23..17a3a5f 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -78,43 +78,6 @@ static Status FloatFromFlatuffer(
   return Status::OK();
 }
 
-static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
-    const std::vector<std::shared_ptr<Field>>& children, std::shared_ptr<DataType>* out) {
-  switch (type) {
-    case flatbuf::Type_NONE:
-      return Status::Invalid("Type metadata cannot be none");
-    case flatbuf::Type_Int:
-      return IntFromFlatbuffer(static_cast<const flatbuf::Int*>(type_data), out);
-    case flatbuf::Type_FloatingPoint:
-      return FloatFromFlatuffer(
-          static_cast<const flatbuf::FloatingPoint*>(type_data), out);
-    case flatbuf::Type_Binary:
-      *out = binary();
-      return Status::OK();
-    case flatbuf::Type_Utf8:
-      *out = utf8();
-      return Status::OK();
-    case flatbuf::Type_Bool:
-      *out = boolean();
-      return Status::OK();
-    case flatbuf::Type_Decimal:
-    case flatbuf::Type_Timestamp:
-    case flatbuf::Type_List:
-      if (children.size() != 1) {
-        return Status::Invalid("List must have exactly 1 child field");
-      }
-      *out = std::make_shared<ListType>(children[0]);
-      return Status::OK();
-    case flatbuf::Type_Struct_:
-      *out = std::make_shared<StructType>(children);
-      return Status::OK();
-    case flatbuf::Type_Union:
-      return Status::NotImplemented("Type is not implemented");
-    default:
-      return Status::Invalid("Unrecognized type");
-  }
-}
-
 // Forward declaration
 static Status FieldToFlatbuffer(FBB& fbb, const std::shared_ptr<Field>& field,
     DictionaryMemo* dictionary_memo, FieldOffset* offset);
@@ -153,6 +116,32 @@ static Status StructToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type
   return Status::OK();
 }
 
+// ----------------------------------------------------------------------
+// Union implementation
+
+// Rebuild a union DataType from its flatbuffer metadata.
+//
+// union_data supplies the union mode and the optional typeIds vector; children
+// are the child fields of the union. When typeIds is absent, child i is given
+// type code i. On success *out receives the type created by union_().
+//
+// Returns Status::Invalid if a type code cannot be represented as uint8_t,
+// i.e. an explicit id lies outside [0, 255] or there are more than 256
+// children without explicit ids.
+static Status UnionFromFlatbuffer(const flatbuf::Union* union_data,
+    const std::vector<std::shared_ptr<Field>>& children, std::shared_ptr<DataType>* out) {
+  // Largest value that fits the uint8_t element type of type_codes
+  const int32_t kMaxTypeCode = 255;
+
+  // Any mode other than Sparse is treated as dense
+  UnionMode mode = union_data->mode() == flatbuf::UnionMode_Sparse ? UnionMode::SPARSE
+                                                                   : UnionMode::DENSE;
+
+  std::vector<uint8_t> type_codes;
+
+  const flatbuffers::Vector<int32_t>* fb_type_ids = union_data->typeIds();
+  if (fb_type_ids == nullptr) {
+    // A uint8_t loop counter wraps back to 0 after 255, so with more than 255
+    // children the loop would never terminate. Count with size_t and reject
+    // child counts whose codes do not fit in 8 bits.
+    if (children.size() > static_cast<size_t>(kMaxTypeCode) + 1) {
+      return Status::Invalid("Union has too many children for 8-bit type codes");
+    }
+    type_codes.reserve(children.size());
+    for (size_t i = 0; i < children.size(); ++i) {
+      type_codes.push_back(static_cast<uint8_t>(i));
+    }
+  } else {
+    type_codes.reserve(fb_type_ids->size());
+    for (int32_t id : (*fb_type_ids)) {
+      // Refuse ids that the narrowing cast below would silently alias
+      if (id < 0 || id > kMaxTypeCode) {
+        return Status::Invalid("Union type id does not fit in 8 bits");
+      }
+      type_codes.push_back(static_cast<uint8_t>(id));
+    }
+  }
+
+  *out = union_(children, type_codes, mode);
+  return Status::OK();
+}
+
 static Status UnionToFlatBuffer(FBB& fbb, const std::shared_ptr<DataType>& type,
     std::vector<FieldOffset>* out_children, DictionaryMemo* dictionary_memo,
     Offset* offset) {
@@ -181,6 +170,44 @@ static Status UnionToFlatBuffer(FBB& fbb, const std::shared_ptr<DataType>& type,
   *offset = IntToFlatbuffer(fbb, BIT_WIDTH, IS_SIGNED); \
   break;
 
+// Convert a flatbuffer type tag and its type-specific table into a DataType.
+//
+// type selects how type_data is interpreted. children holds the child fields
+// of a nested type and is used by the List, Struct_ and Union cases. On
+// success *out receives the resulting type.
+//
+// Returns Status::Invalid for Type_NONE, for an unrecognized tag, and for a
+// List that does not have exactly one child; returns Status::NotImplemented
+// for Decimal and Timestamp.
+static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
+    const std::vector<std::shared_ptr<Field>>& children, std::shared_ptr<DataType>* out) {
+  switch (type) {
+    case flatbuf::Type_NONE:
+      return Status::Invalid("Type metadata cannot be none");
+    case flatbuf::Type_Int:
+      return IntFromFlatbuffer(static_cast<const flatbuf::Int*>(type_data), out);
+    case flatbuf::Type_FloatingPoint:
+      return FloatFromFlatuffer(
+          static_cast<const flatbuf::FloatingPoint*>(type_data), out);
+    case flatbuf::Type_Binary:
+      *out = binary();
+      return Status::OK();
+    case flatbuf::Type_Utf8:
+      *out = utf8();
+      return Status::OK();
+    case flatbuf::Type_Bool:
+      *out = boolean();
+      return Status::OK();
+    case flatbuf::Type_Decimal:
+    case flatbuf::Type_Timestamp:
+      // Without an explicit return these labels fall through into the List
+      // case and would be decoded as a list (or rejected with a List-specific
+      // message); report them as unsupported instead.
+      return Status::NotImplemented("Type is not implemented");
+    case flatbuf::Type_List:
+      if (children.size() != 1) {
+        return Status::Invalid("List must have exactly 1 child field");
+      }
+      *out = std::make_shared<ListType>(children[0]);
+      return Status::OK();
+    case flatbuf::Type_Struct_:
+      *out = std::make_shared<StructType>(children);
+      return Status::OK();
+    case flatbuf::Type_Union:
+      return UnionFromFlatbuffer(
+          static_cast<const flatbuf::Union*>(type_data), children, out);
+    default:
+      return Status::Invalid("Unrecognized type");
+  }
+}
+
 // TODO(wesm): Convert this to visitor pattern
 static Status TypeToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type,
     std::vector<FieldOffset>* children, std::vector<VectorLayoutOffset>* layout,