You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/07/13 00:34:44 UTC

arrow git commit: ARROW-106: [C++] Add IPC to binary/string types

Repository: arrow
Updated Branches:
  refs/heads/master ff6132f8a -> 62390d842


ARROW-106: [C++] Add IPC to binary/string types

Author: Micah Kornfield <em...@gmail.com>

Closes #103 from emkornfield/emk_add_string_rpc and squashes the following commits:

9c563fe [Micah Kornfield] ARROW-106: [C++] Add IPC to binary/string types


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/62390d84
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/62390d84
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/62390d84

Branch: refs/heads/master
Commit: 62390d8427445b033ba7f7cf3150184222d2c2c1
Parents: ff6132f
Author: Micah Kornfield <em...@gmail.com>
Authored: Tue Jul 12 17:34:36 2016 -0700
Committer: Wes McKinney <we...@apache.org>
Committed: Tue Jul 12 17:34:36 2016 -0700

----------------------------------------------------------------------
 cpp/src/arrow/ipc/adapter.cc          | 10 ++----
 cpp/src/arrow/ipc/ipc-adapter-test.cc | 52 ++++++++++++++++++++++++++++--
 cpp/src/arrow/types/construct.cc      |  4 +++
 3 files changed, 57 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/62390d84/cpp/src/arrow/ipc/adapter.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/adapter.cc b/cpp/src/arrow/ipc/adapter.cc
index 45cc288..bac1172 100644
--- a/cpp/src/arrow/ipc/adapter.cc
+++ b/cpp/src/arrow/ipc/adapter.cc
@@ -33,6 +33,7 @@
 #include "arrow/types/construct.h"
 #include "arrow/types/list.h"
 #include "arrow/types/primitive.h"
+#include "arrow/types/string.h"
 #include "arrow/util/buffer.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/status.h"
@@ -81,14 +82,9 @@ static bool IsListType(const DataType* type) {
     // code consider using pattern like:
     // http://stackoverflow.com/questions/26784685/c-macro-for-calling-function-based-on-enum-type
     //
-    // TODO(emkornfield) Fix type systems so these are all considered lists and
-    // the types behave the same way?
-    // case Type::BINARY:
-    // case Type::CHAR:
+    case Type::BINARY:
     case Type::LIST:
-      // see todo on common types
-      // case Type::STRING:
-      // case Type::VARCHAR:
+    case Type::STRING:
       return true;
     default:
       return false;

http://git-wip-us.apache.org/repos/asf/arrow/blob/62390d84/cpp/src/arrow/ipc/ipc-adapter-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/ipc-adapter-test.cc b/cpp/src/arrow/ipc/ipc-adapter-test.cc
index eb47ac6..2bfb459 100644
--- a/cpp/src/arrow/ipc/ipc-adapter-test.cc
+++ b/cpp/src/arrow/ipc/ipc-adapter-test.cc
@@ -31,6 +31,7 @@
 #include "arrow/test-util.h"
 #include "arrow/types/list.h"
 #include "arrow/types/primitive.h"
+#include "arrow/types/string.h"
 #include "arrow/util/bit-util.h"
 #include "arrow/util/buffer.h"
 #include "arrow/util/memory-pool.h"
@@ -105,6 +106,52 @@ Status MakeIntRowBatch(std::shared_ptr<RowBatch>* out) {
   return Status::OK();
 }
 
+template <class Builder, class RawType>
+Status MakeRandomBinaryArray(
+    const TypePtr& type, int32_t length, MemoryPool* pool, ArrayPtr* array) {
+  const std::vector<std::string> values = {
+      "", "", "abc", "123", "efg", "456!@#!@#", "12312"};
+  Builder builder(pool, type);
+  const auto values_len = values.size();
+  for (int32_t i = 0; i < length; ++i) {
+    int values_index = i % values_len;
+    if (values_index == 0) {
+      RETURN_NOT_OK(builder.AppendNull());
+    } else {
+      const std::string& value = values[values_index];
+      RETURN_NOT_OK(
+          builder.Append(reinterpret_cast<const RawType*>(value.data()), value.size()));
+    }
+  }
+  *array = builder.Finish();
+  return Status::OK();
+}
+
+Status MakeStringTypesRowBatch(std::shared_ptr<RowBatch>* out) {
+  const int32_t length = 500;
+  auto string_type = std::make_shared<StringType>();
+  auto binary_type = std::make_shared<BinaryType>();
+  auto f0 = std::make_shared<Field>("f0", string_type);
+  auto f1 = std::make_shared<Field>("f1", binary_type);
+  std::shared_ptr<Schema> schema(new Schema({f0, f1}));
+
+  std::shared_ptr<Array> a0, a1;
+  MemoryPool* pool = default_memory_pool();
+
+  {
+    auto status =
+        MakeRandomBinaryArray<StringBuilder, char>(string_type, length, pool, &a0);
+    RETURN_NOT_OK(status);
+  }
+  {
+    auto status =
+        MakeRandomBinaryArray<BinaryBuilder, uint8_t>(binary_type, length, pool, &a1);
+    RETURN_NOT_OK(status);
+  }
+  out->reset(new RowBatch(schema, length, {a0, a1}));
+  return Status::OK();
+}
+
 Status MakeListRowBatch(std::shared_ptr<RowBatch>* out) {
   // Make the schema
   auto f0 = std::make_shared<Field>("f0", LIST_INT32);
@@ -191,9 +238,10 @@ Status MakeDeeplyNestedList(std::shared_ptr<RowBatch>* out) {
   return Status::OK();
 }
 
-INSTANTIATE_TEST_CASE_P(RoundTripTests, TestWriteRowBatch,
+INSTANTIATE_TEST_CASE_P(
+    RoundTripTests, TestWriteRowBatch,
     ::testing::Values(&MakeIntRowBatch, &MakeListRowBatch, &MakeNonNullRowBatch,
-                            &MakeZeroLengthRowBatch, &MakeDeeplyNestedList));
+        &MakeZeroLengthRowBatch, &MakeDeeplyNestedList, &MakeStringTypesRowBatch));
 
 void TestGetRowBatchSize(std::shared_ptr<RowBatch> batch) {
   MockMemorySource mock_source(1 << 16);

http://git-wip-us.apache.org/repos/asf/arrow/blob/62390d84/cpp/src/arrow/types/construct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.cc b/cpp/src/arrow/types/construct.cc
index 2d913a7..5ae9c5a 100644
--- a/cpp/src/arrow/types/construct.cc
+++ b/cpp/src/arrow/types/construct.cc
@@ -124,9 +124,13 @@ Status MakeListArray(const TypePtr& type, int32_t length,
     const std::shared_ptr<Buffer>& null_bitmap, ArrayPtr* out) {
   switch (type->type) {
     case Type::BINARY:
+      out->reset(new BinaryArray(type, length, offsets, values, null_count, null_bitmap));
+      break;
+
     case Type::LIST:
       out->reset(new ListArray(type, length, offsets, values, null_count, null_bitmap));
       break;
+
     case Type::DECIMAL_TEXT:
     case Type::STRING:
       out->reset(new StringArray(type, length, offsets, values, null_count, null_bitmap));