You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/03/22 13:26:23 UTC

arrow git commit: ARROW-654: [C++] Serialize timezone in IPC metadata

Repository: arrow
Updated Branches:
  refs/heads/master d25286718 -> 96734efb7


ARROW-654: [C++] Serialize timezone in IPC metadata

Author: Wes McKinney <we...@twosigma.com>

Closes #416 from wesm/ARROW-654 and squashes the following commits:

001708e [Wes McKinney] Fix API change in Python bindings
3729cf9 [Wes McKinney] Serialize timezone in IPC metadata


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/96734efb
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/96734efb
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/96734efb

Branch: refs/heads/master
Commit: 96734efb73852f2d8372f72d7c56e8fb3ab4e516
Parents: d252867
Author: Wes McKinney <we...@twosigma.com>
Authored: Wed Mar 22 09:26:09 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Wed Mar 22 09:26:09 2017 -0400

----------------------------------------------------------------------
 cpp/src/arrow/ipc/feather-test.cc    |  2 +-
 cpp/src/arrow/ipc/feather.cc         |  2 +-
 cpp/src/arrow/ipc/metadata.cc        | 16 ++++++++++++++--
 cpp/src/arrow/ipc/test-common.h      |  2 +-
 cpp/src/arrow/memory_pool.cc         |  2 +-
 cpp/src/arrow/type-test.cc           |  2 +-
 cpp/src/arrow/type.cc                |  4 ++--
 cpp/src/arrow/type.h                 |  4 ++--
 python/pyarrow/includes/libarrow.pxd |  4 ++--
 python/pyarrow/schema.pyx            |  2 +-
 10 files changed, 26 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/ipc/feather-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/feather-test.cc b/cpp/src/arrow/ipc/feather-test.cc
index 078c3e1..2513887 100644
--- a/cpp/src/arrow/ipc/feather-test.cc
+++ b/cpp/src/arrow/ipc/feather-test.cc
@@ -355,7 +355,7 @@ TEST_F(TestTableWriter, TimeTypes) {
   auto f0 = field("f0", date32());
   auto f1 = field("f1", time(TimeUnit::MILLI));
   auto f2 = field("f2", timestamp(TimeUnit::NANO));
-  auto f3 = field("f3", timestamp("US/Los_Angeles", TimeUnit::SECOND));
+  auto f3 = field("f3", timestamp(TimeUnit::SECOND, "US/Los_Angeles"));
   std::shared_ptr<Schema> schema(new Schema({f0, f1, f2, f3}));
 
   std::vector<int64_t> values_vec = {0, 1, 2, 3, 4, 5, 6};

http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/ipc/feather.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/feather.cc b/cpp/src/arrow/ipc/feather.cc
index 72bbaa4..0dd9a81 100644
--- a/cpp/src/arrow/ipc/feather.cc
+++ b/cpp/src/arrow/ipc/feather.cc
@@ -287,7 +287,7 @@ class TableReader::TableReaderImpl {
         } else {
           tz = "";
         }
-        *out = std::make_shared<TimestampType>(tz, unit);
+        *out = timestamp(unit, tz);
       } break;
       case fbs::TypeMetadata_DateMetadata:
         *out = date32();

http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/ipc/metadata.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata.cc b/cpp/src/arrow/ipc/metadata.cc
index a418d48..4dfda54 100644
--- a/cpp/src/arrow/ipc/metadata.cc
+++ b/cpp/src/arrow/ipc/metadata.cc
@@ -46,6 +46,7 @@ using LargeRecordBatchOffset = flatbuffers::Offset<flatbuf::LargeRecordBatch>;
 using RecordBatchOffset = flatbuffers::Offset<flatbuf::RecordBatch>;
 using VectorLayoutOffset = flatbuffers::Offset<arrow::flatbuf::VectorLayout>;
 using Offset = flatbuffers::Offset<void>;
+using FBString = flatbuffers::Offset<flatbuffers::String>;
 
 static constexpr flatbuf::MetadataVersion kMetadataVersion = flatbuf::MetadataVersion_V2;
 
@@ -250,7 +251,12 @@ static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
     }
     case flatbuf::Type_Timestamp: {
       auto ts_type = static_cast<const flatbuf::Timestamp*>(type_data);
-      *out = timestamp(FromFlatbufferUnit(ts_type->unit()));
+      TimeUnit unit = FromFlatbufferUnit(ts_type->unit());
+      if (ts_type->timezone() != 0 && ts_type->timezone()->Length() > 0) {
+        *out = timestamp(unit, ts_type->timezone()->str());
+      } else {
+        *out = timestamp(unit);
+      }
       return Status::OK();
     }
     case flatbuf::Type_Interval:
@@ -364,7 +370,13 @@ static Status TypeToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type,
     case Type::TIMESTAMP: {
       const auto& ts_type = static_cast<const TimestampType&>(*type);
       *out_type = flatbuf::Type_Timestamp;
-      *offset = flatbuf::CreateTimestamp(fbb, ToFlatbufferUnit(ts_type.unit)).Union();
+
+      flatbuf::TimeUnit fb_unit = ToFlatbufferUnit(ts_type.unit);
+      FBString fb_timezone = 0;
+      if (ts_type.timezone.size() > 0) {
+        fb_timezone = fbb.CreateString(ts_type.timezone);
+      }
+      *offset = flatbuf::CreateTimestamp(fbb, fb_unit, fb_timezone).Union();
     } break;
     case Type::LIST:
       *out_type = flatbuf::Type_List;

http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/ipc/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h
index ba203b0..330af0c 100644
--- a/cpp/src/arrow/ipc/test-common.h
+++ b/cpp/src/arrow/ipc/test-common.h
@@ -497,7 +497,7 @@ Status MakeDate32(std::shared_ptr<RecordBatch>* out) {
 Status MakeTimestamps(std::shared_ptr<RecordBatch>* out) {
   std::vector<bool> is_valid = {true, true, true, false, true, true, true};
   auto f0 = field("f0", timestamp(TimeUnit::MILLI));
-  auto f1 = field("f1", timestamp(TimeUnit::NANO));
+  auto f1 = field("f1", timestamp(TimeUnit::NANO, "America/New_York"));
   auto f2 = field("f2", timestamp(TimeUnit::SECOND));
   std::shared_ptr<Schema> schema(new Schema({f0, f1, f2}));
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/memory_pool.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index cf01a02..7992f22 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -19,10 +19,10 @@
 
 #include <algorithm>
 #include <cstdlib>
+#include <iostream>
 #include <mutex>
 #include <sstream>
 #include <stdlib.h>
-#include <iostream>
 
 #include "arrow/status.h"
 #include "arrow/util/logging.h"

http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/type-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type-test.cc b/cpp/src/arrow/type-test.cc
index ddfff87..22aa7eb 100644
--- a/cpp/src/arrow/type-test.cc
+++ b/cpp/src/arrow/type-test.cc
@@ -209,7 +209,7 @@ TEST(TestTimestampType, Equals) {
 
 TEST(TestTimestampType, ToString) {
   auto t1 = timestamp(TimeUnit::MILLI);
-  auto t2 = timestamp("US/Eastern", TimeUnit::NANO);
+  auto t2 = timestamp(TimeUnit::NANO, "US/Eastern");
   auto t3 = timestamp(TimeUnit::SECOND);
   auto t4 = timestamp(TimeUnit::MICRO);
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/type.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index ee0a89a..64070cb 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -244,8 +244,8 @@ std::shared_ptr<DataType> timestamp(TimeUnit unit) {
   return std::make_shared<TimestampType>(unit);
 }
 
-std::shared_ptr<DataType> timestamp(const std::string& timezone, TimeUnit unit) {
-  return std::make_shared<TimestampType>(timezone, unit);
+std::shared_ptr<DataType> timestamp(TimeUnit unit, const std::string& timezone) {
+  return std::make_shared<TimestampType>(unit, timezone);
 }
 
 std::shared_ptr<DataType> time(TimeUnit unit) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index adc3161..27b28d2 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -532,7 +532,7 @@ struct ARROW_EXPORT TimestampType : public FixedWidthType {
   explicit TimestampType(TimeUnit unit = TimeUnit::MILLI)
       : FixedWidthType(Type::TIMESTAMP), unit(unit) {}
 
-  explicit TimestampType(const std::string& timezone, TimeUnit unit = TimeUnit::MILLI)
+  explicit TimestampType(TimeUnit unit, const std::string& timezone)
       : FixedWidthType(Type::TIMESTAMP), unit(unit), timezone(timezone) {}
 
   TimestampType(const TimestampType& other) : TimestampType(other.unit) {}
@@ -603,7 +603,7 @@ std::shared_ptr<DataType> ARROW_EXPORT list(const std::shared_ptr<DataType>& val
 
 std::shared_ptr<DataType> ARROW_EXPORT timestamp(TimeUnit unit);
 std::shared_ptr<DataType> ARROW_EXPORT timestamp(
-    const std::string& timezone, TimeUnit unit);
+    TimeUnit unit, const std::string& timezone);
 std::shared_ptr<DataType> ARROW_EXPORT time(TimeUnit unit);
 
 std::shared_ptr<DataType> ARROW_EXPORT struct_(

http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 705fe6b..2d698d3 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -99,14 +99,14 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         shared_ptr[CArray] dictionary()
 
     shared_ptr[CDataType] timestamp(TimeUnit unit)
-    shared_ptr[CDataType] timestamp(const c_string& timezone, TimeUnit unit)
+    shared_ptr[CDataType] timestamp(TimeUnit unit, const c_string& timezone)
 
     cdef cppclass CMemoryPool" arrow::MemoryPool":
         int64_t bytes_allocated()
 
     cdef cppclass CLoggingMemoryPool" arrow::LoggingMemoryPool"(CMemoryPool):
         CLoggingMemoryPool(CMemoryPool*)
-        
+
     cdef cppclass CBuffer" arrow::Buffer":
         uint8_t* data()
         int64_t size()

http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/python/pyarrow/schema.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pyx b/python/pyarrow/schema.pyx
index 4bc938d..ee38144 100644
--- a/python/pyarrow/schema.pyx
+++ b/python/pyarrow/schema.pyx
@@ -314,7 +314,7 @@ def timestamp(unit_str, tz=None):
             tz = tz.zone
 
         c_timezone = tobytes(tz)
-        out.init(la.timestamp(c_timezone, unit))
+        out.init(la.timestamp(unit, c_timezone))
 
     return out