You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/03/22 13:26:23 UTC
arrow git commit: ARROW-654: [C++] Serialize timezone in IPC metadata
Repository: arrow
Updated Branches:
refs/heads/master d25286718 -> 96734efb7
ARROW-654: [C++] Serialize timezone in IPC metadata
Author: Wes McKinney <we...@twosigma.com>
Closes #416 from wesm/ARROW-654 and squashes the following commits:
001708e [Wes McKinney] Fix API change in Python bindings
3729cf9 [Wes McKinney] Serialize timezone in IPC metadata
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/96734efb
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/96734efb
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/96734efb
Branch: refs/heads/master
Commit: 96734efb73852f2d8372f72d7c56e8fb3ab4e516
Parents: d252867
Author: Wes McKinney <we...@twosigma.com>
Authored: Wed Mar 22 09:26:09 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Wed Mar 22 09:26:09 2017 -0400
----------------------------------------------------------------------
cpp/src/arrow/ipc/feather-test.cc | 2 +-
cpp/src/arrow/ipc/feather.cc | 2 +-
cpp/src/arrow/ipc/metadata.cc | 16 ++++++++++++++--
cpp/src/arrow/ipc/test-common.h | 2 +-
cpp/src/arrow/memory_pool.cc | 2 +-
cpp/src/arrow/type-test.cc | 2 +-
cpp/src/arrow/type.cc | 4 ++--
cpp/src/arrow/type.h | 4 ++--
python/pyarrow/includes/libarrow.pxd | 4 ++--
python/pyarrow/schema.pyx | 2 +-
10 files changed, 26 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/ipc/feather-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/feather-test.cc b/cpp/src/arrow/ipc/feather-test.cc
index 078c3e1..2513887 100644
--- a/cpp/src/arrow/ipc/feather-test.cc
+++ b/cpp/src/arrow/ipc/feather-test.cc
@@ -355,7 +355,7 @@ TEST_F(TestTableWriter, TimeTypes) {
auto f0 = field("f0", date32());
auto f1 = field("f1", time(TimeUnit::MILLI));
auto f2 = field("f2", timestamp(TimeUnit::NANO));
- auto f3 = field("f3", timestamp("US/Los_Angeles", TimeUnit::SECOND));
+ auto f3 = field("f3", timestamp(TimeUnit::SECOND, "US/Los_Angeles"));
std::shared_ptr<Schema> schema(new Schema({f0, f1, f2, f3}));
std::vector<int64_t> values_vec = {0, 1, 2, 3, 4, 5, 6};
http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/ipc/feather.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/feather.cc b/cpp/src/arrow/ipc/feather.cc
index 72bbaa4..0dd9a81 100644
--- a/cpp/src/arrow/ipc/feather.cc
+++ b/cpp/src/arrow/ipc/feather.cc
@@ -287,7 +287,7 @@ class TableReader::TableReaderImpl {
} else {
tz = "";
}
- *out = std::make_shared<TimestampType>(tz, unit);
+ *out = timestamp(unit, tz);
} break;
case fbs::TypeMetadata_DateMetadata:
*out = date32();
http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/ipc/metadata.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata.cc b/cpp/src/arrow/ipc/metadata.cc
index a418d48..4dfda54 100644
--- a/cpp/src/arrow/ipc/metadata.cc
+++ b/cpp/src/arrow/ipc/metadata.cc
@@ -46,6 +46,7 @@ using LargeRecordBatchOffset = flatbuffers::Offset<flatbuf::LargeRecordBatch>;
using RecordBatchOffset = flatbuffers::Offset<flatbuf::RecordBatch>;
using VectorLayoutOffset = flatbuffers::Offset<arrow::flatbuf::VectorLayout>;
using Offset = flatbuffers::Offset<void>;
+using FBString = flatbuffers::Offset<flatbuffers::String>;
static constexpr flatbuf::MetadataVersion kMetadataVersion = flatbuf::MetadataVersion_V2;
@@ -250,7 +251,12 @@ static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
}
case flatbuf::Type_Timestamp: {
auto ts_type = static_cast<const flatbuf::Timestamp*>(type_data);
- *out = timestamp(FromFlatbufferUnit(ts_type->unit()));
+ TimeUnit unit = FromFlatbufferUnit(ts_type->unit());
+ if (ts_type->timezone() != 0 && ts_type->timezone()->Length() > 0) {
+ *out = timestamp(unit, ts_type->timezone()->str());
+ } else {
+ *out = timestamp(unit);
+ }
return Status::OK();
}
case flatbuf::Type_Interval:
@@ -364,7 +370,13 @@ static Status TypeToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type,
case Type::TIMESTAMP: {
const auto& ts_type = static_cast<const TimestampType&>(*type);
*out_type = flatbuf::Type_Timestamp;
- *offset = flatbuf::CreateTimestamp(fbb, ToFlatbufferUnit(ts_type.unit)).Union();
+
+ flatbuf::TimeUnit fb_unit = ToFlatbufferUnit(ts_type.unit);
+ FBString fb_timezone = 0;
+ if (ts_type.timezone.size() > 0) {
+ fb_timezone = fbb.CreateString(ts_type.timezone);
+ }
+ *offset = flatbuf::CreateTimestamp(fbb, fb_unit, fb_timezone).Union();
} break;
case Type::LIST:
*out_type = flatbuf::Type_List;
http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/ipc/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h
index ba203b0..330af0c 100644
--- a/cpp/src/arrow/ipc/test-common.h
+++ b/cpp/src/arrow/ipc/test-common.h
@@ -497,7 +497,7 @@ Status MakeDate32(std::shared_ptr<RecordBatch>* out) {
Status MakeTimestamps(std::shared_ptr<RecordBatch>* out) {
std::vector<bool> is_valid = {true, true, true, false, true, true, true};
auto f0 = field("f0", timestamp(TimeUnit::MILLI));
- auto f1 = field("f1", timestamp(TimeUnit::NANO));
+ auto f1 = field("f1", timestamp(TimeUnit::NANO, "America/New_York"));
auto f2 = field("f2", timestamp(TimeUnit::SECOND));
std::shared_ptr<Schema> schema(new Schema({f0, f1, f2}));
http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/memory_pool.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index cf01a02..7992f22 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -19,10 +19,10 @@
#include <algorithm>
#include <cstdlib>
+#include <iostream>
#include <mutex>
#include <sstream>
#include <stdlib.h>
-#include <iostream>
#include "arrow/status.h"
#include "arrow/util/logging.h"
http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/type-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type-test.cc b/cpp/src/arrow/type-test.cc
index ddfff87..22aa7eb 100644
--- a/cpp/src/arrow/type-test.cc
+++ b/cpp/src/arrow/type-test.cc
@@ -209,7 +209,7 @@ TEST(TestTimestampType, Equals) {
TEST(TestTimestampType, ToString) {
auto t1 = timestamp(TimeUnit::MILLI);
- auto t2 = timestamp("US/Eastern", TimeUnit::NANO);
+ auto t2 = timestamp(TimeUnit::NANO, "US/Eastern");
auto t3 = timestamp(TimeUnit::SECOND);
auto t4 = timestamp(TimeUnit::MICRO);
http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/type.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index ee0a89a..64070cb 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -244,8 +244,8 @@ std::shared_ptr<DataType> timestamp(TimeUnit unit) {
return std::make_shared<TimestampType>(unit);
}
-std::shared_ptr<DataType> timestamp(const std::string& timezone, TimeUnit unit) {
- return std::make_shared<TimestampType>(timezone, unit);
+std::shared_ptr<DataType> timestamp(TimeUnit unit, const std::string& timezone) {
+ return std::make_shared<TimestampType>(unit, timezone);
}
std::shared_ptr<DataType> time(TimeUnit unit) {
http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index adc3161..27b28d2 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -532,7 +532,7 @@ struct ARROW_EXPORT TimestampType : public FixedWidthType {
explicit TimestampType(TimeUnit unit = TimeUnit::MILLI)
: FixedWidthType(Type::TIMESTAMP), unit(unit) {}
- explicit TimestampType(const std::string& timezone, TimeUnit unit = TimeUnit::MILLI)
+ explicit TimestampType(TimeUnit unit, const std::string& timezone)
: FixedWidthType(Type::TIMESTAMP), unit(unit), timezone(timezone) {}
TimestampType(const TimestampType& other) : TimestampType(other.unit) {}
@@ -603,7 +603,7 @@ std::shared_ptr<DataType> ARROW_EXPORT list(const std::shared_ptr<DataType>& val
std::shared_ptr<DataType> ARROW_EXPORT timestamp(TimeUnit unit);
std::shared_ptr<DataType> ARROW_EXPORT timestamp(
- const std::string& timezone, TimeUnit unit);
+ TimeUnit unit, const std::string& timezone);
std::shared_ptr<DataType> ARROW_EXPORT time(TimeUnit unit);
std::shared_ptr<DataType> ARROW_EXPORT struct_(
http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 705fe6b..2d698d3 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -99,14 +99,14 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
shared_ptr[CArray] dictionary()
shared_ptr[CDataType] timestamp(TimeUnit unit)
- shared_ptr[CDataType] timestamp(const c_string& timezone, TimeUnit unit)
+ shared_ptr[CDataType] timestamp(TimeUnit unit, const c_string& timezone)
cdef cppclass CMemoryPool" arrow::MemoryPool":
int64_t bytes_allocated()
cdef cppclass CLoggingMemoryPool" arrow::LoggingMemoryPool"(CMemoryPool):
CLoggingMemoryPool(CMemoryPool*)
-
+
cdef cppclass CBuffer" arrow::Buffer":
uint8_t* data()
int64_t size()
http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/python/pyarrow/schema.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pyx b/python/pyarrow/schema.pyx
index 4bc938d..ee38144 100644
--- a/python/pyarrow/schema.pyx
+++ b/python/pyarrow/schema.pyx
@@ -314,7 +314,7 @@ def timestamp(unit_str, tz=None):
tz = tz.zone
c_timezone = tobytes(tz)
- out.init(la.timestamp(c_timezone, unit))
+ out.init(la.timestamp(unit, c_timezone))
return out