You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/04/05 17:33:31 UTC
arrow git commit: ARROW-510 ARROW-582 ARROW-663 ARROW-729: [Java] Added units for Time and Date types, and integration tests
Repository: arrow
Updated Branches:
refs/heads/master e29a7d4ca -> f4fcb42c2
ARROW-510 ARROW-582 ARROW-663 ARROW-729: [Java] Added units for Time and Date types, and integration tests
closes #366
Author: Leif Walsh <le...@gmail.com>
Author: Wes McKinney <we...@twosigma.com>
Closes #475 from leifwalsh/feature/java-date-time-types and squashes the following commits:
2e2a4cf [Leif Walsh] ARROW-729: [Java] removed Joda DateTime getters from Date* and Time* types
47f83a8 [Wes McKinney] Integration tests for all date and time combinations
6e86422 [Wes McKinney] ARROW-733: [C++/Python] Rename FixedWidthBinary to FixedSizeBinary for consistency with FixedSizeList
2dca474 [Leif Walsh] ARROW-729: [Java] Added units for date/time types
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/f4fcb42c
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/f4fcb42c
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/f4fcb42c
Branch: refs/heads/master
Commit: f4fcb42c2cb0d463db4ddeef68e4392f8d7c049f
Parents: e29a7d4
Author: Leif Walsh <le...@gmail.com>
Authored: Wed Apr 5 13:33:25 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Wed Apr 5 13:33:25 2017 -0400
----------------------------------------------------------------------
cpp/src/arrow/ipc/json-internal.cc | 13 ++
cpp/src/arrow/ipc/metadata.cc | 11 +-
cpp/src/arrow/type-test.cc | 8 +-
cpp/src/arrow/type.h | 15 +-
integration/integration_test.py | 145 +++++++++++++++----
.../src/main/codegen/data/ValueVectorTypes.tdd | 18 ++-
.../codegen/templates/FixedValueVectors.java | 21 +--
.../arrow/vector/file/json/JsonFileReader.java | 28 +++-
.../arrow/vector/file/json/JsonFileWriter.java | 28 +++-
.../org/apache/arrow/vector/types/Types.java | 95 ++++++++++--
.../arrow/vector/types/pojo/TestSchema.java | 40 ++++-
11 files changed, 337 insertions(+), 85 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/cpp/src/arrow/ipc/json-internal.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/json-internal.cc b/cpp/src/arrow/ipc/json-internal.cc
index 1e2385b..124c21b 100644
--- a/cpp/src/arrow/ipc/json-internal.cc
+++ b/cpp/src/arrow/ipc/json-internal.cc
@@ -175,6 +175,8 @@ class JsonSchemaWriter {
void WriteTypeMetadata(const TimeType& type) {
writer_->Key("unit");
writer_->String(GetTimeUnitName(type.unit));
+ writer_->Key("bitWidth");
+ writer_->Int(type.bit_width());
}
void WriteTypeMetadata(const DateType& type) {
@@ -608,6 +610,9 @@ static Status GetTime(const RjObject& json_type, std::shared_ptr<DataType>* type
const auto& json_unit = json_type.FindMember("unit");
RETURN_NOT_STRING("unit", json_unit, json_type);
+ const auto& json_bit_width = json_type.FindMember("bitWidth");
+ RETURN_NOT_INT("bitWidth", json_bit_width, json_type);
+
std::string unit_str = json_unit->value.GetString();
if (unit_str == "SECOND") {
@@ -623,6 +628,14 @@ static Status GetTime(const RjObject& json_type, std::shared_ptr<DataType>* type
ss << "Invalid time unit: " << unit_str;
return Status::Invalid(ss.str());
}
+
+ const auto& fw_type = static_cast<const FixedWidthType&>(**type);
+
+ int bit_width = json_bit_width->value.GetInt();
+ if (bit_width != fw_type.bit_width()) {
+ return Status::Invalid("Indicated bit width does not match unit");
+ }
+
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/cpp/src/arrow/ipc/metadata.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata.cc b/cpp/src/arrow/ipc/metadata.cc
index 2ff25ee..d902ec2 100644
--- a/cpp/src/arrow/ipc/metadata.cc
+++ b/cpp/src/arrow/ipc/metadata.cc
@@ -255,12 +255,19 @@ static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
case flatbuf::Type_Time: {
auto time_type = static_cast<const flatbuf::Time*>(type_data);
TimeUnit unit = FromFlatbufferUnit(time_type->unit());
+ int32_t bit_width = time_type->bitWidth();
switch (unit) {
case TimeUnit::SECOND:
case TimeUnit::MILLI:
+ if (bit_width != 32) {
+ return Status::Invalid("Time is 32 bits for second/milli unit");
+ }
*out = time32(unit);
break;
default:
+ if (bit_width != 64) {
+ return Status::Invalid("Time is 64 bits for micro/nano unit");
+ }
*out = time64(unit);
break;
}
@@ -386,12 +393,12 @@ static Status TypeToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type,
case Type::TIME32: {
const auto& time_type = static_cast<const Time32Type&>(*type);
*out_type = flatbuf::Type_Time;
- *offset = flatbuf::CreateTime(fbb, ToFlatbufferUnit(time_type.unit)).Union();
+ *offset = flatbuf::CreateTime(fbb, ToFlatbufferUnit(time_type.unit), 32).Union();
} break;
case Type::TIME64: {
const auto& time_type = static_cast<const Time64Type&>(*type);
*out_type = flatbuf::Type_Time;
- *offset = flatbuf::CreateTime(fbb, ToFlatbufferUnit(time_type.unit)).Union();
+ *offset = flatbuf::CreateTime(fbb, ToFlatbufferUnit(time_type.unit), 64).Union();
} break;
case Type::TIMESTAMP: {
const auto& ts_type = static_cast<const TimestampType&>(*type);
http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/cpp/src/arrow/type-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type-test.cc b/cpp/src/arrow/type-test.cc
index dafadc1..66164e3 100644
--- a/cpp/src/arrow/type-test.cc
+++ b/cpp/src/arrow/type-test.cc
@@ -191,12 +191,15 @@ TEST(TestListType, Basics) {
ASSERT_EQ("list<item: list<item: string>>", lt2.ToString());
}
-TEST(TestDateTypes, ToString) {
+TEST(TestDateTypes, Attrs) {
auto t1 = date32();
auto t2 = date64();
ASSERT_EQ("date32[day]", t1->ToString());
ASSERT_EQ("date64[ms]", t2->ToString());
+
+ ASSERT_EQ(32, static_cast<const FixedWidthType&>(*t1).bit_width());
+ ASSERT_EQ(64, static_cast<const FixedWidthType&>(*t2).bit_width());
}
TEST(TestTimeType, Equals) {
@@ -207,6 +210,9 @@ TEST(TestTimeType, Equals) {
Time64Type t4(TimeUnit::NANO);
Time64Type t5(TimeUnit::MICRO);
+ ASSERT_EQ(32, t0.bit_width());
+ ASSERT_EQ(64, t3.bit_width());
+
ASSERT_TRUE(t0.Equals(t2));
ASSERT_TRUE(t1.Equals(t1));
ASSERT_FALSE(t1.Equals(t3));
http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 6b936f3..0e69133 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -18,6 +18,7 @@
#ifndef ARROW_TYPE_H
#define ARROW_TYPE_H
+#include <climits>
#include <cstdint>
#include <memory>
#include <ostream>
@@ -220,7 +221,7 @@ struct ARROW_EXPORT CTypeImpl : public BASE {
CTypeImpl() : BASE(TYPE_ID) {}
- int bit_width() const override { return static_cast<int>(sizeof(C_TYPE) * 8); }
+ int bit_width() const override { return static_cast<int>(sizeof(C_TYPE) * CHAR_BIT); }
Status Accept(TypeVisitor* visitor) const override {
return visitor->Visit(*static_cast<const DERIVED*>(this));
@@ -456,7 +457,7 @@ struct ARROW_EXPORT Date32Type : public DateType {
Date32Type();
- int bit_width() const override { return static_cast<int>(sizeof(c_type) * 4); }
+ int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
Status Accept(TypeVisitor* visitor) const override;
std::string ToString() const override;
@@ -470,7 +471,7 @@ struct ARROW_EXPORT Date64Type : public DateType {
Date64Type();
- int bit_width() const override { return static_cast<int>(sizeof(c_type) * 8); }
+ int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
Status Accept(TypeVisitor* visitor) const override;
std::string ToString() const override;
@@ -509,7 +510,7 @@ struct ARROW_EXPORT Time32Type : public TimeType {
static constexpr Type::type type_id = Type::TIME32;
using c_type = int32_t;
- int bit_width() const override { return static_cast<int>(sizeof(c_type) * 4); }
+ int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
explicit Time32Type(TimeUnit unit = TimeUnit::MILLI);
@@ -521,7 +522,7 @@ struct ARROW_EXPORT Time64Type : public TimeType {
static constexpr Type::type type_id = Type::TIME64;
using c_type = int64_t;
- int bit_width() const override { return static_cast<int>(sizeof(c_type) * 8); }
+ int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
explicit Time64Type(TimeUnit unit = TimeUnit::MILLI);
@@ -535,7 +536,7 @@ struct ARROW_EXPORT TimestampType : public FixedWidthType {
typedef int64_t c_type;
static constexpr Type::type type_id = Type::TIMESTAMP;
- int bit_width() const override { return static_cast<int>(sizeof(int64_t) * 8); }
+ int bit_width() const override { return static_cast<int>(sizeof(int64_t) * CHAR_BIT); }
explicit TimestampType(TimeUnit unit = TimeUnit::MILLI)
: FixedWidthType(Type::TIMESTAMP), unit(unit) {}
@@ -557,7 +558,7 @@ struct ARROW_EXPORT IntervalType : public FixedWidthType {
using c_type = int64_t;
static constexpr Type::type type_id = Type::INTERVAL;
- int bit_width() const override { return static_cast<int>(sizeof(int64_t) * 8); }
+ int bit_width() const override { return static_cast<int>(sizeof(int64_t) * CHAR_BIT); }
Unit unit;
http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/integration/integration_test.py
----------------------------------------------------------------------
diff --git a/integration/integration_test.py b/integration/integration_test.py
index ec2a38d..6631dc8 100644
--- a/integration/integration_test.py
+++ b/integration/integration_test.py
@@ -175,10 +175,14 @@ TEST_INT_MAX = 2**31 - 1
class IntegerType(PrimitiveType):
- def __init__(self, name, is_signed, bit_width, nullable=True):
+ def __init__(self, name, is_signed, bit_width, nullable=True,
+ min_value=TEST_INT_MIN,
+ max_value=TEST_INT_MAX):
PrimitiveType.__init__(self, name, nullable=nullable)
self.is_signed = is_signed
self.bit_width = bit_width
+ self.min_value = min_value
+ self.max_value = max_value
@property
def numpy_type(self):
@@ -194,14 +198,80 @@ class IntegerType(PrimitiveType):
def generate_column(self, size):
iinfo = np.iinfo(self.numpy_type)
values = [int(x) for x in
- np.random.randint(max(iinfo.min, TEST_INT_MIN),
- min(iinfo.max, TEST_INT_MAX),
+ np.random.randint(max(iinfo.min, self.min_value),
+ min(iinfo.max, self.max_value),
size=size)]
is_valid = self._make_is_valid(size)
return PrimitiveColumn(self.name, size, is_valid, values)
+class DateType(IntegerType):
+
+ DAY = 0
+ MILLISECOND = 1
+
+ def __init__(self, name, unit, nullable=True):
+ self.unit = unit
+ bit_width = 32 if unit == self.DAY else 64
+ IntegerType.__init__(self, name, True, bit_width, nullable=nullable)
+
+ def _get_type(self):
+ return OrderedDict([
+ ('name', 'date'),
+ ('unit', 'DAY' if self.unit == self.DAY else 'MILLISECOND')
+ ])
+
+
+TIMEUNIT_NAMES = {
+ 's': 'SECOND',
+ 'ms': 'MILLISECOND',
+ 'us': 'MICROSECOND',
+ 'ns': 'NANOSECOND'
+}
+
+
+class TimeType(IntegerType):
+
+ BIT_WIDTHS = {
+ 's': 32,
+ 'ms': 32,
+ 'us': 64,
+ 'ns': 64
+ }
+
+ def __init__(self, name, unit='s', nullable=True):
+ self.unit = unit
+ IntegerType.__init__(self, name, True, self.BIT_WIDTHS[unit],
+ nullable=nullable)
+
+ def _get_type(self):
+ return OrderedDict([
+ ('name', 'time'),
+ ('unit', TIMEUNIT_NAMES[self.unit]),
+ ('bitWidth', self.bit_width)
+ ])
+
+
+class TimestampType(IntegerType):
+
+ def __init__(self, name, unit='s', tz=None, nullable=True):
+ self.unit = unit
+ self.tz = tz
+ IntegerType.__init__(self, name, True, 64, nullable=nullable)
+
+ def _get_type(self):
+ fields = [
+ ('name', 'timestamp'),
+ ('unit', TIMEUNIT_NAMES[self.unit])
+ ]
+
+ if self.tz is not None:
+ fields.append(('timezone', self.tz))
+
+ return OrderedDict(fields)
+
+
class FloatingPointType(PrimitiveType):
def __init__(self, name, bit_width, nullable=True):
@@ -509,6 +579,20 @@ def get_field(name, type_, nullable=True):
raise TypeError(dtype)
+def _generate_file(fields, batch_sizes):
+ schema = JSONSchema(fields)
+ batches = []
+ for size in batch_sizes:
+ columns = []
+ for field in fields:
+ col = field.generate_column(size)
+ columns.append(col)
+
+ batches.append(JSONRecordBatch(size, columns))
+
+ return JSONFile(schema, batches)
+
+
def generate_primitive_case():
types = ['bool', 'int8', 'int16', 'int32', 'int64',
'uint8', 'uint16', 'uint32', 'uint64',
@@ -520,19 +604,27 @@ def generate_primitive_case():
fields.append(get_field(type_ + "_nullable", type_, True))
fields.append(get_field(type_ + "_nonnullable", type_, False))
- schema = JSONSchema(fields)
-
batch_sizes = [7, 10]
- batches = []
- for size in batch_sizes:
- columns = []
- for field in fields:
- col = field.generate_column(size)
- columns.append(col)
+ return _generate_file(fields, batch_sizes)
- batches.append(JSONRecordBatch(size, columns))
- return JSONFile(schema, batches)
+def generate_datetime_case():
+ fields = [
+ DateType('f0', DateType.DAY),
+ DateType('f1', DateType.MILLISECOND),
+ TimeType('f2', 's'),
+ TimeType('f3', 'ms'),
+ TimeType('f4', 'us'),
+ TimeType('f5', 'ns'),
+ TimestampType('f6', 's'),
+ TimestampType('f7', 'ms'),
+ TimestampType('f8', 'us'),
+ TimestampType('f9', 'ns'),
+ TimestampType('f10', 'ms', tz='America/New_York')
+ ]
+
+ batch_sizes = [7, 10]
+ return _generate_file(fields, batch_sizes)
def generate_nested_case():
@@ -545,19 +637,8 @@ def generate_nested_case():
# ListType('list_nonnullable', get_field('item', 'int32'), False),
]
- schema = JSONSchema(fields)
-
batch_sizes = [7, 10]
- batches = []
- for size in batch_sizes:
- columns = []
- for field in fields:
- col = field.generate_column(size)
- columns.append(col)
-
- batches.append(JSONRecordBatch(size, columns))
-
- return JSONFile(schema, batches)
+ return _generate_file(fields, batch_sizes)
def get_generated_json_files():
@@ -566,13 +647,13 @@ def get_generated_json_files():
def _temp_path():
return
- file_objs = []
-
- K = 10
- for i in range(K):
- file_objs.append(generate_primitive_case())
-
- file_objs.append(generate_nested_case())
+ file_objs = [
+ generate_primitive_case(),
+ generate_primitive_case(),
+ generate_primitive_case(),
+ generate_datetime_case(),
+ generate_nested_case()
+ ]
generated_paths = []
for file_obj in file_objs:
http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/java/vector/src/main/codegen/data/ValueVectorTypes.tdd
----------------------------------------------------------------------
diff --git a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd
index 2181cfd..b08c100 100644
--- a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd
+++ b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd
@@ -56,8 +56,10 @@
{ class: "Int", valueHolder: "IntHolder"},
{ class: "UInt4", valueHolder: "UInt4Holder" },
{ class: "Float4", javaType: "float" , boxedType: "Float", fields: [{name: "value", type: "float"}]},
- { class: "IntervalYear", javaType: "int", friendlyType: "Period" }
- { class: "Time", javaType: "int", friendlyType: "DateTime" }
+ { class: "DateDay" },
+ { class: "IntervalYear", javaType: "int", friendlyType: "Period" },
+ { class: "TimeSec" },
+ { class: "TimeMilli" }
]
},
{
@@ -70,11 +72,13 @@
{ class: "BigInt"},
{ class: "UInt8" },
{ class: "Float8", javaType: "double" , boxedType: "Double", fields: [{name: "value", type: "double"}], },
- { class: "Date", javaType: "long", friendlyType: "DateTime" },
- { class: "TimeStampSec", javaType: "long", boxedType: "Long", friendlyType: "DateTime" }
- { class: "TimeStampMilli", javaType: "long", boxedType: "Long", friendlyType: "DateTime" }
- { class: "TimeStampMicro", javaType: "long", boxedType: "Long", friendlyType: "DateTime" }
- { class: "TimeStampNano", javaType: "long", boxedType: "Long", friendlyType: "DateTime" }
+ { class: "DateMilli" },
+ { class: "TimeStampSec", javaType: "long", boxedType: "Long", friendlyType: "DateTime" },
+ { class: "TimeStampMilli", javaType: "long", boxedType: "Long", friendlyType: "DateTime" },
+ { class: "TimeStampMicro", javaType: "long", boxedType: "Long", friendlyType: "DateTime" },
+ { class: "TimeStampNano", javaType: "long", boxedType: "Long", friendlyType: "DateTime" },
+ { class: "TimeMicro" },
+ { class: "TimeNano" }
]
},
{
http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/java/vector/src/main/codegen/templates/FixedValueVectors.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/codegen/templates/FixedValueVectors.java b/java/vector/src/main/codegen/templates/FixedValueVectors.java
index d5265f1..947c82c 100644
--- a/java/vector/src/main/codegen/templates/FixedValueVectors.java
+++ b/java/vector/src/main/codegen/templates/FixedValueVectors.java
@@ -19,6 +19,7 @@
import org.apache.arrow.vector.util.DecimalUtility;
import java.lang.Override;
+import java.util.concurrent.TimeUnit;
<@pp.dropOutputFile />
<#list vv.types as type>
@@ -482,12 +483,15 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
</#if>
- <#if minor.class == "Date">
+ <#if minor.class == "DateDay" ||
+ minor.class == "DateMilli" ||
+ minor.class == "TimeSec" ||
+ minor.class == "TimeMilli" ||
+ minor.class == "TimeMicro" ||
+ minor.class == "TimeNano">
@Override
public ${friendlyType} getObject(int index) {
- org.joda.time.DateTime date = new org.joda.time.DateTime(get(index), org.joda.time.DateTimeZone.UTC);
- date = date.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault());
- return date;
+ return get(index);
}
<#elseif minor.class == "TimeStampSec">
@@ -554,15 +558,6 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements F
append(months).append(monthString));
}
- <#elseif minor.class == "Time">
- @Override
- public DateTime getObject(int index) {
-
- org.joda.time.DateTime time = new org.joda.time.DateTime(get(index), org.joda.time.DateTimeZone.UTC);
- time = time.withZoneRetainFields(org.joda.time.DateTimeZone.getDefault());
- return time;
- }
-
<#elseif minor.class == "Decimal9" || minor.class == "Decimal18">
@Override
public ${friendlyType} getObject(int index) {
http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
index bdb63b9..2f91205 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java
@@ -32,15 +32,21 @@ import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVector;
import org.apache.arrow.vector.BufferBacked;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DateMilliVector;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.Float8Vector;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.SmallIntVector;
-import org.apache.arrow.vector.TimeStampSecVector;
-import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeNanoVector;
+import org.apache.arrow.vector.TimeSecVector;
import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
import org.apache.arrow.vector.TimeStampNanoVector;
+import org.apache.arrow.vector.TimeStampSecVector;
import org.apache.arrow.vector.TinyIntVector;
import org.apache.arrow.vector.UInt1Vector;
import org.apache.arrow.vector.UInt2Vector;
@@ -240,6 +246,24 @@ public class JsonFileReader implements AutoCloseable {
case VARCHAR:
((VarCharVector)valueVector).getMutator().setSafe(i, parser.readValueAs(String.class).getBytes(UTF_8));
break;
+ case DATEDAY:
+ ((DateDayVector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class));
+ break;
+ case DATEMILLI:
+ ((DateMilliVector)valueVector).getMutator().set(i, parser.readValueAs(Long.class));
+ break;
+ case TIMESEC:
+ ((TimeSecVector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class));
+ break;
+ case TIMEMILLI:
+ ((TimeMilliVector)valueVector).getMutator().set(i, parser.readValueAs(Integer.class));
+ break;
+ case TIMEMICRO:
+ ((TimeMicroVector)valueVector).getMutator().set(i, parser.readValueAs(Long.class));
+ break;
+ case TIMENANO:
+ ((TimeNanoVector)valueVector).getMutator().set(i, parser.readValueAs(Long.class));
+ break;
case TIMESTAMPSEC:
((TimeStampSecVector)valueVector).getMutator().set(i, parser.readValueAs(Long.class));
break;
http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java
index 99040b6..d86b3de 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileWriter.java
@@ -23,11 +23,17 @@ import java.util.List;
import org.apache.arrow.vector.BitVector;
import org.apache.arrow.vector.BufferBacked;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DateMilliVector;
import org.apache.arrow.vector.FieldVector;
-import org.apache.arrow.vector.TimeStampSecVector;
-import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeNanoVector;
+import org.apache.arrow.vector.TimeSecVector;
import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
import org.apache.arrow.vector.TimeStampNanoVector;
+import org.apache.arrow.vector.TimeStampSecVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.ValueVector.Accessor;
import org.apache.arrow.vector.VarBinaryVector;
@@ -144,6 +150,24 @@ public class JsonFileWriter implements AutoCloseable {
private void writeValueToGenerator(ValueVector valueVector, int i) throws IOException {
switch (valueVector.getMinorType()) {
+ case DATEDAY:
+ generator.writeNumber(((DateDayVector)valueVector).getAccessor().get(i));
+ break;
+ case DATEMILLI:
+ generator.writeNumber(((DateMilliVector)valueVector).getAccessor().get(i));
+ break;
+ case TIMESEC:
+ generator.writeNumber(((TimeSecVector)valueVector).getAccessor().get(i));
+ break;
+ case TIMEMILLI:
+ generator.writeNumber(((TimeMilliVector)valueVector).getAccessor().get(i));
+ break;
+ case TIMEMICRO:
+ generator.writeNumber(((TimeMicroVector)valueVector).getAccessor().get(i));
+ break;
+ case TIMENANO:
+ generator.writeNumber(((TimeNanoVector)valueVector).getAccessor().get(i));
+ break;
case TIMESTAMPSEC:
generator.writeNumber(((TimeStampSecVector)valueVector).getAccessor().get(i));
break;
http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
index f07bb58..b0455fa 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
@@ -25,7 +25,8 @@ import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.NullableBigIntVector;
import org.apache.arrow.vector.NullableBitVector;
-import org.apache.arrow.vector.NullableDateVector;
+import org.apache.arrow.vector.NullableDateDayVector;
+import org.apache.arrow.vector.NullableDateMilliVector;
import org.apache.arrow.vector.NullableDecimalVector;
import org.apache.arrow.vector.NullableFloat4Vector;
import org.apache.arrow.vector.NullableFloat8Vector;
@@ -33,11 +34,14 @@ import org.apache.arrow.vector.NullableIntVector;
import org.apache.arrow.vector.NullableIntervalDayVector;
import org.apache.arrow.vector.NullableIntervalYearVector;
import org.apache.arrow.vector.NullableSmallIntVector;
+import org.apache.arrow.vector.NullableTimeMicroVector;
+import org.apache.arrow.vector.NullableTimeMilliVector;
+import org.apache.arrow.vector.NullableTimeNanoVector;
+import org.apache.arrow.vector.NullableTimeSecVector;
import org.apache.arrow.vector.NullableTimeStampMicroVector;
import org.apache.arrow.vector.NullableTimeStampMilliVector;
import org.apache.arrow.vector.NullableTimeStampNanoVector;
import org.apache.arrow.vector.NullableTimeStampSecVector;
-import org.apache.arrow.vector.NullableTimeVector;
import org.apache.arrow.vector.NullableTinyIntVector;
import org.apache.arrow.vector.NullableUInt1Vector;
import org.apache.arrow.vector.NullableUInt2Vector;
@@ -52,7 +56,8 @@ import org.apache.arrow.vector.complex.NullableMapVector;
import org.apache.arrow.vector.complex.UnionVector;
import org.apache.arrow.vector.complex.impl.BigIntWriterImpl;
import org.apache.arrow.vector.complex.impl.BitWriterImpl;
-import org.apache.arrow.vector.complex.impl.DateWriterImpl;
+import org.apache.arrow.vector.complex.impl.DateDayWriterImpl;
+import org.apache.arrow.vector.complex.impl.DateMilliWriterImpl;
import org.apache.arrow.vector.complex.impl.DecimalWriterImpl;
import org.apache.arrow.vector.complex.impl.Float4WriterImpl;
import org.apache.arrow.vector.complex.impl.Float8WriterImpl;
@@ -61,11 +66,14 @@ import org.apache.arrow.vector.complex.impl.IntervalDayWriterImpl;
import org.apache.arrow.vector.complex.impl.IntervalYearWriterImpl;
import org.apache.arrow.vector.complex.impl.NullableMapWriter;
import org.apache.arrow.vector.complex.impl.SmallIntWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeMicroWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeMilliWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeNanoWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeSecWriterImpl;
import org.apache.arrow.vector.complex.impl.TimeStampMicroWriterImpl;
import org.apache.arrow.vector.complex.impl.TimeStampMilliWriterImpl;
import org.apache.arrow.vector.complex.impl.TimeStampNanoWriterImpl;
import org.apache.arrow.vector.complex.impl.TimeStampSecWriterImpl;
-import org.apache.arrow.vector.complex.impl.TimeWriterImpl;
import org.apache.arrow.vector.complex.impl.TinyIntWriterImpl;
import org.apache.arrow.vector.complex.impl.UInt1WriterImpl;
import org.apache.arrow.vector.complex.impl.UInt2WriterImpl;
@@ -164,26 +172,70 @@ public class Types {
return new BigIntWriterImpl((NullableBigIntVector) vector);
}
},
- DATE(new Date(DateUnit.MILLISECOND)) {
+ DATEDAY(new Date(DateUnit.DAY)) {
@Override
public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) {
- return new NullableDateVector(name, fieldType, allocator);
+ return new NullableDateDayVector(name, fieldType, allocator);
}
@Override
public FieldWriter getNewFieldWriter(ValueVector vector) {
- return new DateWriterImpl((NullableDateVector) vector);
+ return new DateDayWriterImpl((NullableDateDayVector) vector);
}
},
- TIME(new Time(TimeUnit.MILLISECOND, 32)) {
+ DATEMILLI(new Date(DateUnit.MILLISECOND)) {
@Override
public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) {
- return new NullableTimeVector(name, fieldType, allocator);
+ return new NullableDateMilliVector(name, fieldType, allocator);
}
@Override
public FieldWriter getNewFieldWriter(ValueVector vector) {
- return new TimeWriterImpl((NullableTimeVector) vector);
+ return new DateMilliWriterImpl((NullableDateMilliVector) vector);
+ }
+ },
+ TIMESEC(new Time(TimeUnit.SECOND, 32)) {
+ @Override
+ public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) {
+ return new NullableTimeSecVector(name, fieldType, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeSecWriterImpl((NullableTimeSecVector) vector);
+ }
+ },
+ TIMEMILLI(new Time(TimeUnit.MILLISECOND, 32)) {
+ @Override
+ public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) {
+ return new NullableTimeMilliVector(name, fieldType, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeMilliWriterImpl((NullableTimeMilliVector) vector);
+ }
+ },
+ TIMEMICRO(new Time(TimeUnit.MICROSECOND, 64)) {
+ @Override
+ public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) {
+ return new NullableTimeMicroVector(name, fieldType, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeMicroWriterImpl((NullableTimeMicroVector) vector);
+ }
+ },
+ TIMENANO(new Time(TimeUnit.NANOSECOND, 64)) {
+ @Override
+ public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) {
+ return new NullableTimeNanoVector(name, fieldType, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeNanoWriterImpl((NullableTimeNanoVector) vector);
}
},
// time in second from the Unix epoch, 00:00:00.000000 on 1 January 1970, UTC.
@@ -479,14 +531,29 @@ public class Types {
}
@Override public MinorType visit(Date type) {
- return MinorType.DATE;
+ switch (type.getUnit()) {
+ case DAY:
+ return MinorType.DATEDAY;
+ case MILLISECOND:
+ return MinorType.DATEMILLI;
+ default:
+ throw new IllegalArgumentException("unknown unit: " + type);
+ }
}
@Override public MinorType visit(Time type) {
- if (type.getUnit() != TimeUnit.MILLISECOND || type.getBitWidth() != 32) {
- throw new IllegalArgumentException("Only milliseconds on 32 bits supported for now: " + type);
+ switch (type.getUnit()) {
+ case SECOND:
+ return MinorType.TIMESEC;
+ case MILLISECOND:
+ return MinorType.TIMEMILLI;
+ case MICROSECOND:
+ return MinorType.TIMEMICRO;
+ case NANOSECOND:
+ return MinorType.TIMENANO;
+ default:
+ throw new IllegalArgumentException("unknown unit: " + type);
}
- return MinorType.TIME;
}
@Override public MinorType visit(Timestamp type) {
http://git-wip-us.apache.org/repos/asf/arrow/blob/f4fcb42c/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java
----------------------------------------------------------------------
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java
index 45f3b56..56fa73e 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java
@@ -86,11 +86,15 @@ public class TestSchema {
field("h", new Binary()),
field("i", new Bool()),
field("j", new Decimal(5, 5)),
- field("k", new Date(DateUnit.MILLISECOND)),
- field("l", new Time(TimeUnit.MILLISECOND, 32)),
- field("m", new Timestamp(TimeUnit.MILLISECOND, "UTC")),
- field("n", new Timestamp(TimeUnit.MICROSECOND, null)),
- field("o", new Interval(IntervalUnit.DAY_TIME))
+ field("k", new Date(DateUnit.DAY)),
+ field("l", new Date(DateUnit.MILLISECOND)),
+ field("m", new Time(TimeUnit.SECOND, 32)),
+ field("n", new Time(TimeUnit.MILLISECOND, 32)),
+ field("o", new Time(TimeUnit.MICROSECOND, 64)),
+ field("p", new Time(TimeUnit.NANOSECOND, 64)),
+ field("q", new Timestamp(TimeUnit.MILLISECOND, "UTC")),
+ field("r", new Timestamp(TimeUnit.MICROSECOND, null)),
+ field("s", new Interval(IntervalUnit.DAY_TIME))
));
roundTrip(schema);
}
@@ -105,6 +109,32 @@ public class TestSchema {
}
@Test
+ public void testDate() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", new Date(DateUnit.DAY)),
+ field("b", new Date(DateUnit.MILLISECOND))
+ ));
+ roundTrip(schema);
+ assertEquals(
+ "Schema<a: Date(DAY), b: Date(MILLISECOND)>",
+ schema.toString());
+ }
+
+ @Test
+ public void testTime() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", new Time(TimeUnit.SECOND, 32)),
+ field("b", new Time(TimeUnit.MILLISECOND, 32)),
+ field("c", new Time(TimeUnit.MICROSECOND, 64)),
+ field("d", new Time(TimeUnit.NANOSECOND, 64))
+ ));
+ roundTrip(schema);
+ assertEquals(
+ "Schema<a: Time(SECOND, 32), b: Time(MILLISECOND, 32), c: Time(MICROSECOND, 64), d: Time(NANOSECOND, 64)>",
+ schema.toString());
+ }
+
+ @Test
public void testTS() throws IOException {
Schema schema = new Schema(asList(
field("a", new Timestamp(TimeUnit.SECOND, "UTC")),