You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ze...@apache.org on 2022/08/02 14:59:47 UTC
[arrow] branch master updated: ARROW-17273: [Go][CSV] Add Timestamp, Date32, Date64 format support to csv.Writer (#13772)
This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 901e132b05 ARROW-17273: [Go][CSV] Add Timestamp, Date32, Date64 format support to csv.Writer (#13772)
901e132b05 is described below
commit 901e132b05c2d2f2b805f3e7aad8f28c1b2d2260
Author: George Godik <gg...@gmail.com>
AuthorDate: Tue Aug 2 10:59:41 2022 -0400
ARROW-17273: [Go][CSV] Add Timestamp, Date32, Date64 format support to csv.Writer (#13772)
Newly supported types
- Date32
- Date64
- Timestamp
csv.Reader currently supports Timestamps. Date32/Date64 support is not being added to csv.Reader: its default behavior will stay the same, and it will keep parsing such values as the broadest `timestamp` type.
https://issues.apache.org/jira/browse/ARROW-17273
Authored-by: ggodik <gg...@factset.com>
Signed-off-by: Matt Topol <zo...@gmail.com>
---
go/arrow/csv/common.go | 1 +
go/arrow/csv/writer.go | 29 +++++++++++++++++++++++++++++
go/arrow/csv/writer_test.go | 28 +++++++++++++++++++++++-----
3 files changed, 53 insertions(+), 5 deletions(-)
diff --git a/go/arrow/csv/common.go b/go/arrow/csv/common.go
index 0f1b9c4bb2..92427cba9e 100644
--- a/go/arrow/csv/common.go
+++ b/go/arrow/csv/common.go
@@ -168,6 +168,7 @@ func validate(schema *arrow.Schema) {
case *arrow.Float32Type, *arrow.Float64Type:
case *arrow.StringType:
case *arrow.TimestampType:
+ case *arrow.Date32Type, *arrow.Date64Type:
default:
panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid data type %T", i, f.Name, ft))
}
diff --git a/go/arrow/csv/writer.go b/go/arrow/csv/writer.go
index 83b8e1e073..a6ccf5b7fc 100644
--- a/go/arrow/csv/writer.go
+++ b/go/arrow/csv/writer.go
@@ -188,6 +188,35 @@ func (w *Writer) Write(record arrow.Record) error {
recs[i][j] = w.nullValue
}
}
+ case *arrow.Date32Type:
+ arr := col.(*array.Date32)
+ for i := 0; i < arr.Len(); i++ {
+ if arr.IsValid(i) {
+ recs[i][j] = arr.Value(i).FormattedString()
+ } else {
+ recs[i][j] = w.nullValue
+ }
+ }
+ case *arrow.Date64Type:
+ arr := col.(*array.Date64)
+ for i := 0; i < arr.Len(); i++ {
+ if arr.IsValid(i) {
+ recs[i][j] = arr.Value(i).FormattedString()
+ } else {
+ recs[i][j] = w.nullValue
+ }
+ }
+
+ case *arrow.TimestampType:
+ arr := col.(*array.Timestamp)
+ t := w.schema.Field(j).Type.(*arrow.TimestampType)
+ for i := 0; i < arr.Len(); i++ {
+ if arr.IsValid(i) {
+ recs[i][j] = arr.Value(i).ToTime(t.Unit).Format("2006-01-02 15:04:05.999999999")
+ } else {
+ recs[i][j] = w.nullValue
+ }
+ }
}
}
diff --git a/go/arrow/csv/writer_test.go b/go/arrow/csv/writer_test.go
index e9cd417d28..31593f4969 100644
--- a/go/arrow/csv/writer_test.go
+++ b/go/arrow/csv/writer_test.go
@@ -139,6 +139,18 @@ func TestCSVWriter(t *testing.T) {
}
}
+func genTimestamps(unit arrow.TimeUnit) []arrow.Timestamp { // genTimestamps converts three fixed date-time literals into arrow.Timestamp values at the given unit.
+ out := []arrow.Timestamp{}
+ for _, input := range []string{"2014-07-28 15:04:05", "2016-09-08 15:04:05", "2021-09-18 15:04:05"} { // the same literals appear as the ts_s column in testCSVWriter's expected output
+ ts, err := arrow.TimestampFromString(input, unit)
+ if err != nil {
+ panic(fmt.Errorf("could not convert %s to arrow.Timestamp err=%s", input, err)) // the helper has no error return, so a conversion failure panics
+ }
+ out = append(out, ts)
+ }
+ return out // one timestamp per literal, in the order listed above
+}
+
func testCSVWriter(t *testing.T, writeHeader bool) {
f := new(bytes.Buffer)
@@ -158,6 +170,9 @@ func testCSVWriter(t *testing.T, writeHeader bool) {
{Name: "f32", Type: arrow.PrimitiveTypes.Float32},
{Name: "f64", Type: arrow.PrimitiveTypes.Float64},
{Name: "str", Type: arrow.BinaryTypes.String},
+ {Name: "ts_s", Type: arrow.FixedWidthTypes.Timestamp_s},
+ {Name: "d32", Type: arrow.FixedWidthTypes.Date32},
+ {Name: "d64", Type: arrow.FixedWidthTypes.Date64},
},
nil,
)
@@ -177,6 +192,9 @@ func testCSVWriter(t *testing.T, writeHeader bool) {
b.Field(9).(*array.Float32Builder).AppendValues([]float32{0.0, 0.1, 0.2}, nil)
b.Field(10).(*array.Float64Builder).AppendValues([]float64{0.0, 0.1, 0.2}, nil)
b.Field(11).(*array.StringBuilder).AppendValues([]string{"str-0", "str-1", "str-2"}, nil)
+ b.Field(12).(*array.TimestampBuilder).AppendValues(genTimestamps(arrow.Second), nil)
+ b.Field(13).(*array.Date32Builder).AppendValues([]arrow.Date32{17304, 19304, 20304}, nil)
+ b.Field(14).(*array.Date64Builder).AppendValues([]arrow.Date64{1840400000000, 1940400000000, 2040400000000}, nil)
for _, field := range b.Fields() {
field.AppendNull()
@@ -206,14 +224,14 @@ func testCSVWriter(t *testing.T, writeHeader bool) {
t.Fatal(err)
}
- want := `true;-1;-1;-1;-1;0;0;0;0;0;0;str-0
-false;0;0;0;0;1;1;1;1;0.1;0.1;str-1
-true;1;1;1;1;2;2;2;2;0.2;0.2;str-2
-null;null;null;null;null;null;null;null;null;null;null;null
+ want := `true;-1;-1;-1;-1;0;0;0;0;0;0;str-0;2014-07-28 15:04:05;2017-05-18;2028-04-26
+false;0;0;0;0;1;1;1;1;0.1;0.1;str-1;2016-09-08 15:04:05;2022-11-08;2031-06-28
+true;1;1;1;1;2;2;2;2;0.2;0.2;str-2;2021-09-18 15:04:05;2025-08-04;2034-08-28
+null;null;null;null;null;null;null;null;null;null;null;null;null;null;null
`
if writeHeader {
- want = "bool;i8;i16;i32;i64;u8;u16;u32;u64;f32;f64;str\n" + want
+ want = "bool;i8;i16;i32;i64;u8;u16;u32;u64;f32;f64;str;ts_s;d32;d64\n" + want
}
if got, want := f.String(), want; strings.Compare(got, want) != 0 {