You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ze...@apache.org on 2022/08/02 14:59:47 UTC

[arrow] branch master updated: ARROW-17273: [Go][CSV] Add Timestamp, Date32, Date64 format support to csv.Writer (#13772)

This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 901e132b05 ARROW-17273: [Go][CSV] Add Timestamp, Date32, Date64 format support to csv.Writer (#13772)
901e132b05 is described below

commit 901e132b05c2d2f2b805f3e7aad8f28c1b2d2260
Author: George Godik <gg...@gmail.com>
AuthorDate: Tue Aug 2 10:59:41 2022 -0400

    ARROW-17273: [Go][CSV] Add Timestamp, Date32, Date64 format support to csv.Writer (#13772)
    
    Newly supported types
    
    - Date32
    - Date64
    - Timestamp
    
    csv.Reader already supports Timestamp. Date32/Date64 support is not being added to csv.Reader, since the reader's default behavior stays the same and it continues to parse as the broadest `timestamp` type.
    
    https://issues.apache.org/jira/browse/ARROW-17273
    
    Authored-by: ggodik <gg...@factset.com>
    Signed-off-by: Matt Topol <zo...@gmail.com>
---
 go/arrow/csv/common.go      |  1 +
 go/arrow/csv/writer.go      | 29 +++++++++++++++++++++++++++++
 go/arrow/csv/writer_test.go | 28 +++++++++++++++++++++++-----
 3 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/go/arrow/csv/common.go b/go/arrow/csv/common.go
index 0f1b9c4bb2..92427cba9e 100644
--- a/go/arrow/csv/common.go
+++ b/go/arrow/csv/common.go
@@ -168,6 +168,7 @@ func validate(schema *arrow.Schema) {
 		case *arrow.Float32Type, *arrow.Float64Type:
 		case *arrow.StringType:
 		case *arrow.TimestampType:
+		case *arrow.Date32Type, *arrow.Date64Type:
 		default:
 			panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid data type %T", i, f.Name, ft))
 		}
diff --git a/go/arrow/csv/writer.go b/go/arrow/csv/writer.go
index 83b8e1e073..a6ccf5b7fc 100644
--- a/go/arrow/csv/writer.go
+++ b/go/arrow/csv/writer.go
@@ -188,6 +188,35 @@ func (w *Writer) Write(record arrow.Record) error {
 					recs[i][j] = w.nullValue
 				}
 			}
+		case *arrow.Date32Type:
+			arr := col.(*array.Date32)
+			for i := 0; i < arr.Len(); i++ {
+				if arr.IsValid(i) {
+					recs[i][j] = arr.Value(i).FormattedString()
+				} else {
+					recs[i][j] = w.nullValue
+				}
+			}
+		case *arrow.Date64Type:
+			arr := col.(*array.Date64)
+			for i := 0; i < arr.Len(); i++ {
+				if arr.IsValid(i) {
+					recs[i][j] = arr.Value(i).FormattedString()
+				} else {
+					recs[i][j] = w.nullValue
+				}
+			}
+
+		case *arrow.TimestampType:
+			arr := col.(*array.Timestamp)
+			t := w.schema.Field(j).Type.(*arrow.TimestampType)
+			for i := 0; i < arr.Len(); i++ {
+				if arr.IsValid(i) {
+					recs[i][j] = arr.Value(i).ToTime(t.Unit).Format("2006-01-02 15:04:05.999999999")
+				} else {
+					recs[i][j] = w.nullValue
+				}
+			}
 		}
 	}
 
diff --git a/go/arrow/csv/writer_test.go b/go/arrow/csv/writer_test.go
index e9cd417d28..31593f4969 100644
--- a/go/arrow/csv/writer_test.go
+++ b/go/arrow/csv/writer_test.go
@@ -139,6 +139,18 @@ func TestCSVWriter(t *testing.T) {
 	}
 }
 
+func genTimestamps(unit arrow.TimeUnit) []arrow.Timestamp {
+	out := []arrow.Timestamp{}
+	for _, input := range []string{"2014-07-28 15:04:05", "2016-09-08 15:04:05", "2021-09-18 15:04:05"} {
+		ts, err := arrow.TimestampFromString(input, unit)
+		if err != nil {
+			panic(fmt.Errorf("could not convert %s to arrow.Timestamp err=%s", input, err))
+		}
+		out = append(out, ts)
+	}
+	return out
+}
+
 func testCSVWriter(t *testing.T, writeHeader bool) {
 	f := new(bytes.Buffer)
 
@@ -158,6 +170,9 @@ func testCSVWriter(t *testing.T, writeHeader bool) {
 			{Name: "f32", Type: arrow.PrimitiveTypes.Float32},
 			{Name: "f64", Type: arrow.PrimitiveTypes.Float64},
 			{Name: "str", Type: arrow.BinaryTypes.String},
+			{Name: "ts_s", Type: arrow.FixedWidthTypes.Timestamp_s},
+			{Name: "d32", Type: arrow.FixedWidthTypes.Date32},
+			{Name: "d64", Type: arrow.FixedWidthTypes.Date64},
 		},
 		nil,
 	)
@@ -177,6 +192,9 @@ func testCSVWriter(t *testing.T, writeHeader bool) {
 	b.Field(9).(*array.Float32Builder).AppendValues([]float32{0.0, 0.1, 0.2}, nil)
 	b.Field(10).(*array.Float64Builder).AppendValues([]float64{0.0, 0.1, 0.2}, nil)
 	b.Field(11).(*array.StringBuilder).AppendValues([]string{"str-0", "str-1", "str-2"}, nil)
+	b.Field(12).(*array.TimestampBuilder).AppendValues(genTimestamps(arrow.Second), nil)
+	b.Field(13).(*array.Date32Builder).AppendValues([]arrow.Date32{17304, 19304, 20304}, nil)
+	b.Field(14).(*array.Date64Builder).AppendValues([]arrow.Date64{1840400000000, 1940400000000, 2040400000000}, nil)
 
 	for _, field := range b.Fields() {
 		field.AppendNull()
@@ -206,14 +224,14 @@ func testCSVWriter(t *testing.T, writeHeader bool) {
 		t.Fatal(err)
 	}
 
-	want := `true;-1;-1;-1;-1;0;0;0;0;0;0;str-0
-false;0;0;0;0;1;1;1;1;0.1;0.1;str-1
-true;1;1;1;1;2;2;2;2;0.2;0.2;str-2
-null;null;null;null;null;null;null;null;null;null;null;null
+	want := `true;-1;-1;-1;-1;0;0;0;0;0;0;str-0;2014-07-28 15:04:05;2017-05-18;2028-04-26
+false;0;0;0;0;1;1;1;1;0.1;0.1;str-1;2016-09-08 15:04:05;2022-11-08;2031-06-28
+true;1;1;1;1;2;2;2;2;0.2;0.2;str-2;2021-09-18 15:04:05;2025-08-04;2034-08-28
+null;null;null;null;null;null;null;null;null;null;null;null;null;null;null
 `
 
 	if writeHeader {
-		want = "bool;i8;i16;i32;i64;u8;u16;u32;u64;f32;f64;str\n" + want
+		want = "bool;i8;i16;i32;i64;u8;u16;u32;u64;f32;f64;str;ts_s;d32;d64\n" + want
 	}
 
 	if got, want := f.String(), want; strings.Compare(got, want) != 0 {