You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ze...@apache.org on 2022/05/15 17:35:47 UTC
[arrow] branch master updated: ARROW-16504 [Go][CSV] Add arrow.TimestampType support to the reader
This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 9465466abb ARROW-16504 [Go][CSV] Add arrow.TimestampType support to the reader
9465466abb is described below
commit 9465466abb223b190478f3171e4291db5a14b1ee
Author: Mark Wolfe <ma...@wolfe.id.au>
AuthorDate: Sun May 15 13:35:30 2022 -0400
ARROW-16504 [Go][CSV] Add arrow.TimestampType support to the reader
There is already a helper to convert strings to arrow.Timestamp so incorporate this into the CSV reader.
The CSV files I am currently working with have RFC3339 timestamps, so I followed some of the JSON code and stuck with the millisecond default.
Was really easy to add this using the existing functions and structure.
Closes #13098 from wolfeidau/ARROW-16504-add-timestamp-support-to-reader
Authored-by: Mark Wolfe <ma...@wolfe.id.au>
Signed-off-by: Matt Topol <zo...@gmail.com>
---
go/arrow/csv/common.go | 1 +
go/arrow/csv/reader.go | 23 ++++++++++++++++++++++-
go/arrow/csv/reader_test.go | 4 ++++
go/arrow/csv/testdata/header.csv | 8 ++++----
go/arrow/csv/testdata/types.csv | 8 ++++----
5 files changed, 35 insertions(+), 9 deletions(-)
diff --git a/go/arrow/csv/common.go b/go/arrow/csv/common.go
index a43059b1b0..0f1b9c4bb2 100644
--- a/go/arrow/csv/common.go
+++ b/go/arrow/csv/common.go
@@ -167,6 +167,7 @@ func validate(schema *arrow.Schema) {
case *arrow.Uint8Type, *arrow.Uint16Type, *arrow.Uint32Type, *arrow.Uint64Type:
case *arrow.Float32Type, *arrow.Float64Type:
case *arrow.StringType:
+ case *arrow.TimestampType:
default:
panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid data type %T", i, f.Name, ft))
}
diff --git a/go/arrow/csv/reader.go b/go/arrow/csv/reader.go
index ef59cdd97d..9e270d625f 100644
--- a/go/arrow/csv/reader.go
+++ b/go/arrow/csv/reader.go
@@ -261,7 +261,7 @@ func (r *Reader) read(recs []string) {
}
func (r *Reader) initFieldConverter(field *arrow.Field) func(array.Builder, string) {
- switch field.Type.(type) {
+ switch dt := field.Type.(type) {
case *arrow.BooleanType:
return func(field array.Builder, str string) {
r.parseBool(field, str)
@@ -321,6 +321,10 @@ func (r *Reader) initFieldConverter(field *arrow.Field) func(array.Builder, stri
field.(*array.StringBuilder).Append(str)
}
}
+ case *arrow.TimestampType:
+ return func(field array.Builder, str string) {
+ r.parseTimestamp(field, str, dt.Unit)
+ }
default:
panic(fmt.Errorf("arrow/csv: unhandled field type %T", field.Type))
@@ -507,6 +511,23 @@ func (r *Reader) parseFloat64(field array.Builder, str string) {
field.(*array.Float64Builder).Append(v)
}
+// parses timestamps using the precision given by unit
+func (r *Reader) parseTimestamp(field array.Builder, str string, unit arrow.TimeUnit) {
+ if r.isNull(str) {
+ field.AppendNull()
+ return
+ }
+
+ v, err := arrow.TimestampFromString(str, unit)
+ if err != nil && r.err == nil {
+ r.err = err
+ field.AppendNull()
+ return
+ }
+
+ field.(*array.TimestampBuilder).Append(v)
+}
+
// Retain increases the reference count by 1.
// Retain may be called simultaneously from multiple goroutines.
func (r *Reader) Retain() {
diff --git a/go/arrow/csv/reader_test.go b/go/arrow/csv/reader_test.go
index d8a67b91eb..6f0316e35a 100644
--- a/go/arrow/csv/reader_test.go
+++ b/go/arrow/csv/reader_test.go
@@ -201,6 +201,7 @@ func testCSVReader(t *testing.T, filepath string, withHeader bool) {
arrow.Field{Name: "f32", Type: arrow.PrimitiveTypes.Float32},
arrow.Field{Name: "f64", Type: arrow.PrimitiveTypes.Float64},
arrow.Field{Name: "str", Type: arrow.BinaryTypes.String},
+ arrow.Field{Name: "ts", Type: arrow.FixedWidthTypes.Timestamp_ms},
},
nil,
)
@@ -246,6 +247,7 @@ rec[0]["u64"]: [1]
rec[0]["f32"]: [1.1]
rec[0]["f64"]: [1.1]
rec[0]["str"]: ["str-1"]
+rec[0]["ts"]: [1652054461000]
rec[1]["bool"]: [false]
rec[1]["i8"]: [-2]
rec[1]["i16"]: [-2]
@@ -258,6 +260,7 @@ rec[1]["u64"]: [2]
rec[1]["f32"]: [2.2]
rec[1]["f64"]: [2.2]
rec[1]["str"]: ["str-2"]
+rec[1]["ts"]: [1652140799000]
rec[2]["bool"]: [(null)]
rec[2]["i8"]: [(null)]
rec[2]["i16"]: [(null)]
@@ -270,6 +273,7 @@ rec[2]["u64"]: [(null)]
rec[2]["f32"]: [(null)]
rec[2]["f64"]: [(null)]
rec[2]["str"]: [(null)]
+rec[2]["ts"]: [(null)]
`
if got, want := out.String(), want; got != want {
diff --git a/go/arrow/csv/testdata/header.csv b/go/arrow/csv/testdata/header.csv
index bbcd33f44f..a55f5ff062 100644
--- a/go/arrow/csv/testdata/header.csv
+++ b/go/arrow/csv/testdata/header.csv
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
#
-bool;i8;i16;i32;i64;u8;u16;u32;u64;f32;f64;str
-true;-1;-1;-1;-1;1;1;1;1;1.1;1.1;str-1
-false;-2;-2;-2;-2;2;2;2;2;2.2;2.2;str-2
-null;null;null;null;null;null;null;null;null;null;null;null
+bool;i8;i16;i32;i64;u8;u16;u32;u64;f32;f64;str;ts
+true;-1;-1;-1;-1;1;1;1;1;1.1;1.1;str-1;2022-05-09T00:01:01
+false;-2;-2;-2;-2;2;2;2;2;2.2;2.2;str-2;2022-05-09T23:59:59
+null;NULL;null;N/A;;null;null;null;null;null;null;null;null
\ No newline at end of file
diff --git a/go/arrow/csv/testdata/types.csv b/go/arrow/csv/testdata/types.csv
index 1c9c4afe95..14153e6f05 100644
--- a/go/arrow/csv/testdata/types.csv
+++ b/go/arrow/csv/testdata/types.csv
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
#
-## supported types: bool;int8;int16;int32;int64;uint8;uint16;uint32;uint64;float32;float64;string
-true;-1;-1;-1;-1;1;1;1;1;1.1;1.1;str-1
-false;-2;-2;-2;-2;2;2;2;2;2.2;2.2;str-2
-null;NULL;null;N/A;;null;null;null;null;null;null;null
+## supported types: bool;int8;int16;int32;int64;uint8;uint16;uint32;uint64;float32;float64;string;timestamp
+true;-1;-1;-1;-1;1;1;1;1;1.1;1.1;str-1;2022-05-09T00:01:01
+false;-2;-2;-2;-2;2;2;2;2;2.2;2.2;str-2;2022-05-09T23:59:59
+null;NULL;null;N/A;;null;null;null;null;null;null;null;null
\ No newline at end of file