You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by sb...@apache.org on 2019/06/03 08:34:19 UTC

[arrow] branch master updated: ARROW-5469: [Go] implement read/write IPC for Date32/64 arrays

This is an automated email from the ASF dual-hosted git repository.

sbinet pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 1eb06b6  ARROW-5469: [Go] implement read/write IPC for Date32/64 arrays
1eb06b6 is described below

commit 1eb06b65bff66064f4003d1bdec468eeb7031f5b
Author: Sebastien Binet <bi...@cern.ch>
AuthorDate: Mon Jun 3 10:34:01 2019 +0200

    ARROW-5469: [Go] implement read/write IPC for Date32/64 arrays
    
    Needs #4438
    
    Author: Sebastien Binet <bi...@cern.ch>
    
    Closes #4439 from sbinet/issue-5269 and squashes the following commits:
    
    9cb97c1fc <Sebastien Binet> ARROW-5469:  implement read/write IPC for Date32/64 arrays
---
 go/arrow/datatype_fixedwidth.go         |  4 ++++
 go/arrow/internal/arrdata/arrdata.go    | 22 ++++++++++++++++++++++
 go/arrow/ipc/cmd/arrow-cat/main_test.go | 18 ++++++++++++++++++
 go/arrow/ipc/cmd/arrow-ls/main_test.go  |  4 +++-
 go/arrow/ipc/file_reader.go             |  3 ++-
 go/arrow/ipc/metadata.go                | 15 +++++++++++++++
 6 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/go/arrow/datatype_fixedwidth.go b/go/arrow/datatype_fixedwidth.go
index edaf2a1..fa7ed3f 100644
--- a/go/arrow/datatype_fixedwidth.go
+++ b/go/arrow/datatype_fixedwidth.go
@@ -95,6 +95,8 @@ func (t *Time64Type) String() string { return "time64[" + t.Unit.String() + "]"
 var (
 	FixedWidthTypes = struct {
 		Boolean   FixedWidthDataType
+		Date32    FixedWidthDataType
+		Date64    FixedWidthDataType
 		Float16   FixedWidthDataType
 		Time32s   FixedWidthDataType
 		Time32ms  FixedWidthDataType
@@ -103,6 +105,8 @@ var (
 		Timestamp FixedWidthDataType
 	}{
 		Boolean:   &BooleanType{},
+		Date32:    &Date32Type{},
+		Date64:    &Date64Type{},
 		Float16:   &Float16Type{},
 		Time32s:   &Time32Type{Unit: Second},
 		Time32ms:  &Time32Type{Unit: Millisecond},
diff --git a/go/arrow/internal/arrdata/arrdata.go b/go/arrow/internal/arrdata/arrdata.go
index 0497e15..504ade6 100644
--- a/go/arrow/internal/arrdata/arrdata.go
+++ b/go/arrow/internal/arrdata/arrdata.go
@@ -335,6 +335,8 @@ func makeFixedWidthTypesRecords() []array.Record {
 			arrow.Field{Name: "time64ns", Type: arrow.FixedWidthTypes.Time64ns, Nullable: true},
 			arrow.Field{Name: "time64us", Type: arrow.FixedWidthTypes.Time64us, Nullable: true},
 			arrow.Field{Name: "timestamp", Type: arrow.FixedWidthTypes.Timestamp, Nullable: true},
+			arrow.Field{Name: "date32s", Type: arrow.FixedWidthTypes.Date32, Nullable: true},
+			arrow.Field{Name: "date64s", Type: arrow.FixedWidthTypes.Date64, Nullable: true},
 		}, nil,
 	)
 
@@ -355,6 +357,8 @@ func makeFixedWidthTypesRecords() []array.Record {
 			arrayOf(mem, []time64ns{-2, -1, 0, +1, +2}, mask),
 			arrayOf(mem, []time64us{-2, -1, 0, +1, +2}, mask),
 			arrayOf(mem, []arrow.Timestamp{0, +1, +2, +3, +4}, mask),
+			arrayOf(mem, []arrow.Date32{-2, -1, 0, +1, +2}, mask),
+			arrayOf(mem, []arrow.Date64{-2, -1, 0, +1, +2}, mask),
 		},
 		[]array.Interface{
 			arrayOf(mem, float16s([]float32{+11, +12, +13, +14, +15}), mask),
@@ -363,6 +367,8 @@ func makeFixedWidthTypesRecords() []array.Record {
 			arrayOf(mem, []time64ns{-12, -11, 10, +11, +12}, mask),
 			arrayOf(mem, []time64us{-12, -11, 10, +11, +12}, mask),
 			arrayOf(mem, []arrow.Timestamp{10, +11, +12, +13, +14}, mask),
+			arrayOf(mem, []arrow.Date32{-12, -11, 10, +11, +12}, mask),
+			arrayOf(mem, []arrow.Date64{-12, -11, 10, +11, +12}, mask),
 		},
 		[]array.Interface{
 			arrayOf(mem, float16s([]float32{+21, +22, +23, +24, +25}), mask),
@@ -371,6 +377,8 @@ func makeFixedWidthTypesRecords() []array.Record {
 			arrayOf(mem, []time64ns{-22, -21, 20, +21, +22}, mask),
 			arrayOf(mem, []time64us{-22, -21, 20, +21, +22}, mask),
 			arrayOf(mem, []arrow.Timestamp{20, +21, +22, +23, +24}, mask),
+			arrayOf(mem, []arrow.Date32{-22, -21, 20, +21, +22}, mask),
+			arrayOf(mem, []arrow.Date64{-22, -21, 20, +21, +22}, mask),
 		},
 	}
 
@@ -545,6 +553,20 @@ func arrayOf(mem memory.Allocator, a interface{}, valids []bool) array.Interface
 		bldr.AppendValues(a, valids)
 		return bldr.NewArray()
 
+	case []arrow.Date32:
+		bldr := array.NewDate32Builder(mem)
+		defer bldr.Release()
+
+		bldr.AppendValues(a, valids)
+		return bldr.NewArray()
+
+	case []arrow.Date64:
+		bldr := array.NewDate64Builder(mem)
+		defer bldr.Release()
+
+		bldr.AppendValues(a, valids)
+		return bldr.NewArray()
+
 	default:
 		panic(fmt.Errorf("arrdata: invalid data slice type %T", a))
 	}
diff --git a/go/arrow/ipc/cmd/arrow-cat/main_test.go b/go/arrow/ipc/cmd/arrow-cat/main_test.go
index 36ddc55..5a8f031 100644
--- a/go/arrow/ipc/cmd/arrow-cat/main_test.go
+++ b/go/arrow/ipc/cmd/arrow-cat/main_test.go
@@ -127,6 +127,8 @@ record 3...
   col[3] "time64ns": [-2 (null) (null) 1 2]
   col[4] "time64us": [-2 (null) (null) 1 2]
   col[5] "timestamp": [0 (null) (null) 3 4]
+  col[6] "date32s": [-2 (null) (null) 1 2]
+  col[7] "date64s": [-2 (null) (null) 1 2]
 record 2...
   col[0] "float16s": [11 (null) (null) 14 15]
   col[1] "time32ms": [-12 (null) (null) 11 12]
@@ -134,6 +136,8 @@ record 2...
   col[3] "time64ns": [-12 (null) (null) 11 12]
   col[4] "time64us": [-12 (null) (null) 11 12]
   col[5] "timestamp": [10 (null) (null) 13 14]
+  col[6] "date32s": [-12 (null) (null) 11 12]
+  col[7] "date64s": [-12 (null) (null) 11 12]
 record 3...
   col[0] "float16s": [21 (null) (null) 24 25]
   col[1] "time32ms": [-22 (null) (null) 21 22]
@@ -141,6 +145,8 @@ record 3...
   col[3] "time64ns": [-22 (null) (null) 21 22]
   col[4] "time64us": [-22 (null) (null) 21 22]
   col[5] "timestamp": [20 (null) (null) 23 24]
+  col[6] "date32s": [-22 (null) (null) 21 22]
+  col[7] "date64s": [-22 (null) (null) 21 22]
 `,
 		},
 	} {
@@ -390,6 +396,8 @@ record 3/3...
   col[3] "time64ns": [-2 (null) (null) 1 2]
   col[4] "time64us": [-2 (null) (null) 1 2]
   col[5] "timestamp": [0 (null) (null) 3 4]
+  col[6] "date32s": [-2 (null) (null) 1 2]
+  col[7] "date64s": [-2 (null) (null) 1 2]
 record 2...
   col[0] "float16s": [11 (null) (null) 14 15]
   col[1] "time32ms": [-12 (null) (null) 11 12]
@@ -397,6 +405,8 @@ record 2...
   col[3] "time64ns": [-12 (null) (null) 11 12]
   col[4] "time64us": [-12 (null) (null) 11 12]
   col[5] "timestamp": [10 (null) (null) 13 14]
+  col[6] "date32s": [-12 (null) (null) 11 12]
+  col[7] "date64s": [-12 (null) (null) 11 12]
 record 3...
   col[0] "float16s": [21 (null) (null) 24 25]
   col[1] "time32ms": [-22 (null) (null) 21 22]
@@ -404,6 +414,8 @@ record 3...
   col[3] "time64ns": [-22 (null) (null) 21 22]
   col[4] "time64us": [-22 (null) (null) 21 22]
   col[5] "timestamp": [20 (null) (null) 23 24]
+  col[6] "date32s": [-22 (null) (null) 21 22]
+  col[7] "date64s": [-22 (null) (null) 21 22]
 `,
 		},
 		{
@@ -416,6 +428,8 @@ record 1/3...
   col[3] "time64ns": [-2 (null) (null) 1 2]
   col[4] "time64us": [-2 (null) (null) 1 2]
   col[5] "timestamp": [0 (null) (null) 3 4]
+  col[6] "date32s": [-2 (null) (null) 1 2]
+  col[7] "date64s": [-2 (null) (null) 1 2]
 record 2/3...
   col[0] "float16s": [11 (null) (null) 14 15]
   col[1] "time32ms": [-12 (null) (null) 11 12]
@@ -423,6 +437,8 @@ record 2/3...
   col[3] "time64ns": [-12 (null) (null) 11 12]
   col[4] "time64us": [-12 (null) (null) 11 12]
   col[5] "timestamp": [10 (null) (null) 13 14]
+  col[6] "date32s": [-12 (null) (null) 11 12]
+  col[7] "date64s": [-12 (null) (null) 11 12]
 record 3/3...
   col[0] "float16s": [21 (null) (null) 24 25]
   col[1] "time32ms": [-22 (null) (null) 21 22]
@@ -430,6 +446,8 @@ record 3/3...
   col[3] "time64ns": [-22 (null) (null) 21 22]
   col[4] "time64us": [-22 (null) (null) 21 22]
   col[5] "timestamp": [20 (null) (null) 23 24]
+  col[6] "date32s": [-22 (null) (null) 21 22]
+  col[7] "date64s": [-22 (null) (null) 21 22]
 `,
 		},
 	} {
diff --git a/go/arrow/ipc/cmd/arrow-ls/main_test.go b/go/arrow/ipc/cmd/arrow-ls/main_test.go
index fac024f..3b50b89 100644
--- a/go/arrow/ipc/cmd/arrow-ls/main_test.go
+++ b/go/arrow/ipc/cmd/arrow-ls/main_test.go
@@ -90,13 +90,15 @@ records: 3
 		{
 			name: "fixed_width_types",
 			want: `schema:
-  fields: 6
+  fields: 8
     - float16s: type=float16, nullable
     - time32ms: type=time32[ms], nullable
     - time32s: type=time32[s], nullable
     - time64ns: type=time64[ns], nullable
     - time64us: type=time64[us], nullable
     - timestamp: type=timestamp[ns], nullable
+    - date32s: type=date32, nullable
+    - date64s: type=date64, nullable
 records: 3
 `,
 		},
diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go
index 1ffee78..b8f08cc 100644
--- a/go/arrow/ipc/file_reader.go
+++ b/go/arrow/ipc/file_reader.go
@@ -361,7 +361,8 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) array.Interface {
 		*arrow.Uint8Type, *arrow.Uint16Type, *arrow.Uint32Type, *arrow.Uint64Type,
 		*arrow.Float16Type, *arrow.Float32Type, *arrow.Float64Type,
 		*arrow.Time32Type, *arrow.Time64Type,
-		*arrow.TimestampType:
+		*arrow.TimestampType,
+		*arrow.Date32Type, *arrow.Date64Type:
 		return ctx.loadPrimitive(dt)
 
 	case *arrow.BinaryType, *arrow.StringType:
diff --git a/go/arrow/ipc/metadata.go b/go/arrow/ipc/metadata.go
index 3c45ff5..91db1f2 100644
--- a/go/arrow/ipc/metadata.go
+++ b/go/arrow/ipc/metadata.go
@@ -532,6 +532,11 @@ func concreteTypeFromFB(typ flatbuf.Type, data flatbuffers.Table, children []arr
 		dt.Init(data.Bytes, data.Pos)
 		return timestampFromFB(dt)
 
+	case flatbuf.TypeDate:
+		var dt flatbuf.Date
+		dt.Init(data.Bytes, data.Pos)
+		return dateFromFB(dt)
+
 	default:
 		// FIXME(sbinet): implement all the other types.
 		panic(fmt.Errorf("arrow/ipc: type %v not implemented", flatbuf.EnumNamesType[typ]))
@@ -651,6 +656,16 @@ func timestampFromFB(data flatbuf.Timestamp) (arrow.DataType, error) {
 	return &arrow.TimestampType{Unit: unit, TimeZone: tz}, nil
 }
 
+func dateFromFB(data flatbuf.Date) (arrow.DataType, error) {
+	switch data.Unit() {
+	case flatbuf.DateUnitDAY:
+		return arrow.FixedWidthTypes.Date32, nil
+	case flatbuf.DateUnitMILLISECOND:
+		return arrow.FixedWidthTypes.Date64, nil
+	}
+	return nil, errors.Errorf("arrow/ipc: Date type with %d unit not implemented", data.Unit())
+}
+
 type customMetadataer interface {
 	CustomMetadataLength() int
 	CustomMetadata(*flatbuf.KeyValue, int) bool