You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by sb...@apache.org on 2019/04/29 08:27:38 UTC
[arrow] branch master updated: ARROW-5110: [Go] implement reading struct arrays from Arrow file
This is an automated email from the ASF dual-hosted git repository.
sbinet pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new cd4c2ca ARROW-5110: [Go] implement reading struct arrays from Arrow file
cd4c2ca is described below
commit cd4c2ca6b32f038beb41bd8da9af9a2d8762c60b
Author: Sebastien Binet <bi...@cern.ch>
AuthorDate: Mon Apr 29 10:27:26 2019 +0200
ARROW-5110: [Go] implement reading struct arrays from Arrow file
Author: Sebastien Binet <bi...@cern.ch>
Closes #4158 from sbinet/issue-5110 and squashes the following commits:
f6f9b9ea <Sebastien Binet> arrow/array: provide List array stringer implementation
b838ee04 <Sebastien Binet> arrow/array: fix Struct array Stringer implementation
d496c2ff <Sebastien Binet> ARROW-5110: implement reading struct arrays from Arrow file
---
go/arrow/array/list.go | 23 +++++++++++++++++++
go/arrow/array/struct.go | 11 +++------
go/arrow/array/struct_test.go | 53 +++++++++++++++++++++++++++++++++++++++++++
go/arrow/example_test.go | 2 ++
go/arrow/ipc/file_reader.go | 24 ++++++++++++++++++++
go/arrow/ipc/metadata.go | 3 +++
6 files changed, 108 insertions(+), 8 deletions(-)
diff --git a/go/arrow/array/list.go b/go/arrow/array/list.go
index 1c4c01f..b571c50 100644
--- a/go/arrow/array/list.go
+++ b/go/arrow/array/list.go
@@ -17,6 +17,8 @@
package array
import (
+ "fmt"
+ "strings"
"sync/atomic"
"github.com/apache/arrow/go/arrow"
@@ -42,6 +44,27 @@ func NewListData(data *Data) *List {
func (a *List) ListValues() Interface { return a.values }
+func (a *List) String() string {
+ o := new(strings.Builder)
+ o.WriteString("[")
+ for i := 0; i < a.Len(); i++ {
+ if i > 0 {
+ o.WriteString(" ")
+ }
+ if !a.IsValid(i) {
+ o.WriteString("(null)")
+ continue
+ }
+ beg := int64(a.offsets[i])
+ end := int64(a.offsets[i+1])
+ sub := NewSlice(a.values, beg, end)
+ fmt.Fprintf(o, "%v", sub)
+ sub.Release()
+ }
+ o.WriteString("]")
+ return o.String()
+}
+
func (a *List) setData(data *Data) {
a.array.setData(data)
vals := data.buffers[1]
diff --git a/go/arrow/array/struct.go b/go/arrow/array/struct.go
index 9a65be0..55fd913 100644
--- a/go/arrow/array/struct.go
+++ b/go/arrow/array/struct.go
@@ -46,19 +46,14 @@ func (a *Struct) Field(i int) Interface { return a.fields[i] }
func (a *Struct) String() string {
o := new(strings.Builder)
- o.WriteString("[")
+ o.WriteString("{")
for i, v := range a.fields {
if i > 0 {
o.WriteString(" ")
}
- switch {
- case a.IsNull(i):
- o.WriteString("(null)")
- default:
- fmt.Fprintf(o, "%v", v)
- }
+ fmt.Fprintf(o, "%v", v)
}
- o.WriteString("]")
+ o.WriteString("}")
return o.String()
}
diff --git a/go/arrow/array/struct_test.go b/go/arrow/array/struct_test.go
index ba21f20..d9701ce 100644
--- a/go/arrow/array/struct_test.go
+++ b/go/arrow/array/struct_test.go
@@ -247,3 +247,56 @@ func TestStructArrayBulkAppend(t *testing.T) {
}
}
}
+
+func TestStructArrayStringer(t *testing.T) {
+ pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer pool.AssertSize(t, 0)
+
+ var (
+ f1s = []float64{1.1, 1.2, 1.3, 1.4}
+ f2s = []int32{1, 2, 3, 4}
+
+ fields = []arrow.Field{
+ {Name: "f1", Type: arrow.PrimitiveTypes.Float64},
+ {Name: "f2", Type: arrow.PrimitiveTypes.Int32},
+ }
+ dtype = arrow.StructOf(fields...)
+ )
+
+ sb := array.NewStructBuilder(pool, dtype)
+ defer sb.Release()
+
+ f1b := sb.FieldBuilder(0).(*array.Float64Builder)
+ defer f1b.Release()
+
+ f2b := sb.FieldBuilder(1).(*array.Int32Builder)
+ defer f2b.Release()
+
+ if got, want := sb.NumField(), 2; got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ for i := range f1s {
+ sb.Append(true)
+ switch i {
+ case 1:
+ f1b.AppendNull()
+ f2b.Append(f2s[i])
+ case 2:
+ f1b.Append(f1s[i])
+ f2b.AppendNull()
+ default:
+ f1b.Append(f1s[i])
+ f2b.Append(f2s[i])
+ }
+ }
+
+ arr := sb.NewArray().(*array.Struct)
+ defer arr.Release()
+
+ want := "{[1.1 (null) 1.3 1.4] [1 2 (null) 4]}"
+ got := arr.String()
+ if got != want {
+ t.Fatalf("invalid string representation:\ngot = %q\nwant= %q", got, want)
+ }
+}
diff --git a/go/arrow/example_test.go b/go/arrow/example_test.go
index 9172a5b..8015cbf 100644
--- a/go/arrow/example_test.go
+++ b/go/arrow/example_test.go
@@ -188,6 +188,7 @@ func Example_listArray() {
pos = int(offsets[i])
fmt.Printf("]\n")
}
+ fmt.Printf("List = %v\n", arr)
// Output:
// NullN() = 2
@@ -200,6 +201,7 @@ func Example_listArray() {
// List[4] = [6, 7, 8]
// List[5] = (null)
// List[6] = [9]
+ // List = [[0 1 2] (null) [3] [4 5] [6 7 8] (null) [9]]
}
// This example shows how to create a Struct array.
diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go
index 1765e38..26e48fa 100644
--- a/go/arrow/ipc/file_reader.go
+++ b/go/arrow/ipc/file_reader.go
@@ -367,6 +367,9 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) array.Interface {
case *arrow.ListType:
return ctx.loadList(dt)
+ case *arrow.StructType:
+ return ctx.loadStruct(dt)
+
default:
panic(errors.Errorf("array type %T not handled yet", dt))
}
@@ -448,6 +451,27 @@ func (ctx *arrayLoaderContext) loadList(dt *arrow.ListType) array.Interface {
return array.NewListData(data)
}
+func (ctx *arrayLoaderContext) loadStruct(dt *arrow.StructType) array.Interface {
+ field, buffers := ctx.loadCommon(1)
+
+ arrs := make([]array.Interface, len(dt.Fields()))
+ subs := make([]*array.Data, len(dt.Fields()))
+ for i, f := range dt.Fields() {
+ arrs[i] = ctx.loadChild(f.Type)
+ subs[i] = arrs[i].Data()
+ }
+ defer func() {
+ for i := range arrs {
+ arrs[i].Release()
+ }
+ }()
+
+ data := array.NewData(dt, int(field.Length()), buffers, subs, int(field.NullCount()), 0)
+ defer data.Release()
+
+ return array.NewStructData(data)
+}
+
func readDictionary(meta *memory.Buffer, types dictTypeMap, r ReadAtSeeker) (int64, array.Interface, error) {
// msg := flatbuf.GetRootAsMessage(meta.Bytes(), 0)
// var dictBatch flatbuf.DictionaryBatch
diff --git a/go/arrow/ipc/metadata.go b/go/arrow/ipc/metadata.go
index cfbfd24..11665b9 100644
--- a/go/arrow/ipc/metadata.go
+++ b/go/arrow/ipc/metadata.go
@@ -254,6 +254,9 @@ func concreteTypeFromFB(typ flatbuf.Type, data flatbuffers.Table, children []arr
}
return arrow.ListOf(children[0].Type), nil
+ case flatbuf.TypeStruct_:
+ return arrow.StructOf(children...), nil
+
default:
// FIXME(sbinet): implement all the other types.
panic(fmt.Errorf("arrow/ipc: type %v not implemented", flatbuf.EnumNamesType[typ]))