You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2018/08/13 18:00:07 UTC
[arrow] branch master updated: ARROW-3022: [Go] add support for
Struct arrays
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 2dfb01f ARROW-3022: [Go] add support for Struct arrays
2dfb01f is described below
commit 2dfb01fd8632618315e2d9e215b5cfafb0cdd1a8
Author: Sebastien Binet <bi...@cern.ch>
AuthorDate: Mon Aug 13 19:59:57 2018 +0200
ARROW-3022: [Go] add support for Struct arrays
Author: Sebastien Binet <bi...@cern.ch>
Closes #2411 from sbinet/issue-3022 and squashes the following commits:
101752a <Sebastien Binet> ARROW-3022: add support for Struct arrays
---
go/README.md | 6 +-
go/arrow/array/array.go | 1 +
go/arrow/array/array_test.go | 6 +
go/arrow/array/builder.go | 8 ++
go/arrow/array/struct.go | 201 ++++++++++++++++++++++++++++++++
go/arrow/array/struct_test.go | 243 +++++++++++++++++++++++++++++++++++++++
go/arrow/datatype_nested.go | 87 ++++++++++++++
go/arrow/datatype_nested_test.go | 185 +++++++++++++++++++++++++++++
go/arrow/example_test.go | 90 +++++++++++++++
go/arrow/metadata/schema.go | 16 +--
10 files changed, 826 insertions(+), 17 deletions(-)
diff --git a/go/README.md b/go/README.md
index e4d349d..da18937 100644
--- a/go/README.md
+++ b/go/README.md
@@ -143,8 +143,8 @@ them internally in the [ifql][] execution engine and storage layers of [InfluxDB
- [ ] Time64 (microseconds or nanoseconds since midnight)
- [ ] Decimal (128-bit)
- [ ] Fixed-sized binary
-- [ ] List
-- [ ] Struct
+- [x] List
+- [x] Struct
- [ ] Union
- [ ] Dense
- [ ] Sparse
@@ -171,4 +171,4 @@ Serialization is planned for a future iteration.
[arrow]: https://arrow.apache.org
[ifql]: https://github.com/influxdata/ifql
[InfluxDB]: https://github.com/influxdata/influxdb
-[c2goasm]: https://github.com/minio/c2goasm
\ No newline at end of file
+[c2goasm]: https://github.com/minio/c2goasm
diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go
index 61cefac..f45bec4 100644
--- a/go/arrow/array/array.go
+++ b/go/arrow/array/array.go
@@ -186,4 +186,5 @@ func MakeFromData(data *Data) Interface {
func init() {
makeArrayFn[arrow.LIST] = func(data *Data) Interface { return NewListData(data) }
+ makeArrayFn[arrow.STRUCT] = func(data *Data) Interface { return NewStructData(data) }
}
diff --git a/go/arrow/array/array_test.go b/go/arrow/array/array_test.go
index cd051ee..b442c76 100644
--- a/go/arrow/array/array_test.go
+++ b/go/arrow/array/array_test.go
@@ -66,6 +66,12 @@ func TestMakeFromData(t *testing.T) {
array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0),
}},
+ {name: "struct", d: &testDataType{arrow.STRUCT}},
+ {name: "struct", d: &testDataType{arrow.STRUCT}, child: []*array.Data{
+ array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0),
+ array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0),
+ }},
+
// invalid types
{name: "invalid(-1)", d: &testDataType{arrow.Type(-1)}, expPanic: true, expError: "invalid data type: Type(-1)"},
{name: "invalid(28)", d: &testDataType{arrow.Type(28)}, expPanic: true, expError: "invalid data type: Type(28)"},
diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go
index a20c303..2e237f4 100644
--- a/go/arrow/array/builder.go
+++ b/go/arrow/array/builder.go
@@ -44,10 +44,16 @@ type Builder interface {
// NullN returns the number of null values in the array builder.
NullN() int
+ // AppendNull adds a new null value to the array being built.
+ AppendNull()
+
// NewArray creates a new array from the memory buffers used
// by the builder and resets the Builder so it can be used to build
// a new array.
NewArray() Interface
+
+ init(capacity int)
+ resize(newBits int, init func(int))
}
// builder provides common functionality for managing the validity bitmap (nulls) when building arrays.
@@ -227,6 +233,8 @@ func newBuilder(mem memory.Allocator, dtype arrow.DataType) Builder {
typ := dtype.(*arrow.ListType)
return NewListBuilder(mem, typ.Elem())
case arrow.STRUCT:
+ typ := dtype.(*arrow.StructType)
+ return NewStructBuilder(mem, typ)
case arrow.UNION:
case arrow.DICTIONARY:
case arrow.MAP:
diff --git a/go/arrow/array/struct.go b/go/arrow/array/struct.go
new file mode 100644
index 0000000..9e0c1aa
--- /dev/null
+++ b/go/arrow/array/struct.go
@@ -0,0 +1,201 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array
+
+import (
+ "sync/atomic"
+
+ "github.com/apache/arrow/go/arrow"
+ "github.com/apache/arrow/go/arrow/internal/bitutil"
+ "github.com/apache/arrow/go/arrow/internal/debug"
+ "github.com/apache/arrow/go/arrow/memory"
+)
+
+// Struct represents an ordered sequence of relative types.
+// The values of each field are held in their own child array.
+type Struct struct {
+ array
+ fields []Interface // one array per child of the underlying Data, filled in by setData
+}
+
+// NewStructData builds a Struct array around data. The returned array
+// starts with a reference count of 1.
+func NewStructData(data *Data) *Struct {
+	s := new(Struct)
+	s.refCount = 1
+	s.setData(data)
+	return s
+}
+
+// NumField returns the number of field arrays held by the struct array.
+func (a *Struct) NumField() int {
+	return len(a.fields)
+}
+// Field returns the array holding the values of the i-th field.
+// It panics if i is out of range.
+func (a *Struct) Field(i int) Interface {
+	return a.fields[i]
+}
+
+// setData stores data on the embedded array and then creates one field
+// array per child of data via MakeFromData.
+func (a *Struct) setData(data *Data) {
+	a.array.setData(data)
+
+	a.fields = make([]Interface, len(data.childData))
+	for idx := range a.fields {
+		a.fields[idx] = MakeFromData(data.childData[idx])
+	}
+}
+
+// Retain increases the reference count of the struct array and of each of
+// its field arrays by 1.
+//
+// Release cascades to the field arrays on every call, so Retain has to
+// cascade as well: otherwise a Retain/Release pair on the struct array would
+// release the field arrays once more than they were retained.
+func (a *Struct) Retain() {
+	a.array.Retain()
+	for _, f := range a.fields {
+		f.Retain()
+	}
+}
+
+// Release decreases the reference count of the struct array and of each of
+// its field arrays by 1.
+func (a *Struct) Release() {
+	a.array.Release()
+	for _, f := range a.fields {
+		f.Release()
+	}
+}
+
+// StructBuilder builds Struct arrays. It records the validity of the struct
+// slots through the embedded builder and holds one child Builder per field.
+type StructBuilder struct {
+ builder
+
+ dtype arrow.DataType // the struct type passed to NewStructBuilder
+ fields []Builder // one builder per field of dtype, in field order
+}
+
+// NewStructBuilder returns a builder for arrays of type dtype, using the
+// provided memory allocator. One child builder is created per field of dtype.
+func NewStructBuilder(mem memory.Allocator, dtype *arrow.StructType) *StructBuilder {
+	fieldDefs := dtype.Fields()
+	sb := &StructBuilder{
+		builder: builder{refCount: 1, mem: mem},
+		dtype:   dtype,
+		fields:  make([]Builder, len(fieldDefs)),
+	}
+	for idx := range fieldDefs {
+		sb.fields[idx] = newBuilder(sb.mem, fieldDefs[idx].Type)
+	}
+	return sb
+}
+
+// Release decreases the reference count by 1.
+// When the reference count goes to zero, the validity bitmap is freed and
+// every field builder is released.
+func (b *StructBuilder) Release() {
+	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")
+
+	if atomic.AddInt64(&b.refCount, -1) != 0 {
+		return
+	}
+
+	if b.nullBitmap != nil {
+		b.nullBitmap.Release()
+		b.nullBitmap = nil
+	}
+
+	// The field builders are created once in NewStructBuilder and are never
+	// retained by this type, so they are released only here, when the last
+	// reference to the struct builder goes away.
+	for _, f := range b.fields {
+		f.Release()
+	}
+}
+
+// Append adds a new struct slot whose validity is v. For a null slot
+// (v == false) a null is also appended to every field builder; for a valid
+// slot no value is appended to the field builders.
+func (b *StructBuilder) Append(v bool) {
+	b.Reserve(1)
+	b.unsafeAppendBoolToBitmap(v)
+	if v {
+		return
+	}
+	for idx := range b.fields {
+		b.fields[idx].AppendNull()
+	}
+}
+
+// AppendValues appends one struct slot per entry of valids, each entry
+// giving the validity of its slot. No value is appended to the field
+// builders, not even for null slots.
+func (b *StructBuilder) AppendValues(valids []bool) {
+	count := len(valids)
+	b.Reserve(count)
+	b.builder.unsafeAppendBoolsToBitmap(valids, count)
+}
+
+// AppendNull adds a null struct slot; it is equivalent to Append(false).
+func (b *StructBuilder) AppendNull() {
+	b.Append(false)
+}
+
+// unsafeAppend records one more struct slot with validity v, without
+// checking that the builder has room for it.
+//
+// The validity flag is honoured: a null slot (v == false) leaves its bit
+// unset and is counted in the null counter instead of being marked valid.
+func (b *StructBuilder) unsafeAppend(v bool) {
+	b.unsafeAppendBoolToBitmap(v)
+}
+
+// unsafeAppendBoolToBitmap records the validity of one more slot: a valid
+// slot sets the bit at the current length in the validity bitmap, a null slot
+// increments the null counter. The length grows by one either way. No
+// capacity check is performed here.
+func (b *StructBuilder) unsafeAppendBoolToBitmap(isValid bool) {
+	if !isValid {
+		b.nulls++
+	} else {
+		bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
+	}
+	b.length++
+}
+
+// init initializes the struct builder, then every field builder, with the
+// given capacity.
+func (b *StructBuilder) init(capacity int) {
+	b.builder.init(capacity)
+	for idx := range b.fields {
+		b.fields[idx].init(capacity)
+	}
+}
+
+// Reserve makes sure there is room to append n more elements, delegating
+// to the embedded builder, which calls Resize if necessary.
+func (b *StructBuilder) Reserve(n int) {
+	grow := b.Resize
+	b.builder.reserve(n, grow)
+}
+
+// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
+// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
+// Values of n below minBuilderCapacity are raised to minBuilderCapacity.
+func (b *StructBuilder) Resize(n int) {
+	if n < minBuilderCapacity {
+		n = minBuilderCapacity
+	}
+
+	// A builder without capacity has not been initialized yet.
+	if b.capacity == 0 {
+		b.init(n)
+		return
+	}
+
+	b.builder.resize(n, b.builder.init)
+	for _, fb := range b.fields {
+		fb.resize(n, fb.init)
+	}
+}
+
+// NumField returns the number of field builders.
+func (b *StructBuilder) NumField() int {
+	return len(b.fields)
+}
+// FieldBuilder returns the builder of the i-th field.
+// It panics if i is out of range.
+func (b *StructBuilder) FieldBuilder(i int) Builder {
+	return b.fields[i]
+}
+
+// NewArray returns the Struct array produced by NewStructArray as an
+// Interface value; the StructBuilder is reset and can be used to build a
+// new array.
+func (b *StructBuilder) NewArray() Interface {
+	arr := b.NewStructArray()
+	return arr
+}
+
+// NewStructArray turns the content accumulated by the builder into a Struct
+// array and resets the StructBuilder so it can be used to build a new array.
+func (b *StructBuilder) NewStructArray() (a *Struct) {
+	d := b.newData()
+	a = NewStructData(d)
+	// Drop the local reference to d now that the array has been built from it.
+	d.Release()
+	return a
+}
+
+// newData asks every field builder for its array, assembles a Data value
+// from the struct's validity bitmap and the fields' data, and resets the
+// builder. The temporary field arrays are released when the function returns.
+func (b *StructBuilder) newData() (data *Data) {
+	children := make([]*Data, 0, len(b.fields))
+	for _, fb := range b.fields {
+		child := fb.NewArray()
+		defer child.Release()
+		children = append(children, child.Data())
+	}
+
+	buffers := []*memory.Buffer{
+		b.nullBitmap,
+		nil, // FIXME(sbinet)
+	}
+	data = NewData(b.dtype, b.length, buffers, children, b.nulls)
+	b.reset()
+
+	return data
+}
+
+// Compile-time checks that *Struct implements Interface and that
+// *StructBuilder implements Builder.
+var (
+ _ Interface = (*Struct)(nil)
+ _ Builder = (*StructBuilder)(nil)
+)
diff --git a/go/arrow/array/struct_test.go b/go/arrow/array/struct_test.go
new file mode 100644
index 0000000..6265299
--- /dev/null
+++ b/go/arrow/array/struct_test.go
@@ -0,0 +1,243 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array_test
+
+import (
+ "reflect"
+ "testing"
+
+ "github.com/apache/arrow/go/arrow"
+ "github.com/apache/arrow/go/arrow/array"
+ "github.com/apache/arrow/go/arrow/memory"
+)
+
+// TestStructArray builds the same struct array ten times with a single
+// StructBuilder (one list-of-uint8 field and one int32 field), appending
+// slot by slot, and checks the resulting array and both field arrays.
+func TestStructArray(t *testing.T) {
+ var (
+ pool = memory.NewGoAllocator()
+ f1s = []byte{'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'}
+ f2s = []int32{1, 2, 3, 4}
+
+ f1Lengths = []int{3, 0, 3, 4}
+ f1Offsets = []int32{0, 3, 3, 6, 10}
+ f1Valids = []bool{true, false, true, true}
+
+ isValid = []bool{true, true, true, true}
+
+ fields = []arrow.Field{
+ {Name: "f1", Type: arrow.ListOf(arrow.PrimitiveTypes.Uint8)},
+ {Name: "f2", Type: arrow.PrimitiveTypes.Int32},
+ }
+ dtype = arrow.StructOf(fields...)
+ )
+
+ sb := array.NewStructBuilder(pool, dtype)
+ defer sb.Release()
+
+ // Each iteration reuses sb after NewArray has reset it.
+ for i := 0; i < 10; i++ {
+ f1b := sb.FieldBuilder(0).(*array.ListBuilder)
+ f1vb := f1b.ValueBuilder().(*array.Uint8Builder)
+ f2b := sb.FieldBuilder(1).(*array.Int32Builder)
+
+ if got, want := sb.NumField(), 2; got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ sb.Resize(len(f1Lengths))
+ f1vb.Resize(len(f1s))
+ f2b.Resize(len(f2s))
+
+ // Fill the field builders: one list slot and one int32 per struct slot.
+ pos := 0
+ for i, length := range f1Lengths {
+ f1b.Append(f1Valids[i])
+ for j := 0; j < length; j++ {
+ f1vb.Append(f1s[pos])
+ pos++
+ }
+ f2b.Append(f2s[i])
+ }
+
+ // Then record the validity of the struct slots themselves.
+ for _, valid := range isValid {
+ sb.Append(valid)
+ }
+
+ arr := sb.NewArray().(*array.Struct)
+ defer arr.Release()
+
+ if got, want := arr.DataType().ID(), arrow.STRUCT; got != want {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+ if got, want := arr.Len(), len(isValid); got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+ for i, valid := range isValid {
+ if got, want := arr.IsValid(i), valid; got != want {
+ t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want)
+ }
+ }
+
+ {
+ f1arr := arr.Field(0).(*array.List)
+ if got, want := f1arr.Len(), len(f1Lengths); got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ for i := range f1Lengths {
+ if got, want := f1arr.IsValid(i), f1Valids[i]; got != want {
+ t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want)
+ }
+ if got, want := f1arr.IsNull(i), f1Lengths[i] == 0; got != want {
+ t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want)
+ }
+
+ }
+
+ if got, want := f1arr.Offsets(), f1Offsets; !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+
+ varr := f1arr.ListValues().(*array.Uint8)
+ if got, want := varr.Uint8Values(), f1s; !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+ }
+
+ {
+ f2arr := arr.Field(1).(*array.Int32)
+ if got, want := f2arr.Len(), len(f2s); got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ if got, want := f2arr.Int32Values(), f2s; !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+ }
+ }
+}
+
+// TestStructArrayEmpty checks that a builder for a struct type without
+// fields yields an empty Struct array with no field arrays.
+func TestStructArrayEmpty(t *testing.T) {
+	pool := memory.NewGoAllocator()
+	sb := array.NewStructBuilder(pool, arrow.StructOf())
+	defer sb.Release()
+
+	if got, want := sb.NumField(), 0; got != want {
+		t.Fatalf("got=%d, want=%d", got, want)
+	}
+
+	arr := sb.NewArray().(*array.Struct)
+	// Release the array, as the other struct tests do, so that its
+	// reference count is balanced.
+	defer arr.Release()
+
+	if got, want := arr.Len(), 0; got != want {
+		t.Fatalf("got=%d, want=%d", got, want)
+	}
+
+	if got, want := arr.NumField(), 0; got != want {
+		t.Fatalf("got=%d, want=%d", got, want)
+	}
+}
+
+// TestStructArrayBulkAppend builds the same struct array ten times with a
+// single StructBuilder, filling the struct and field builders through their
+// AppendValues methods, and checks the resulting array and both field arrays.
+func TestStructArrayBulkAppend(t *testing.T) {
+ var (
+ pool = memory.NewGoAllocator()
+ f1s = []byte{'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'}
+ f2s = []int32{1, 2, 3, 4}
+
+ f1Lengths = []int{3, 0, 3, 4}
+ f1Offsets = []int32{0, 3, 3, 6, 10}
+ f1Valids = []bool{true, false, true, true}
+
+ isValid = []bool{true, true, true, true}
+
+ fields = []arrow.Field{
+ {Name: "f1", Type: arrow.ListOf(arrow.PrimitiveTypes.Uint8)},
+ {Name: "f2", Type: arrow.PrimitiveTypes.Int32},
+ }
+ dtype = arrow.StructOf(fields...)
+ )
+
+ sb := array.NewStructBuilder(pool, dtype)
+ defer sb.Release()
+
+ // Each iteration reuses sb after NewArray has reset it.
+ for i := 0; i < 10; i++ {
+ f1b := sb.FieldBuilder(0).(*array.ListBuilder)
+ f1vb := f1b.ValueBuilder().(*array.Uint8Builder)
+ f2b := sb.FieldBuilder(1).(*array.Int32Builder)
+
+ if got, want := sb.NumField(), 2; got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ sb.Resize(len(f1Lengths))
+ f1vb.Resize(len(f1s))
+ f2b.Resize(len(f2s))
+
+ // Bulk-fill the struct validity, the list slots, the list values and the int32 field.
+ sb.AppendValues(isValid)
+ f1b.AppendValues(f1Offsets, f1Valids)
+ f1vb.AppendValues(f1s, nil)
+ f2b.AppendValues(f2s, nil)
+
+ arr := sb.NewArray().(*array.Struct)
+ defer arr.Release()
+
+ if got, want := arr.DataType().ID(), arrow.STRUCT; got != want {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+ if got, want := arr.Len(), len(isValid); got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+ for i, valid := range isValid {
+ if got, want := arr.IsValid(i), valid; got != want {
+ t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want)
+ }
+ }
+
+ {
+ f1arr := arr.Field(0).(*array.List)
+ if got, want := f1arr.Len(), len(f1Lengths); got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ for i := range f1Lengths {
+ if got, want := f1arr.IsValid(i), f1Valids[i]; got != want {
+ t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want)
+ }
+ if got, want := f1arr.IsNull(i), f1Lengths[i] == 0; got != want {
+ t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want)
+ }
+
+ }
+
+ if got, want := f1arr.Offsets(), f1Offsets; !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+
+ varr := f1arr.ListValues().(*array.Uint8)
+ if got, want := varr.Uint8Values(), f1s; !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+ }
+
+ {
+ f2arr := arr.Field(1).(*array.Int32)
+ if got, want := f2arr.Len(), len(f2s); got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ if got, want := f2arr.Int32Values(), f2s; !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+ }
+ }
+}
diff --git a/go/arrow/datatype_nested.go b/go/arrow/datatype_nested.go
index d34c096..e91e586 100644
--- a/go/arrow/datatype_nested.go
+++ b/go/arrow/datatype_nested.go
@@ -16,6 +16,8 @@
package arrow
+import "fmt"
+
// ListType describes a nested type in which each array slot contains
// a variable-size sequence of values, all having the same relative type.
type ListType struct {
@@ -39,6 +41,91 @@ func (*ListType) Name() string { return "list" }
// Elem returns the ListType's element type.
func (t *ListType) Elem() DataType { return t.elem }
+// StructType describes a nested type parameterized by an ordered sequence
+// of relative types, called its fields.
+type StructType struct {
+ fields []Field // the fields, in the order they were given to StructOf
+ index map[string]int // maps a field name to its position in fields
+ meta KeyValueMetadata // not populated by StructOf
+}
+
+// StructOf returns the struct type made of the fields fs, in the given
+// order. Each field is copied, including a copy of its metadata. Without
+// any field, an empty StructType is returned.
+//
+// StructOf panics if two fields share the same name.
+// StructOf panics if a field has a nil DataType.
+func StructOf(fs ...Field) *StructType {
+	if len(fs) == 0 {
+		return &StructType{}
+	}
+
+	st := &StructType{
+		fields: make([]Field, len(fs)),
+		index:  make(map[string]int, len(fs)),
+	}
+	for pos := range fs {
+		src := fs[pos]
+		if src.Type == nil {
+			panic("arrow: field with nil DataType")
+		}
+		if _, seen := st.index[src.Name]; seen {
+			panic(fmt.Errorf("arrow: duplicate field with name %q", src.Name))
+		}
+		st.fields[pos] = Field{
+			Name:     src.Name,
+			Type:     src.Type,
+			Nullable: src.Nullable,
+			Metadata: src.Metadata.clone(),
+		}
+		st.index[src.Name] = pos
+	}
+
+	return st
+}
+
+// ID returns STRUCT, the type identifier of struct types.
+func (*StructType) ID() Type {
+	return STRUCT
+}
+// Name returns "struct", the name of struct types.
+func (*StructType) Name() string {
+	return "struct"
+}
+
+// Fields returns the fields of the struct type, in order. The returned
+// slice is the one held by the type, not a copy.
+func (t *StructType) Fields() []Field {
+	return t.fields
+}
+// Field returns the i-th field of the struct type.
+// It panics if i is out of range.
+func (t *StructType) Field(i int) Field {
+	return t.fields[i]
+}
+
+// FieldByName returns the field called name and true, or a zero Field and
+// false when the struct type has no field with that name.
+func (t *StructType) FieldByName(name string) (Field, bool) {
+	if pos, found := t.index[name]; found {
+		return t.fields[pos], true
+	}
+	return Field{}, false
+}
+
+// Field is a named, typed entry such as the fields of a StructType.
+type Field struct {
+ Name string // Field name
+ Type DataType // The field's data type
+ Nullable bool // Fields can be nullable
+ Metadata KeyValueMetadata // The field's metadata, if any
+}
+
+// HasMetadata reports whether the field's metadata holds at least one key.
+func (f Field) HasMetadata() bool {
+	return len(f.Metadata.keys) != 0
+}
+
+// KeyValueMetadata holds metadata as a slice of keys and a slice of values.
+// NOTE(review): keys[i] presumably pairs with values[i] — confirm.
+type KeyValueMetadata struct {
+ keys []string
+ values []string
+}
+
+// clone returns a copy of kv that shares no slice with it. Metadata without
+// any key is cloned as the zero KeyValueMetadata.
+func (kv KeyValueMetadata) clone() KeyValueMetadata {
+	var dup KeyValueMetadata
+	if len(kv.keys) == 0 {
+		return dup
+	}
+
+	dup.keys = make([]string, len(kv.keys))
+	dup.values = make([]string, len(kv.values))
+	copy(dup.keys, kv.keys)
+	copy(dup.values, kv.values)
+
+	return dup
+}
+
var (
_ DataType = (*ListType)(nil)
+ _ DataType = (*StructType)(nil)
)
diff --git a/go/arrow/datatype_nested_test.go b/go/arrow/datatype_nested_test.go
index 54ffb02..8b3f5b7 100644
--- a/go/arrow/datatype_nested_test.go
+++ b/go/arrow/datatype_nested_test.go
@@ -34,6 +34,8 @@ func TestListOf(t *testing.T) {
PrimitiveTypes.Uint64,
PrimitiveTypes.Float32,
PrimitiveTypes.Float64,
+ ListOf(PrimitiveTypes.Int32),
+ StructOf(),
} {
t.Run(tc.Name(), func(t *testing.T) {
got := ListOf(tc)
@@ -55,4 +57,187 @@ func TestListOf(t *testing.T) {
}
})
}
+
+ for _, dtype := range []DataType{
+ nil,
+ // (*Int32Type)(nil), // FIXME(sbinet): should we make sure this is actually caught?
+ // (*ListType)(nil), // FIXME(sbinet): should we make sure this is actually caught?
+ // (*StructType)(nil), // FIXME(sbinet): should we make sure this is actually caught?
+ } {
+ t.Run("invalid", func(t *testing.T) {
+ defer func() {
+ e := recover()
+ if e == nil {
+ t.Fatalf("test should have panicked but did not")
+ }
+ }()
+
+ _ = ListOf(dtype)
+ })
+ }
+}
+
+// TestStructOf checks the StructType returned by StructOf for a range of
+// field lists, and that StructOf panics on duplicated field names.
+func TestStructOf(t *testing.T) {
+ for _, tc := range []struct {
+ fields []Field
+ want DataType
+ }{
+ {
+ fields: nil,
+ want: &StructType{fields: nil, index: nil},
+ },
+ {
+ fields: []Field{{Name: "f1", Type: PrimitiveTypes.Int32}},
+ want: &StructType{
+ fields: []Field{{Name: "f1", Type: PrimitiveTypes.Int32}},
+ index: map[string]int{"f1": 0},
+ },
+ },
+ {
+ fields: []Field{{Name: "f1", Type: PrimitiveTypes.Int32, Nullable: true}},
+ want: &StructType{
+ fields: []Field{{Name: "f1", Type: PrimitiveTypes.Int32, Nullable: true}},
+ index: map[string]int{"f1": 0},
+ },
+ },
+ {
+ fields: []Field{
+ {Name: "f1", Type: PrimitiveTypes.Int32},
+ {Name: "", Type: PrimitiveTypes.Int64},
+ },
+ want: &StructType{
+ fields: []Field{
+ {Name: "f1", Type: PrimitiveTypes.Int32},
+ {Name: "", Type: PrimitiveTypes.Int64},
+ },
+ index: map[string]int{"f1": 0, "": 1},
+ },
+ },
+ {
+ fields: []Field{
+ {Name: "f1", Type: PrimitiveTypes.Int32},
+ {Name: "f2", Type: PrimitiveTypes.Int64},
+ },
+ want: &StructType{
+ fields: []Field{
+ {Name: "f1", Type: PrimitiveTypes.Int32},
+ {Name: "f2", Type: PrimitiveTypes.Int64},
+ },
+ index: map[string]int{"f1": 0, "f2": 1},
+ },
+ },
+ {
+ fields: []Field{
+ {Name: "f1", Type: PrimitiveTypes.Int32},
+ {Name: "f2", Type: PrimitiveTypes.Int64},
+ {Name: "f3", Type: ListOf(PrimitiveTypes.Float64)},
+ },
+ want: &StructType{
+ fields: []Field{
+ {Name: "f1", Type: PrimitiveTypes.Int32},
+ {Name: "f2", Type: PrimitiveTypes.Int64},
+ {Name: "f3", Type: ListOf(PrimitiveTypes.Float64)},
+ },
+ index: map[string]int{"f1": 0, "f2": 1, "f3": 2},
+ },
+ },
+ {
+ fields: []Field{
+ {Name: "f1", Type: PrimitiveTypes.Int32},
+ {Name: "f2", Type: PrimitiveTypes.Int64},
+ {Name: "f3", Type: ListOf(ListOf(PrimitiveTypes.Float64))},
+ },
+ want: &StructType{
+ fields: []Field{
+ {Name: "f1", Type: PrimitiveTypes.Int32},
+ {Name: "f2", Type: PrimitiveTypes.Int64},
+ {Name: "f3", Type: ListOf(ListOf(PrimitiveTypes.Float64))},
+ },
+ index: map[string]int{"f1": 0, "f2": 1, "f3": 2},
+ },
+ },
+ {
+ fields: []Field{
+ {Name: "f1", Type: PrimitiveTypes.Int32},
+ {Name: "f2", Type: PrimitiveTypes.Int64},
+ {Name: "f3", Type: ListOf(ListOf(StructOf(Field{Name: "f1", Type: PrimitiveTypes.Float64})))},
+ },
+ want: &StructType{
+ fields: []Field{
+ {Name: "f1", Type: PrimitiveTypes.Int32},
+ {Name: "f2", Type: PrimitiveTypes.Int64},
+ {Name: "f3", Type: ListOf(ListOf(StructOf(Field{Name: "f1", Type: PrimitiveTypes.Float64})))},
+ },
+ index: map[string]int{"f1": 0, "f2": 1, "f3": 2},
+ },
+ },
+ } {
+ t.Run("", func(t *testing.T) {
+ got := StructOf(tc.fields...)
+ if !reflect.DeepEqual(got, tc.want) {
+ t.Fatalf("got=%#v, want=%#v", got, tc.want)
+ }
+
+ if got, want := got.ID(), STRUCT; got != want {
+ t.Fatalf("invalid ID. got=%v, want=%v", got, want)
+ }
+
+ if got, want := got.Name(), "struct"; got != want {
+ t.Fatalf("invalid name. got=%q, want=%q", got, want)
+ }
+
+ if got, want := len(got.Fields()), len(tc.fields); got != want {
+ t.Fatalf("invalid number of fields. got=%d, want=%d", got, want)
+ }
+
+ _, ok := got.FieldByName("not-there")
+ if ok {
+ t.Fatalf("expected an error")
+ }
+
+ // Every non-empty case above declares a field named "f1".
+ if len(tc.fields) > 0 {
+ f1, ok := got.FieldByName("f1")
+ if !ok {
+ t.Fatalf("could not retrieve field 'f1'")
+ }
+ if f1.HasMetadata() {
+ t.Fatalf("field 'f1' should not have metadata")
+ }
+
+ for i := range tc.fields {
+ f := got.Field(i)
+ if f.Name != tc.fields[i].Name {
+ t.Fatalf("incorrect named for field[%d]: got=%q, want=%q", i, f.Name, tc.fields[i].Name)
+ }
+ }
+ }
+ })
+ }
+
+ // Duplicated field names (including the empty name) must make StructOf panic.
+ for _, tc := range []struct {
+ fields []Field
+ }{
+ {
+ fields: []Field{
+ {Name: "", Type: PrimitiveTypes.Int32},
+ {Name: "", Type: PrimitiveTypes.Int32},
+ },
+ },
+ {
+ fields: []Field{
+ {Name: "x", Type: PrimitiveTypes.Int32},
+ {Name: "x", Type: PrimitiveTypes.Int32},
+ },
+ },
+ } {
+ t.Run("", func(t *testing.T) {
+ defer func() {
+ e := recover()
+ if e == nil {
+ t.Fatalf("should have panicked")
+ }
+ }()
+ _ = StructOf(tc.fields...)
+ })
+ }
}
diff --git a/go/arrow/example_test.go b/go/arrow/example_test.go
index 19a6630..70aadbb 100644
--- a/go/arrow/example_test.go
+++ b/go/arrow/example_test.go
@@ -191,3 +191,93 @@ func Example_listArray() {
// List[5] = (null)
// List[6] = [9]
}
+
+// This example shows how to create a Struct array.
+// The resulting array should be:
+//   [{'joe', 1}, {null, 2}, null, {'mark', 4}]
+func Example_structArray() {
+	pool := memory.NewGoAllocator()
+	dtype := arrow.StructOf([]arrow.Field{
+		{Name: "f1", Type: arrow.ListOf(arrow.PrimitiveTypes.Uint8)},
+		{Name: "f2", Type: arrow.PrimitiveTypes.Int32},
+	}...)
+
+	sb := array.NewStructBuilder(pool, dtype)
+	defer sb.Release()
+
+	f1b := sb.FieldBuilder(0).(*array.ListBuilder)
+	defer f1b.Release()
+	f1vb := f1b.ValueBuilder().(*array.Uint8Builder)
+	defer f1vb.Release()
+
+	f2b := sb.FieldBuilder(1).(*array.Int32Builder)
+	defer f2b.Release()
+
+	sb.Reserve(4)
+	f1vb.Reserve(7)
+	f2b.Reserve(3)
+
+	// Struct[0] = {'joe', 1}
+	sb.Append(true)
+	f1b.Append(true)
+	f1vb.AppendValues([]byte("joe"), nil)
+	f2b.Append(1)
+
+	// Struct[1] = {null, 2}
+	sb.Append(true)
+	f1b.AppendNull()
+	f2b.Append(2)
+
+	// Struct[2] = null; the struct builder appends a null to every field.
+	sb.AppendNull()
+
+	// Struct[3] = {'mark', 4}
+	sb.Append(true)
+	f1b.Append(true)
+	f1vb.AppendValues([]byte("mark"), nil)
+	f2b.Append(4)
+
+	arr := sb.NewArray().(*array.Struct)
+	defer arr.Release()
+
+	fmt.Printf("NullN() = %d\n", arr.NullN())
+	fmt.Printf("Len() = %d\n", arr.Len())
+
+	list := arr.Field(0).(*array.List)
+	defer list.Release()
+
+	offsets := list.Offsets()
+
+	varr := list.ListValues().(*array.Uint8)
+	defer varr.Release()
+
+	ints := arr.Field(1).(*array.Int32)
+	defer ints.Release()
+
+	for i := 0; i < arr.Len(); i++ {
+		if !arr.IsValid(i) {
+			fmt.Printf("Struct[%d] = (null)\n", i)
+			continue
+		}
+		fmt.Printf("Struct[%d] = [", i)
+		// The validity of a list slot is indexed by the slot (row) number,
+		// not by the slot's offset into the list values.
+		if list.IsValid(i) {
+			fmt.Printf("[")
+			for j := offsets[i]; j < offsets[i+1]; j++ {
+				if j != offsets[i] {
+					fmt.Printf(", ")
+				}
+				fmt.Printf("%v", string(varr.Value(int(j))))
+			}
+			fmt.Printf("], ")
+		} else {
+			fmt.Printf("(null), ")
+		}
+		fmt.Printf("%d]\n", ints.Value(i))
+	}
+
+	// Output:
+	// NullN() = 1
+	// Len() = 4
+	// Struct[0] = [[j, o, e], 1]
+	// Struct[1] = [(null), 2]
+	// Struct[2] = (null)
+	// Struct[3] = [[m, a, r, k], 4]
+}
diff --git a/go/arrow/metadata/schema.go b/go/arrow/metadata/schema.go
index 021b6ee..2dad190 100644
--- a/go/arrow/metadata/schema.go
+++ b/go/arrow/metadata/schema.go
@@ -19,19 +19,7 @@ package metadata
import "github.com/apache/arrow/go/arrow"
type Schema struct {
- fields []Field
+ fields []arrow.Field
nameToIndex map[string]int
- metadata KeyValueMetadata
-}
-
-type Field struct {
- name string // Field name
- typ arrow.DataType // The field's data type
- nullable bool // Fields can be nullable
- metadata KeyValueMetadata // The field's metadata, if any
-}
-
-type KeyValueMetadata struct {
- keys []string
- values []string
+ metadata arrow.KeyValueMetadata
}