You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2018/08/13 18:00:07 UTC

[arrow] branch master updated: ARROW-3022: [Go] add support for Struct arrays

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 2dfb01f  ARROW-3022: [Go] add support for Struct arrays
2dfb01f is described below

commit 2dfb01fd8632618315e2d9e215b5cfafb0cdd1a8
Author: Sebastien Binet <bi...@cern.ch>
AuthorDate: Mon Aug 13 19:59:57 2018 +0200

    ARROW-3022: [Go] add support for Struct arrays
    
    Author: Sebastien Binet <bi...@cern.ch>
    
    Closes #2411 from sbinet/issue-3022 and squashes the following commits:
    
    101752a <Sebastien Binet> ARROW-3022:  add support for Struct arrays
---
 go/README.md                     |   6 +-
 go/arrow/array/array.go          |   1 +
 go/arrow/array/array_test.go     |   6 +
 go/arrow/array/builder.go        |   8 ++
 go/arrow/array/struct.go         | 201 ++++++++++++++++++++++++++++++++
 go/arrow/array/struct_test.go    | 243 +++++++++++++++++++++++++++++++++++++++
 go/arrow/datatype_nested.go      |  87 ++++++++++++++
 go/arrow/datatype_nested_test.go | 185 +++++++++++++++++++++++++++++
 go/arrow/example_test.go         |  90 +++++++++++++++
 go/arrow/metadata/schema.go      |  16 +--
 10 files changed, 826 insertions(+), 17 deletions(-)

diff --git a/go/README.md b/go/README.md
index e4d349d..da18937 100644
--- a/go/README.md
+++ b/go/README.md
@@ -143,8 +143,8 @@ them internally in the [ifql][] execution engine and storage layers of [InfluxDB
 - [ ] Time64 (microseconds or nanoseconds since midnight)
 - [ ] Decimal (128-bit)
 - [ ] Fixed-sized binary
-- [ ] List
-- [ ] Struct
+- [x] List
+- [x] Struct
 - [ ] Union
     - [ ] Dense
     - [ ] Sparse
@@ -171,4 +171,4 @@ Serialization is planned for a future iteration.
 [arrow]:    https://arrow.apache.org
 [ifql]:     https://github.com/influxdata/ifql
 [InfluxDB]: https://github.com/influxdata/influxdb
-[c2goasm]:  https://github.com/minio/c2goasm
\ No newline at end of file
+[c2goasm]:  https://github.com/minio/c2goasm
diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go
index 61cefac..f45bec4 100644
--- a/go/arrow/array/array.go
+++ b/go/arrow/array/array.go
@@ -186,4 +186,5 @@ func MakeFromData(data *Data) Interface {
 
+// init registers, in makeArrayFn, the constructors used for the nested
+// LIST and STRUCT array types.
 func init() {
 	makeArrayFn[arrow.LIST] = func(data *Data) Interface { return NewListData(data) }
+	makeArrayFn[arrow.STRUCT] = func(data *Data) Interface { return NewStructData(data) }
 }
diff --git a/go/arrow/array/array_test.go b/go/arrow/array/array_test.go
index cd051ee..b442c76 100644
--- a/go/arrow/array/array_test.go
+++ b/go/arrow/array/array_test.go
@@ -66,6 +66,12 @@ func TestMakeFromData(t *testing.T) {
 			array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0),
 		}},
 
+		{name: "struct", d: &testDataType{arrow.STRUCT}},
+		{name: "struct", d: &testDataType{arrow.STRUCT}, child: []*array.Data{
+			array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0),
+			array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0),
+		}},
+
 		// invalid types
 		{name: "invalid(-1)", d: &testDataType{arrow.Type(-1)}, expPanic: true, expError: "invalid data type: Type(-1)"},
 		{name: "invalid(28)", d: &testDataType{arrow.Type(28)}, expPanic: true, expError: "invalid data type: Type(28)"},
diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go
index a20c303..2e237f4 100644
--- a/go/arrow/array/builder.go
+++ b/go/arrow/array/builder.go
@@ -44,10 +44,16 @@ type Builder interface {
 	// NullN returns the number of null values in the array builder.
 	NullN() int
 
+	// AppendNull adds a new null value to the array being built.
+	AppendNull()
+
 	// NewArray creates a new array from the memory buffers used
 	// by the builder and resets the Builder so it can be used to build
 	// a new array.
 	NewArray() Interface
+
+	init(capacity int)
+	resize(newBits int, init func(int))
 }
 
 // builder provides common functionality for managing the validity bitmap (nulls) when building arrays.
@@ -227,6 +233,8 @@ func newBuilder(mem memory.Allocator, dtype arrow.DataType) Builder {
 		typ := dtype.(*arrow.ListType)
 		return NewListBuilder(mem, typ.Elem())
 	case arrow.STRUCT:
+		typ := dtype.(*arrow.StructType)
+		return NewStructBuilder(mem, typ)
 	case arrow.UNION:
 	case arrow.DICTIONARY:
 	case arrow.MAP:
diff --git a/go/arrow/array/struct.go b/go/arrow/array/struct.go
new file mode 100644
index 0000000..9e0c1aa
--- /dev/null
+++ b/go/arrow/array/struct.go
@@ -0,0 +1,201 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array
+
+import (
+	"sync/atomic"
+
+	"github.com/apache/arrow/go/arrow"
+	"github.com/apache/arrow/go/arrow/internal/bitutil"
+	"github.com/apache/arrow/go/arrow/internal/debug"
+	"github.com/apache/arrow/go/arrow/memory"
+)
+
+// Struct represents an ordered sequence of relative types.
+//
+// It embeds the common array state and holds one child array per entry of
+// the child data of the Data it was built from.
+type Struct struct {
+	array
+	fields []Interface // child arrays, in the order of the Data's child data
+}
+
+// NewStructData returns a new Struct array value from data.
+// The returned array starts with a reference count of 1; its child arrays
+// are created from data's child data (see setData).
+func NewStructData(data *Data) *Struct {
+	a := &Struct{}
+	a.refCount = 1
+	a.setData(data)
+	return a
+}
+
+// NumField returns the number of child arrays held by the struct array.
+// Field returns the i-th child array; it panics if i is out of range.
+func (a *Struct) NumField() int         { return len(a.fields) }
+func (a *Struct) Field(i int) Interface { return a.fields[i] }
+
+// setData stores data in the embedded array and rebuilds the child arrays,
+// creating one array with MakeFromData for each entry of data's child data.
+func (a *Struct) setData(data *Data) {
+	a.array.setData(data)
+
+	children := data.childData
+	a.fields = make([]Interface, len(children))
+	for i := range children {
+		a.fields[i] = MakeFromData(children[i])
+	}
+}
+
+// Release releases the embedded array state and then calls Release on
+// every child array.
+//
+// NOTE(review): the child arrays are released on every call, not only when
+// the last reference to the struct array is dropped, and no matching Retain
+// override is visible in this file. Confirm that Retain/Release stay
+// balanced for the children.
+func (a *Struct) Release() {
+	a.array.Release()
+	for _, f := range a.fields {
+		f.Release()
+	}
+}
+
+// StructBuilder builds Struct arrays. It records the validity of the struct
+// slots itself and holds one child Builder per field for the field values.
+type StructBuilder struct {
+	builder
+
+	dtype  arrow.DataType // data type passed to NewStructBuilder
+	fields []Builder      // one child builder per field of dtype
+}
+
+// NewStructBuilder returns a builder for arrays of type dtype. One child
+// builder is created per field of dtype, and every builder uses the
+// provided memory allocator.
+func NewStructBuilder(mem memory.Allocator, dtype *arrow.StructType) *StructBuilder {
+	fields := dtype.Fields()
+	children := make([]Builder, len(fields))
+	for i := range fields {
+		children[i] = newBuilder(mem, fields[i].Type)
+	}
+
+	return &StructBuilder{
+		builder: builder{refCount: 1, mem: mem},
+		dtype:   dtype,
+		fields:  children,
+	}
+}
+
+// Release decreases the reference count by 1.
+// When the reference count goes to zero, the memory is freed: the validity
+// bitmap is released and Release is called once on every field builder.
+func (b *StructBuilder) Release() {
+	debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")
+
+	if atomic.AddInt64(&b.refCount, -1) != 0 {
+		// Other references remain: keep the bitmap and the field builders.
+		return
+	}
+
+	if b.nullBitmap != nil {
+		b.nullBitmap.Release()
+		b.nullBitmap = nil
+	}
+
+	// The field builders were created by NewStructBuilder for this builder,
+	// so they are dropped together with its last reference instead of on
+	// every Release call (which would over-release them).
+	for _, f := range b.fields {
+		f.Release()
+	}
+}
+
+// Append adds one struct slot whose validity is v.
+// For a null slot (v == false) a null is also appended to every field
+// builder; for a valid slot the field builders are left untouched.
+func (b *StructBuilder) Append(v bool) {
+	b.Reserve(1)
+	b.unsafeAppendBoolToBitmap(v)
+	if v {
+		return
+	}
+	for i := range b.fields {
+		b.fields[i].AppendNull()
+	}
+}
+
+// AppendValues adds len(valids) struct slots at once: it reserves room for
+// them and records valids through the embedded builder's
+// unsafeAppendBoolsToBitmap. The field builders are not modified here.
+func (b *StructBuilder) AppendValues(valids []bool) {
+	b.Reserve(len(valids))
+	b.builder.unsafeAppendBoolsToBitmap(valids, len(valids))
+}
+
+// AppendNull adds a null struct slot; it is equivalent to Append(false).
+func (b *StructBuilder) AppendNull() { b.Append(false) }
+
+// unsafeAppend sets the validity bit of the slot at index b.length and
+// increments the length. It does not reserve space first.
+//
+// NOTE(review): the v argument is not used; the slot is always marked valid.
+func (b *StructBuilder) unsafeAppend(v bool) {
+	bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
+	b.length++
+}
+
+// unsafeAppendBoolToBitmap appends one slot without reserving space: a
+// valid slot gets its bit set in the validity bitmap, a null slot
+// increments the null count. The length grows by one in both cases.
+func (b *StructBuilder) unsafeAppendBoolToBitmap(isValid bool) {
+	if !isValid {
+		b.nulls++
+		b.length++
+		return
+	}
+	bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
+	b.length++
+}
+
+// init initializes the embedded builder and then every field builder with
+// the given capacity.
+func (b *StructBuilder) init(capacity int) {
+	b.builder.init(capacity)
+	for i := range b.fields {
+		b.fields[i].init(capacity)
+	}
+}
+
+// Reserve ensures there is enough space for appending n elements
+// by checking the capacity and calling Resize if necessary.
+// The check itself is done by the embedded builder's reserve, which is
+// given b.Resize as the function to grow with.
+func (b *StructBuilder) Reserve(n int) {
+	b.builder.reserve(n, b.Resize)
+}
+
+// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
+// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
+// n is never taken below minBuilderCapacity, and the field builders are
+// resized (or initialized, on first use) together with b.
+func (b *StructBuilder) Resize(n int) {
+	if n < minBuilderCapacity {
+		n = minBuilderCapacity
+	}
+
+	// Nothing allocated yet: initialize b and its field builders.
+	if b.capacity == 0 {
+		b.init(n)
+		return
+	}
+
+	b.builder.resize(n, b.builder.init)
+	for i := range b.fields {
+		child := b.fields[i]
+		child.resize(n, child.init)
+	}
+}
+
+// NumField returns the number of field builders held by b.
+// FieldBuilder returns the builder of the i-th field; it panics if i is out of range.
+func (b *StructBuilder) NumField() int              { return len(b.fields) }
+func (b *StructBuilder) FieldBuilder(i int) Builder { return b.fields[i] }
+
+// NewArray creates a Struct array from the memory buffers used by the builder and resets the StructBuilder
+// so it can be used to build a new array.
+// It returns the result of NewStructArray as an Interface.
+func (b *StructBuilder) NewArray() Interface {
+	return b.NewStructArray()
+}
+
+// NewStructArray creates a Struct array from the memory buffers used by the builder and resets the StructBuilder
+// so it can be used to build a new array.
+func (b *StructBuilder) NewStructArray() (a *Struct) {
+	built := b.newData()
+	a = NewStructData(built)
+	// Drop the reference obtained from newData now that a has been created.
+	built.Release()
+	return a
+}
+
+// newData turns the current builder state into a Data value: every field
+// builder is finalized with NewArray, the children's Data are combined with
+// the validity bitmap, length and null count of b, and b is then reset.
+func (b *StructBuilder) newData() (data *Data) {
+	fields := make([]*Data, len(b.fields))
+	for i, f := range b.fields {
+		arr := f.NewArray()
+		// Deferred, so the child arrays stay alive until this function
+		// returns, i.e. until after NewData below has received their Data.
+		// NOTE(review): presumably NewData takes its own reference on the
+		// child data; confirm.
+		defer arr.Release()
+		fields[i] = arr.Data()
+	}
+
+	data = NewData(
+		b.dtype, b.length,
+		[]*memory.Buffer{
+			b.nullBitmap,
+			nil, // FIXME(sbinet)
+		},
+		fields,
+		b.nulls,
+	)
+	b.reset()
+
+	return
+}
+
+// Compile-time checks that Struct implements Interface and that
+// StructBuilder implements Builder.
+var (
+	_ Interface = (*Struct)(nil)
+	_ Builder   = (*StructBuilder)(nil)
+)
diff --git a/go/arrow/array/struct_test.go b/go/arrow/array/struct_test.go
new file mode 100644
index 0000000..6265299
--- /dev/null
+++ b/go/arrow/array/struct_test.go
@@ -0,0 +1,243 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array_test
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/apache/arrow/go/arrow"
+	"github.com/apache/arrow/go/arrow/array"
+	"github.com/apache/arrow/go/arrow/memory"
+)
+
+// TestStructArray builds a struct array with a list<uint8> field and an
+// int32 field by appending values one slot at a time, then checks the type,
+// length, validity and field contents of the resulting array.
+func TestStructArray(t *testing.T) {
+	var (
+		pool = memory.NewGoAllocator()
+		f1s  = []byte{'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'}
+		f2s  = []int32{1, 2, 3, 4}
+
+		f1Lengths = []int{3, 0, 3, 4}
+		f1Offsets = []int32{0, 3, 3, 6, 10}
+		f1Valids  = []bool{true, false, true, true}
+
+		isValid = []bool{true, true, true, true}
+
+		fields = []arrow.Field{
+			{Name: "f1", Type: arrow.ListOf(arrow.PrimitiveTypes.Uint8)},
+			{Name: "f2", Type: arrow.PrimitiveTypes.Int32},
+		}
+		dtype = arrow.StructOf(fields...)
+	)
+
+	sb := array.NewStructBuilder(pool, dtype)
+	defer sb.Release()
+
+	// The same builder is used for 10 build/check rounds, which exercises
+	// reuse of the builder after NewArray.
+	for i := 0; i < 10; i++ {
+		f1b := sb.FieldBuilder(0).(*array.ListBuilder)
+		f1vb := f1b.ValueBuilder().(*array.Uint8Builder)
+		f2b := sb.FieldBuilder(1).(*array.Int32Builder)
+
+		if got, want := sb.NumField(), 2; got != want {
+			t.Fatalf("got=%d, want=%d", got, want)
+		}
+
+		sb.Resize(len(f1Lengths))
+		f1vb.Resize(len(f1s))
+		f2b.Resize(len(f2s))
+
+		// Fill the field builders slot by slot; pos walks through f1s.
+		pos := 0
+		for i, length := range f1Lengths {
+			f1b.Append(f1Valids[i])
+			for j := 0; j < length; j++ {
+				f1vb.Append(f1s[pos])
+				pos++
+			}
+			f2b.Append(f2s[i])
+		}
+
+		for _, valid := range isValid {
+			sb.Append(valid)
+		}
+
+		arr := sb.NewArray().(*array.Struct)
+		// Deferred inside the loop: the arrays are only released when the
+		// test function returns.
+		defer arr.Release()
+
+		if got, want := arr.DataType().ID(), arrow.STRUCT; got != want {
+			t.Fatalf("got=%v, want=%v", got, want)
+		}
+		if got, want := arr.Len(), len(isValid); got != want {
+			t.Fatalf("got=%d, want=%d", got, want)
+		}
+		for i, valid := range isValid {
+			if got, want := arr.IsValid(i), valid; got != want {
+				t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want)
+			}
+		}
+
+		// Check the list field: length, validity, offsets and values.
+		{
+			f1arr := arr.Field(0).(*array.List)
+			if got, want := f1arr.Len(), len(f1Lengths); got != want {
+				t.Fatalf("got=%d, want=%d", got, want)
+			}
+
+			for i := range f1Lengths {
+				if got, want := f1arr.IsValid(i), f1Valids[i]; got != want {
+					t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want)
+				}
+				if got, want := f1arr.IsNull(i), f1Lengths[i] == 0; got != want {
+					t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want)
+				}
+
+			}
+
+			if got, want := f1arr.Offsets(), f1Offsets; !reflect.DeepEqual(got, want) {
+				t.Fatalf("got=%v, want=%v", got, want)
+			}
+
+			varr := f1arr.ListValues().(*array.Uint8)
+			if got, want := varr.Uint8Values(), f1s; !reflect.DeepEqual(got, want) {
+				t.Fatalf("got=%v, want=%v", got, want)
+			}
+		}
+
+		// Check the int32 field: length and values.
+		{
+			f2arr := arr.Field(1).(*array.Int32)
+			if got, want := f2arr.Len(), len(f2s); got != want {
+				t.Fatalf("got=%d, want=%d", got, want)
+			}
+
+			if got, want := f2arr.Int32Values(), f2s; !reflect.DeepEqual(got, want) {
+				t.Fatalf("got=%d, want=%d", got, want)
+			}
+		}
+	}
+}
+
+// TestStructArrayEmpty checks that a builder for a struct type without
+// fields reports no fields and produces an empty array with no fields.
+func TestStructArrayEmpty(t *testing.T) {
+	pool := memory.NewGoAllocator()
+	sb := array.NewStructBuilder(pool, arrow.StructOf())
+	defer sb.Release()
+
+	if got, want := sb.NumField(), 0; got != want {
+		t.Fatalf("got=%d, want=%d", got, want)
+	}
+
+	arr := sb.NewArray().(*array.Struct)
+	// Release the built array, as the other struct tests do.
+	defer arr.Release()
+
+	if got, want := arr.Len(), 0; got != want {
+		t.Fatalf("got=%d, want=%d", got, want)
+	}
+
+	if got, want := arr.NumField(), 0; got != want {
+		t.Fatalf("got=%d, want=%d", got, want)
+	}
+}
+
+// TestStructArrayBulkAppend builds the same struct array as TestStructArray
+// but fills the builders with their AppendValues methods, then checks the
+// type, length, validity and field contents of the resulting array.
+func TestStructArrayBulkAppend(t *testing.T) {
+	var (
+		pool = memory.NewGoAllocator()
+		f1s  = []byte{'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'}
+		f2s  = []int32{1, 2, 3, 4}
+
+		f1Lengths = []int{3, 0, 3, 4}
+		f1Offsets = []int32{0, 3, 3, 6, 10}
+		f1Valids  = []bool{true, false, true, true}
+
+		isValid = []bool{true, true, true, true}
+
+		fields = []arrow.Field{
+			{Name: "f1", Type: arrow.ListOf(arrow.PrimitiveTypes.Uint8)},
+			{Name: "f2", Type: arrow.PrimitiveTypes.Int32},
+		}
+		dtype = arrow.StructOf(fields...)
+	)
+
+	sb := array.NewStructBuilder(pool, dtype)
+	defer sb.Release()
+
+	// The same builder is used for 10 build/check rounds, which exercises
+	// reuse of the builder after NewArray.
+	for i := 0; i < 10; i++ {
+		f1b := sb.FieldBuilder(0).(*array.ListBuilder)
+		f1vb := f1b.ValueBuilder().(*array.Uint8Builder)
+		f2b := sb.FieldBuilder(1).(*array.Int32Builder)
+
+		if got, want := sb.NumField(), 2; got != want {
+			t.Fatalf("got=%d, want=%d", got, want)
+		}
+
+		sb.Resize(len(f1Lengths))
+		f1vb.Resize(len(f1s))
+		f2b.Resize(len(f2s))
+
+		// Fill the struct validity and every field in bulk.
+		sb.AppendValues(isValid)
+		f1b.AppendValues(f1Offsets, f1Valids)
+		f1vb.AppendValues(f1s, nil)
+		f2b.AppendValues(f2s, nil)
+
+		arr := sb.NewArray().(*array.Struct)
+		// Deferred inside the loop: the arrays are only released when the
+		// test function returns.
+		defer arr.Release()
+
+		if got, want := arr.DataType().ID(), arrow.STRUCT; got != want {
+			t.Fatalf("got=%v, want=%v", got, want)
+		}
+		if got, want := arr.Len(), len(isValid); got != want {
+			t.Fatalf("got=%d, want=%d", got, want)
+		}
+		for i, valid := range isValid {
+			if got, want := arr.IsValid(i), valid; got != want {
+				t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want)
+			}
+		}
+
+		// Check the list field: length, validity, offsets and values.
+		{
+			f1arr := arr.Field(0).(*array.List)
+			if got, want := f1arr.Len(), len(f1Lengths); got != want {
+				t.Fatalf("got=%d, want=%d", got, want)
+			}
+
+			for i := range f1Lengths {
+				if got, want := f1arr.IsValid(i), f1Valids[i]; got != want {
+					t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want)
+				}
+				if got, want := f1arr.IsNull(i), f1Lengths[i] == 0; got != want {
+					t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want)
+				}
+
+			}
+
+			if got, want := f1arr.Offsets(), f1Offsets; !reflect.DeepEqual(got, want) {
+				t.Fatalf("got=%v, want=%v", got, want)
+			}
+
+			varr := f1arr.ListValues().(*array.Uint8)
+			if got, want := varr.Uint8Values(), f1s; !reflect.DeepEqual(got, want) {
+				t.Fatalf("got=%v, want=%v", got, want)
+			}
+		}
+
+		// Check the int32 field: length and values.
+		{
+			f2arr := arr.Field(1).(*array.Int32)
+			if got, want := f2arr.Len(), len(f2s); got != want {
+				t.Fatalf("got=%d, want=%d", got, want)
+			}
+
+			if got, want := f2arr.Int32Values(), f2s; !reflect.DeepEqual(got, want) {
+				t.Fatalf("got=%d, want=%d", got, want)
+			}
+		}
+	}
+}
diff --git a/go/arrow/datatype_nested.go b/go/arrow/datatype_nested.go
index d34c096..e91e586 100644
--- a/go/arrow/datatype_nested.go
+++ b/go/arrow/datatype_nested.go
@@ -16,6 +16,8 @@
 
 package arrow
 
+import "fmt"
+
 // ListType describes a nested type in which each array slot contains
 // a variable-size sequence of values, all having the same relative type.
 type ListType struct {
@@ -39,6 +41,91 @@ func (*ListType) Name() string { return "list" }
 // Elem returns the ListType's element type.
 func (t *ListType) Elem() DataType { return t.elem }
 
+// StructType describes a nested type parameterized by an ordered sequence
+// of relative types, called its fields.
+type StructType struct {
+	fields []Field        // fields, in declaration order
+	index  map[string]int // field name -> position in fields
+	meta   KeyValueMetadata
+}
+
+// StructOf returns the struct type with fields fs.
+// Each field is copied (including a clone of its metadata), so the returned
+// type does not share the fs slice with the caller.
+//
+// StructOf panics if there are duplicated fields.
+// StructOf panics if there is a field with an invalid DataType.
+func StructOf(fs ...Field) *StructType {
+	if len(fs) == 0 {
+		return &StructType{}
+	}
+
+	fields := make([]Field, len(fs))
+	index := make(map[string]int, len(fs))
+	for i := range fs {
+		src := fs[i]
+		if src.Type == nil {
+			panic("arrow: field with nil DataType")
+		}
+		meta := src.Metadata.clone()
+		if _, dup := index[src.Name]; dup {
+			panic(fmt.Errorf("arrow: duplicate field with name %q", src.Name))
+		}
+		fields[i] = Field{
+			Name:     src.Name,
+			Type:     src.Type,
+			Nullable: src.Nullable,
+			Metadata: meta,
+		}
+		index[src.Name] = i
+	}
+
+	return &StructType{fields: fields, index: index}
+}
+
+// ID returns STRUCT, and Name returns "struct", for every StructType.
+func (*StructType) ID() Type     { return STRUCT }
+func (*StructType) Name() string { return "struct" }
+
+// Fields returns the fields of the struct type (the type's own slice, not a
+// copy). Field returns the i-th field; it panics if i is out of range.
+func (t *StructType) Fields() []Field   { return t.fields }
+func (t *StructType) Field(i int) Field { return t.fields[i] }
+
+// FieldByName returns the field called name and true, or the zero Field
+// and false if the struct type has no field with that name.
+func (t *StructType) FieldByName(name string) (Field, bool) {
+	if i, found := t.index[name]; found {
+		return t.fields[i], true
+	}
+	return Field{}, false
+}
+
+// Field describes one named, typed member of a nested type such as a struct.
+type Field struct {
+	Name     string           // Field name
+	Type     DataType         // The field's data type
+	Nullable bool             // Fields can be nullable
+	Metadata KeyValueMetadata // The field's metadata, if any
+}
+
+// HasMetadata reports whether the field's metadata has at least one key.
+func (f Field) HasMetadata() bool { return len(f.Metadata.keys) != 0 }
+
+// KeyValueMetadata holds metadata as two string slices, keys and values.
+// NOTE(review): presumably values[i] is the value for keys[i]; confirm.
+type KeyValueMetadata struct {
+	keys   []string
+	values []string
+}
+
+func (kv KeyValueMetadata) clone() KeyValueMetadata {
+	if len(kv.keys) == 0 {
+		return KeyValueMetadata{}
+	}
+
+	o := KeyValueMetadata{
+		keys:   make([]string, len(kv.keys)),
+		values: make([]string, len(kv.values)),
+	}
+	copy(o.keys, kv.keys)
+	copy(o.values, kv.values)
+
+	return o
+}
+
+// Compile-time checks that the nested types implement DataType.
 var (
 	_ DataType = (*ListType)(nil)
+	_ DataType = (*StructType)(nil)
 )
diff --git a/go/arrow/datatype_nested_test.go b/go/arrow/datatype_nested_test.go
index 54ffb02..8b3f5b7 100644
--- a/go/arrow/datatype_nested_test.go
+++ b/go/arrow/datatype_nested_test.go
@@ -34,6 +34,8 @@ func TestListOf(t *testing.T) {
 		PrimitiveTypes.Uint64,
 		PrimitiveTypes.Float32,
 		PrimitiveTypes.Float64,
+		ListOf(PrimitiveTypes.Int32),
+		StructOf(),
 	} {
 		t.Run(tc.Name(), func(t *testing.T) {
 			got := ListOf(tc)
@@ -55,4 +57,187 @@ func TestListOf(t *testing.T) {
 			}
 		})
 	}
+
+	for _, dtype := range []DataType{
+		nil,
+		// (*Int32Type)(nil), // FIXME(sbinet): should we make sure this is actually caught?
+		// (*ListType)(nil), // FIXME(sbinet): should we make sure this is actually caught?
+		// (*StructType)(nil), // FIXME(sbinet): should we make sure this is actually caught?
+	} {
+		t.Run("invalid", func(t *testing.T) {
+			defer func() {
+				e := recover()
+				if e == nil {
+					t.Fatalf("test should have panicked but did not")
+				}
+			}()
+
+			_ = ListOf(dtype)
+		})
+	}
+}
+
+// TestStructOf checks StructOf against a table of valid field lists
+// (resulting type, ID, name, field count and lookups by name and index) and
+// checks that duplicated field names make it panic.
+func TestStructOf(t *testing.T) {
+	for _, tc := range []struct {
+		fields []Field
+		want   DataType
+	}{
+		{
+			fields: nil,
+			want:   &StructType{fields: nil, index: nil},
+		},
+		{
+			fields: []Field{{Name: "f1", Type: PrimitiveTypes.Int32}},
+			want: &StructType{
+				fields: []Field{{Name: "f1", Type: PrimitiveTypes.Int32}},
+				index:  map[string]int{"f1": 0},
+			},
+		},
+		{
+			fields: []Field{{Name: "f1", Type: PrimitiveTypes.Int32, Nullable: true}},
+			want: &StructType{
+				fields: []Field{{Name: "f1", Type: PrimitiveTypes.Int32, Nullable: true}},
+				index:  map[string]int{"f1": 0},
+			},
+		},
+		{
+			fields: []Field{
+				{Name: "f1", Type: PrimitiveTypes.Int32},
+				{Name: "", Type: PrimitiveTypes.Int64},
+			},
+			want: &StructType{
+				fields: []Field{
+					{Name: "f1", Type: PrimitiveTypes.Int32},
+					{Name: "", Type: PrimitiveTypes.Int64},
+				},
+				index: map[string]int{"f1": 0, "": 1},
+			},
+		},
+		{
+			fields: []Field{
+				{Name: "f1", Type: PrimitiveTypes.Int32},
+				{Name: "f2", Type: PrimitiveTypes.Int64},
+			},
+			want: &StructType{
+				fields: []Field{
+					{Name: "f1", Type: PrimitiveTypes.Int32},
+					{Name: "f2", Type: PrimitiveTypes.Int64},
+				},
+				index: map[string]int{"f1": 0, "f2": 1},
+			},
+		},
+		{
+			fields: []Field{
+				{Name: "f1", Type: PrimitiveTypes.Int32},
+				{Name: "f2", Type: PrimitiveTypes.Int64},
+				{Name: "f3", Type: ListOf(PrimitiveTypes.Float64)},
+			},
+			want: &StructType{
+				fields: []Field{
+					{Name: "f1", Type: PrimitiveTypes.Int32},
+					{Name: "f2", Type: PrimitiveTypes.Int64},
+					{Name: "f3", Type: ListOf(PrimitiveTypes.Float64)},
+				},
+				index: map[string]int{"f1": 0, "f2": 1, "f3": 2},
+			},
+		},
+		{
+			fields: []Field{
+				{Name: "f1", Type: PrimitiveTypes.Int32},
+				{Name: "f2", Type: PrimitiveTypes.Int64},
+				{Name: "f3", Type: ListOf(ListOf(PrimitiveTypes.Float64))},
+			},
+			want: &StructType{
+				fields: []Field{
+					{Name: "f1", Type: PrimitiveTypes.Int32},
+					{Name: "f2", Type: PrimitiveTypes.Int64},
+					{Name: "f3", Type: ListOf(ListOf(PrimitiveTypes.Float64))},
+				},
+				index: map[string]int{"f1": 0, "f2": 1, "f3": 2},
+			},
+		},
+		{
+			fields: []Field{
+				{Name: "f1", Type: PrimitiveTypes.Int32},
+				{Name: "f2", Type: PrimitiveTypes.Int64},
+				{Name: "f3", Type: ListOf(ListOf(StructOf(Field{Name: "f1", Type: PrimitiveTypes.Float64})))},
+			},
+			want: &StructType{
+				fields: []Field{
+					{Name: "f1", Type: PrimitiveTypes.Int32},
+					{Name: "f2", Type: PrimitiveTypes.Int64},
+					{Name: "f3", Type: ListOf(ListOf(StructOf(Field{Name: "f1", Type: PrimitiveTypes.Float64})))},
+				},
+				index: map[string]int{"f1": 0, "f2": 1, "f3": 2},
+			},
+		},
+	} {
+		t.Run("", func(t *testing.T) {
+			got := StructOf(tc.fields...)
+			if !reflect.DeepEqual(got, tc.want) {
+				t.Fatalf("got=%#v, want=%#v", got, tc.want)
+			}
+
+			if got, want := got.ID(), STRUCT; got != want {
+				t.Fatalf("invalid ID. got=%v, want=%v", got, want)
+			}
+
+			if got, want := got.Name(), "struct"; got != want {
+				t.Fatalf("invalid name. got=%q, want=%q", got, want)
+			}
+
+			if got, want := len(got.Fields()), len(tc.fields); got != want {
+				t.Fatalf("invalid number of fields. got=%d, want=%d", got, want)
+			}
+
+			// No test case declares a field with this name.
+			_, ok := got.FieldByName("not-there")
+			if ok {
+				t.Fatalf("expected an error")
+			}
+
+			// Every non-empty test case declares a field named "f1".
+			if len(tc.fields) > 0 {
+				f1, ok := got.FieldByName("f1")
+				if !ok {
+					t.Fatalf("could not retrieve field 'f1'")
+				}
+				if f1.HasMetadata() {
+					t.Fatalf("field 'f1' should not have metadata")
+				}
+
+				for i := range tc.fields {
+					f := got.Field(i)
+					if f.Name != tc.fields[i].Name {
+						t.Fatalf("incorrect named for field[%d]: got=%q, want=%q", i, f.Name, tc.fields[i].Name)
+					}
+				}
+			}
+		})
+	}
+
+	// Field lists with a duplicated name (empty or not) must make StructOf panic.
+	for _, tc := range []struct {
+		fields []Field
+	}{
+		{
+			fields: []Field{
+				{Name: "", Type: PrimitiveTypes.Int32},
+				{Name: "", Type: PrimitiveTypes.Int32},
+			},
+		},
+		{
+			fields: []Field{
+				{Name: "x", Type: PrimitiveTypes.Int32},
+				{Name: "x", Type: PrimitiveTypes.Int32},
+			},
+		},
+	} {
+		t.Run("", func(t *testing.T) {
+			defer func() {
+				e := recover()
+				if e == nil {
+					t.Fatalf("should have panicked")
+				}
+			}()
+			_ = StructOf(tc.fields...)
+		})
+	}
 }
diff --git a/go/arrow/example_test.go b/go/arrow/example_test.go
index 19a6630..70aadbb 100644
--- a/go/arrow/example_test.go
+++ b/go/arrow/example_test.go
@@ -191,3 +191,93 @@ func Example_listArray() {
 	// List[5]   = (null)
 	// List[6]   = [9]
 }
+
+// This example shows how to create a Struct array.
+// The resulting array should be:
+//  [{‘joe’, 1}, {null, 2}, null, {‘mark’, 4}]
+func Example_structArray() {
+	pool := memory.NewGoAllocator()
+	dtype := arrow.StructOf([]arrow.Field{
+		{Name: "f1", Type: arrow.ListOf(arrow.PrimitiveTypes.Uint8)},
+		{Name: "f2", Type: arrow.PrimitiveTypes.Int32},
+	}...)
+
+	sb := array.NewStructBuilder(pool, dtype)
+	defer sb.Release()
+
+	f1b := sb.FieldBuilder(0).(*array.ListBuilder)
+	defer f1b.Release()
+	f1vb := f1b.ValueBuilder().(*array.Uint8Builder)
+	defer f1vb.Release()
+
+	f2b := sb.FieldBuilder(1).(*array.Int32Builder)
+	defer f2b.Release()
+
+	sb.Reserve(4)
+	f1vb.Reserve(7)
+	f2b.Reserve(3)
+
+	sb.Append(true)
+	f1b.Append(true)
+	f1vb.AppendValues([]byte("joe"), nil)
+	f2b.Append(1)
+
+	sb.Append(true)
+	f1b.AppendNull()
+	f2b.Append(2)
+
+	sb.AppendNull()
+
+	sb.Append(true)
+	f1b.Append(true)
+	f1vb.AppendValues([]byte("mark"), nil)
+	f2b.Append(4)
+
+	arr := sb.NewArray().(*array.Struct)
+	defer arr.Release()
+
+	fmt.Printf("NullN() = %d\n", arr.NullN())
+	fmt.Printf("Len()   = %d\n", arr.Len())
+
+	list := arr.Field(0).(*array.List)
+	defer list.Release()
+
+	offsets := list.Offsets()
+
+	varr := list.ListValues().(*array.Uint8)
+	defer varr.Release()
+
+	ints := arr.Field(1).(*array.Int32)
+	defer ints.Release()
+
+	for i := 0; i < arr.Len(); i++ {
+		if !arr.IsValid(i) {
+			fmt.Printf("Struct[%d] = (null)\n", i)
+			continue
+		}
+		fmt.Printf("Struct[%d] = [", i)
+		// The validity of a list slot is looked up by the slot index i;
+		// offsets[i] only locates the slot's first value in varr.
+		switch {
+		case list.IsValid(i):
+			fmt.Printf("[")
+			for j := offsets[i]; j < offsets[i+1]; j++ {
+				if j != offsets[i] {
+					fmt.Printf(", ")
+				}
+				fmt.Printf("%v", string(varr.Value(int(j))))
+			}
+			fmt.Printf("], ")
+		default:
+			fmt.Printf("(null), ")
+		}
+		fmt.Printf("%d]\n", ints.Value(i))
+	}
+
+	// Output:
+	// NullN() = 1
+	// Len()   = 4
+	// Struct[0] = [[j, o, e], 1]
+	// Struct[1] = [(null), 2]
+	// Struct[2] = (null)
+	// Struct[3] = [[m, a, r, k], 4]
+}
diff --git a/go/arrow/metadata/schema.go b/go/arrow/metadata/schema.go
index 021b6ee..2dad190 100644
--- a/go/arrow/metadata/schema.go
+++ b/go/arrow/metadata/schema.go
@@ -19,19 +19,7 @@ package metadata
 import "github.com/apache/arrow/go/arrow"
 
 type Schema struct {
-	fields      []Field
+	fields      []arrow.Field
 	nameToIndex map[string]int
-	metadata    KeyValueMetadata
-}
-
-type Field struct {
-	name     string           // Field name
-	typ      arrow.DataType   // The field's data type
-	nullable bool             // Fields can be nullable
-	metadata KeyValueMetadata // The field's metadata, if any
-}
-
-type KeyValueMetadata struct {
-	keys   []string
-	values []string
+	metadata    arrow.KeyValueMetadata
 }