You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by sb...@apache.org on 2019/05/31 07:22:32 UTC
[arrow] branch master updated: ARROW-5384: [Go] implement FixedSizeList array
This is an automated email from the ASF dual-hosted git repository.
sbinet pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new a7dadb3 ARROW-5384: [Go] implement FixedSizeList array
a7dadb3 is described below
commit a7dadb37afde6d79b309ce583765c11469413916
Author: Sebastien Binet <bi...@cern.ch>
AuthorDate: Fri May 31 09:22:15 2019 +0200
ARROW-5384: [Go] implement FixedSizeList array
Author: Sebastien Binet <bi...@cern.ch>
Closes #4357 from sbinet/issue-5384 and squashes the following commits:
68fc185f <Sebastien Binet> handle slice-offsets
b7f12be4 <Sebastien Binet> ARROW-5384: implement FixedSizeList array
---
go/arrow/array/array.go | 6 +-
go/arrow/array/array_test.go | 14 +-
go/arrow/array/builder.go | 5 +
go/arrow/array/fixed_size_list.go | 244 +++++++++++++++++++++++++++++++++
go/arrow/array/fixed_size_list_test.go | 225 ++++++++++++++++++++++++++++++
go/arrow/datatype.go | 10 ++
go/arrow/datatype_nested.go | 34 +++++
go/arrow/datatype_nested_test.go | 63 +++++++++
go/arrow/example_test.go | 76 ++++++++++
go/arrow/internal/arrdata/arrdata.go | 75 ++++++++++
go/arrow/ipc/file_reader.go | 16 +++
go/arrow/ipc/metadata.go | 15 ++
go/arrow/ipc/writer.go | 38 +++++
go/arrow/type_string.go | 41 +++++-
14 files changed, 856 insertions(+), 6 deletions(-)
diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go
index ef37aef..82a1c01 100644
--- a/go/arrow/array/array.go
+++ b/go/arrow/array/array.go
@@ -192,11 +192,11 @@ func init() {
arrow.UNION: unsupportedArrayType,
arrow.DICTIONARY: unsupportedArrayType,
arrow.MAP: unsupportedArrayType,
+ arrow.EXTENSION: unsupportedArrayType,
+ arrow.FIXED_SIZE_LIST: func(data *Data) Interface { return NewFixedSizeListData(data) },
+ arrow.DURATION: unsupportedArrayType,
// invalid data types to fill out array size 2⁵-1
- 28: invalidDataType,
- 29: invalidDataType,
- 30: invalidDataType,
31: invalidDataType,
}
}
diff --git a/go/arrow/array/array_test.go b/go/arrow/array/array_test.go
index 014a79a..3a3407f 100644
--- a/go/arrow/array/array_test.go
+++ b/go/arrow/array/array_test.go
@@ -76,9 +76,21 @@ func TestMakeFromData(t *testing.T) {
array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0, 0),
}},
+ {name: "fixed_size_list", d: &testDataType{arrow.FIXED_SIZE_LIST}, child: []*array.Data{
+ array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0, 0),
+ array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0, 0),
+ }},
+
+ // unsupported types
+ {name: "union", d: &testDataType{arrow.UNION}, expPanic: true, expError: "unsupported data type: UNION"},
+ {name: "dictionary", d: &testDataType{arrow.DICTIONARY}, expPanic: true, expError: "unsupported data type: DICTIONARY"},
+ {name: "map", d: &testDataType{arrow.Type(27)}, expPanic: true, expError: "unsupported data type: MAP"},
+ {name: "extension", d: &testDataType{arrow.Type(28)}, expPanic: true, expError: "unsupported data type: EXTENSION"},
+ {name: "duration", d: &testDataType{arrow.Type(30)}, expPanic: true, expError: "unsupported data type: DURATION"},
+
// invalid types
{name: "invalid(-1)", d: &testDataType{arrow.Type(-1)}, expPanic: true, expError: "invalid data type: Type(-1)"},
- {name: "invalid(28)", d: &testDataType{arrow.Type(28)}, expPanic: true, expError: "invalid data type: Type(28)"},
+ {name: "invalid(31)", d: &testDataType{arrow.Type(31)}, expPanic: true, expError: "invalid data type: Type(31)"},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go
index d6cec85..c815cd4 100644
--- a/go/arrow/array/builder.go
+++ b/go/arrow/array/builder.go
@@ -262,6 +262,11 @@ func newBuilder(mem memory.Allocator, dtype arrow.DataType) Builder {
case arrow.UNION:
case arrow.DICTIONARY:
case arrow.MAP:
+ case arrow.EXTENSION:
+ case arrow.FIXED_SIZE_LIST:
+ typ := dtype.(*arrow.FixedSizeListType)
+ return NewFixedSizeListBuilder(mem, typ.Len(), typ.Elem())
+ case arrow.DURATION:
}
panic(fmt.Errorf("arrow/array: unsupported builder for %T", dtype))
}
diff --git a/go/arrow/array/fixed_size_list.go b/go/arrow/array/fixed_size_list.go
new file mode 100644
index 0000000..9816e65
--- /dev/null
+++ b/go/arrow/array/fixed_size_list.go
@@ -0,0 +1,244 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array
+
+import (
+ "fmt"
+ "strings"
+ "sync/atomic"
+
+ "github.com/apache/arrow/go/arrow"
+ "github.com/apache/arrow/go/arrow/internal/bitutil"
+ "github.com/apache/arrow/go/arrow/internal/debug"
+ "github.com/apache/arrow/go/arrow/memory"
+)
+
+// FixedSizeList represents an immutable sequence of N array values.
+type FixedSizeList struct {
+ array
+ n int32
+ values Interface
+ offsets []int32
+}
+
+// NewFixedSizeListData returns a new FixedSizeList array value, from data.
+func NewFixedSizeListData(data *Data) *FixedSizeList {
+ a := &FixedSizeList{}
+ a.refCount = 1
+ a.setData(data)
+ return a
+}
+
+func (a *FixedSizeList) ListValues() Interface { return a.values }
+
+func (a *FixedSizeList) String() string {
+ o := new(strings.Builder)
+ o.WriteString("[")
+ for i := 0; i < a.Len(); i++ {
+ if i > 0 {
+ o.WriteString(" ")
+ }
+ if !a.IsValid(i) {
+ o.WriteString("(null)")
+ continue
+ }
+ j := i + a.array.data.offset
+ beg := int64(a.offsets[j])
+ end := int64(a.offsets[j+1])
+ sub := NewSlice(a.values, beg, end)
+ fmt.Fprintf(o, "%v", sub)
+ sub.Release()
+ }
+ o.WriteString("]")
+ return o.String()
+}
+
+func (a *FixedSizeList) setData(data *Data) {
+ a.array.setData(data)
+ vals := data.buffers[1]
+ if vals != nil {
+ a.offsets = arrow.Int32Traits.CastFromBytes(vals.Bytes())
+ }
+ a.values = MakeFromData(data.childData[0])
+}
+
+// Len returns the number of elements in the array.
+func (a *FixedSizeList) Len() int { return a.array.Len() }
+
+func (a *FixedSizeList) Offsets() []int32 { return a.offsets }
+
+func (a *FixedSizeList) Retain() {
+ a.array.Retain()
+ a.values.Retain()
+}
+
+func (a *FixedSizeList) Release() {
+ a.array.Release()
+ a.values.Release()
+}
+
+type FixedSizeListBuilder struct {
+ builder
+
+ etype arrow.DataType // data type of the list's elements.
+ n int32 // number of elements in the fixed-size list.
+ values Builder // value builder for the list's elements.
+ offsets *Int32Builder
+}
+
+// NewFixedSizeListBuilder returns a builder, using the provided memory allocator.
+// The created list builder will create a list whose elements will be of type etype.
+func NewFixedSizeListBuilder(mem memory.Allocator, n int32, etype arrow.DataType) *FixedSizeListBuilder {
+ return &FixedSizeListBuilder{
+ builder: builder{refCount: 1, mem: mem},
+ etype: etype,
+ n: n,
+ values: newBuilder(mem, etype),
+ offsets: NewInt32Builder(mem),
+ }
+}
+
+// Release decreases the reference count by 1.
+// When the reference count goes to zero, the memory is freed.
+func (b *FixedSizeListBuilder) Release() {
+ debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")
+
+ if atomic.AddInt64(&b.refCount, -1) == 0 {
+ if b.nullBitmap != nil {
+ b.nullBitmap.Release()
+ b.nullBitmap = nil
+ }
+ // Child builders are created once with the builder, so release them exactly once, here.
+ b.values.Release()
+ b.offsets.Release()
+ }
+}
+
+func (b *FixedSizeListBuilder) appendNextOffset() {
+ b.offsets.Append(int32(b.values.Len()))
+}
+
+func (b *FixedSizeListBuilder) Append(v bool) {
+ b.Reserve(1)
+ b.unsafeAppendBoolToBitmap(v)
+ b.appendNextOffset()
+}
+
+func (b *FixedSizeListBuilder) AppendNull() {
+ b.Reserve(1)
+ b.unsafeAppendBoolToBitmap(false)
+ b.appendNextOffset()
+}
+
+func (b *FixedSizeListBuilder) AppendValues(offsets []int32, valid []bool) {
+ b.Reserve(len(valid))
+ b.offsets.AppendValues(offsets, nil)
+ b.builder.unsafeAppendBoolsToBitmap(valid, len(valid))
+}
+
+// unsafeAppend records one list slot as valid or null according to v; capacity must already be reserved.
+func (b *FixedSizeListBuilder) unsafeAppend(v bool) {
+ b.unsafeAppendBoolToBitmap(v)
+}
+
+func (b *FixedSizeListBuilder) unsafeAppendBoolToBitmap(isValid bool) {
+ if isValid {
+ bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
+ } else {
+ b.nulls++
+ }
+ b.length++
+}
+
+func (b *FixedSizeListBuilder) init(capacity int) {
+ b.builder.init(capacity)
+ b.offsets.init(capacity + 1)
+}
+
+// Reserve ensures there is enough space for appending n elements
+// by checking the capacity and calling Resize if necessary.
+func (b *FixedSizeListBuilder) Reserve(n int) {
+ b.builder.reserve(n, b.Resize)
+}
+
+// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
+// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
+func (b *FixedSizeListBuilder) Resize(n int) {
+ if n < minBuilderCapacity {
+ n = minBuilderCapacity
+ }
+
+ if b.capacity == 0 {
+ b.init(n)
+ } else {
+ b.builder.resize(n, b.builder.init)
+ b.offsets.resize(n+1, b.offsets.init)
+ }
+}
+
+func (b *FixedSizeListBuilder) ValueBuilder() Builder {
+ return b.values
+}
+
+// NewArray creates a FixedSizeList array from the memory buffers used by the builder and resets the FixedSizeListBuilder
+// so it can be used to build a new array.
+func (b *FixedSizeListBuilder) NewArray() Interface {
+ return b.NewListArray()
+}
+
+// NewListArray creates a FixedSizeList array from the memory buffers used by the builder and resets the FixedSizeListBuilder
+// so it can be used to build a new array.
+func (b *FixedSizeListBuilder) NewListArray() (a *FixedSizeList) {
+ if b.offsets.Len() != b.length+1 {
+ b.appendNextOffset()
+ }
+ data := b.newData()
+ a = NewFixedSizeListData(data)
+ data.Release()
+ return
+}
+
+func (b *FixedSizeListBuilder) newData() (data *Data) {
+ values := b.values.NewArray()
+ defer values.Release()
+
+ var offsets *memory.Buffer
+ if b.offsets != nil {
+ arr := b.offsets.NewInt32Array()
+ defer arr.Release()
+ offsets = arr.Data().buffers[1]
+ }
+
+ data = NewData(
+ arrow.FixedSizeListOf(b.n, b.etype), b.length,
+ []*memory.Buffer{
+ b.nullBitmap,
+ offsets,
+ },
+ []*Data{values.Data()},
+ b.nulls,
+ 0,
+ )
+ b.reset()
+
+ return
+}
+
+var (
+ _ Interface = (*FixedSizeList)(nil)
+ _ Builder = (*FixedSizeListBuilder)(nil)
+)
diff --git a/go/arrow/array/fixed_size_list_test.go b/go/arrow/array/fixed_size_list_test.go
new file mode 100644
index 0000000..afbf9e8
--- /dev/null
+++ b/go/arrow/array/fixed_size_list_test.go
@@ -0,0 +1,225 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array_test
+
+import (
+ "reflect"
+ "testing"
+
+ "github.com/apache/arrow/go/arrow"
+ "github.com/apache/arrow/go/arrow/array"
+ "github.com/apache/arrow/go/arrow/memory"
+)
+
+func TestFixedSizeListArray(t *testing.T) {
+ pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer pool.AssertSize(t, 0)
+
+ var (
+ vs = []int32{0, 1, 2, 3, 4, 5, 6}
+ lengths = []int{3, 0, 4}
+ isValid = []bool{true, false, true}
+ offsets = []int32{0, 3, 3, 7}
+ )
+
+ lb := array.NewFixedSizeListBuilder(pool, int32(len(vs)), arrow.PrimitiveTypes.Int32)
+ defer lb.Release()
+
+ for i := 0; i < 10; i++ {
+ vb := lb.ValueBuilder().(*array.Int32Builder)
+ vb.Reserve(len(vs))
+
+ pos := 0
+ for i, length := range lengths {
+ lb.Append(isValid[i])
+ for j := 0; j < length; j++ {
+ vb.Append(vs[pos])
+ pos++
+ }
+ }
+
+ arr := lb.NewArray().(*array.FixedSizeList)
+ defer arr.Release()
+
+ arr.Retain()
+ arr.Release()
+
+ if got, want := arr.DataType().ID(), arrow.FIXED_SIZE_LIST; got != want {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+
+ if got, want := arr.Len(), len(isValid); got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ for i := range lengths {
+ if got, want := arr.IsValid(i), isValid[i]; got != want {
+ t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want)
+ }
+ if got, want := arr.IsNull(i), lengths[i] == 0; got != want {
+ t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want)
+ }
+ }
+
+ if got, want := arr.Offsets(), offsets; !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+
+ varr := arr.ListValues().(*array.Int32)
+ if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+ }
+}
+
+func TestFixedSizeListArrayEmpty(t *testing.T) {
+ pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer pool.AssertSize(t, 0)
+
+ lb := array.NewFixedSizeListBuilder(pool, 3, arrow.PrimitiveTypes.Int32)
+ defer lb.Release()
+ arr := lb.NewArray().(*array.FixedSizeList)
+ defer arr.Release()
+ if got, want := arr.Len(), 0; got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+}
+
+func TestFixedSizeListArrayBulkAppend(t *testing.T) {
+ pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer pool.AssertSize(t, 0)
+
+ var (
+ vs = []int32{0, 1, 2, 3, 4, 5, 6}
+ lengths = []int{3, 0, 4}
+ isValid = []bool{true, false, true}
+ offsets = []int32{0, 3, 3, 7}
+ )
+
+ lb := array.NewFixedSizeListBuilder(pool, int32(len(vs)), arrow.PrimitiveTypes.Int32)
+ defer lb.Release()
+ vb := lb.ValueBuilder().(*array.Int32Builder)
+ vb.Reserve(len(vs))
+
+ lb.AppendValues(offsets, isValid)
+ for _, v := range vs {
+ vb.Append(v)
+ }
+
+ arr := lb.NewArray().(*array.FixedSizeList)
+ defer arr.Release()
+
+ if got, want := arr.DataType().ID(), arrow.FIXED_SIZE_LIST; got != want {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+
+ if got, want := arr.Len(), len(isValid); got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ for i := range lengths {
+ if got, want := arr.IsValid(i), isValid[i]; got != want {
+ t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want)
+ }
+ if got, want := arr.IsNull(i), lengths[i] == 0; got != want {
+ t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want)
+ }
+ }
+
+ if got, want := arr.Offsets(), offsets; !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+
+ varr := arr.ListValues().(*array.Int32)
+ if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+}
+
+func TestFixedSizeListArrayStringer(t *testing.T) {
+ pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer pool.AssertSize(t, 0)
+
+ const N = 3
+ var (
+ vs = [][N]int32{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, -9, -8}}
+ isValid = []bool{true, false, true, true}
+ )
+
+ lb := array.NewFixedSizeListBuilder(pool, N, arrow.PrimitiveTypes.Int32)
+ defer lb.Release()
+
+ vb := lb.ValueBuilder().(*array.Int32Builder)
+ vb.Reserve(len(vs))
+
+ for i, v := range vs {
+ lb.Append(isValid[i])
+ vb.AppendValues(v[:], nil)
+ }
+
+ arr := lb.NewArray().(*array.FixedSizeList)
+ defer arr.Release()
+
+ arr.Retain()
+ arr.Release()
+
+ want := `[[0 1 2] (null) [6 7 8] [9 -9 -8]]`
+ if got, want := arr.String(), want; got != want {
+ t.Fatalf("got=%q, want=%q", got, want)
+ }
+}
+
+func TestFixedSizeListArraySlice(t *testing.T) {
+ pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer pool.AssertSize(t, 0)
+
+ const N = 3
+ var (
+ vs = [][N]int32{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, -9, -8}}
+ isValid = []bool{true, false, true, true}
+ )
+
+ lb := array.NewFixedSizeListBuilder(pool, N, arrow.PrimitiveTypes.Int32)
+ defer lb.Release()
+
+ vb := lb.ValueBuilder().(*array.Int32Builder)
+ vb.Reserve(len(vs))
+
+ for i, v := range vs {
+ lb.Append(isValid[i])
+ vb.AppendValues(v[:], nil)
+ }
+
+ arr := lb.NewArray().(*array.FixedSizeList)
+ defer arr.Release()
+
+ arr.Retain()
+ arr.Release()
+
+ want := `[[0 1 2] (null) [6 7 8] [9 -9 -8]]`
+ if got, want := arr.String(), want; got != want {
+ t.Fatalf("got=%q, want=%q", got, want)
+ }
+
+ sub := array.NewSlice(arr, 1, 3).(*array.FixedSizeList)
+ defer sub.Release()
+
+ want = `[(null) [6 7 8]]`
+ if got, want := sub.String(), want; got != want {
+ t.Fatalf("got=%q, want=%q", got, want)
+ }
+}
diff --git a/go/arrow/datatype.go b/go/arrow/datatype.go
index 3329e50..f393770 100644
--- a/go/arrow/datatype.go
+++ b/go/arrow/datatype.go
@@ -110,6 +110,16 @@ const (
// MAP is a repeated struct logical type
MAP
+
+ // Custom data type, implemented by user
+ EXTENSION
+
+ // Fixed size list of some logical type
+ FIXED_SIZE_LIST
+
+ // Measure of elapsed time in either seconds, milliseconds, microseconds
+ // or nanoseconds.
+ DURATION
)
// DataType is the representation of an Arrow type.
diff --git a/go/arrow/datatype_nested.go b/go/arrow/datatype_nested.go
index 59a198f..cf3f99b 100644
--- a/go/arrow/datatype_nested.go
+++ b/go/arrow/datatype_nested.go
@@ -46,6 +46,40 @@ func (t *ListType) String() string { return fmt.Sprintf("list<item: %v>", t.elem
// Elem returns the ListType's element type.
func (t *ListType) Elem() DataType { return t.elem }
+// FixedSizeListType describes a nested type in which each array slot contains
+// a fixed-size sequence of values, all having the same relative type.
+type FixedSizeListType struct {
+ n int32 // number of elements in the list
+ elem DataType // DataType of the list's elements
+}
+
+// FixedSizeListOf returns the fixed-size list type holding n elements of type t.
+// For example, if t represents int32, FixedSizeListOf(10, t) represents [10]int32.
+//
+// FixedSizeListOf panics if t is nil.
+// FixedSizeListOf panics if n is <= 0.
+func FixedSizeListOf(n int32, t DataType) *FixedSizeListType {
+ if t == nil {
+ panic("arrow: nil DataType")
+ }
+ if n <= 0 {
+ panic("arrow: invalid size")
+ }
+ return &FixedSizeListType{elem: t, n: n}
+}
+
+func (*FixedSizeListType) ID() Type { return FIXED_SIZE_LIST }
+func (*FixedSizeListType) Name() string { return "fixed_size_list" }
+func (t *FixedSizeListType) String() string {
+ return fmt.Sprintf("fixed_size_list<item: %v>[%d]", t.elem, t.n)
+}
+
+// Elem returns the FixedSizeListType's element type.
+func (t *FixedSizeListType) Elem() DataType { return t.elem }
+
+// Len returns the FixedSizeListType's size.
+func (t *FixedSizeListType) Len() int32 { return t.n }
+
// StructType describes a nested type parameterized by an ordered sequence
// of relative types, called its fields.
type StructType struct {
diff --git a/go/arrow/datatype_nested_test.go b/go/arrow/datatype_nested_test.go
index ee1fd45..34b7737 100644
--- a/go/arrow/datatype_nested_test.go
+++ b/go/arrow/datatype_nested_test.go
@@ -35,6 +35,7 @@ func TestListOf(t *testing.T) {
PrimitiveTypes.Float32,
PrimitiveTypes.Float64,
ListOf(PrimitiveTypes.Int32),
+ FixedSizeListOf(10, PrimitiveTypes.Int32),
StructOf(),
} {
t.Run(tc.Name(), func(t *testing.T) {
@@ -291,3 +292,65 @@ func TestFieldEqual(t *testing.T) {
})
}
}
+
+func TestFixedSizeListOf(t *testing.T) {
+ for _, tc := range []DataType{
+ FixedWidthTypes.Boolean,
+ PrimitiveTypes.Int8,
+ PrimitiveTypes.Int16,
+ PrimitiveTypes.Int32,
+ PrimitiveTypes.Int64,
+ PrimitiveTypes.Uint8,
+ PrimitiveTypes.Uint16,
+ PrimitiveTypes.Uint32,
+ PrimitiveTypes.Uint64,
+ PrimitiveTypes.Float32,
+ PrimitiveTypes.Float64,
+ ListOf(PrimitiveTypes.Int32),
+ FixedSizeListOf(10, PrimitiveTypes.Int32),
+ StructOf(),
+ } {
+ t.Run(tc.Name(), func(t *testing.T) {
+ const size = 3
+ got := FixedSizeListOf(size, tc)
+ want := &FixedSizeListType{elem: tc, n: size}
+ if !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%#v, want=%#v", got, want)
+ }
+
+ if got, want := got.Name(), "fixed_size_list"; got != want {
+ t.Fatalf("got=%q, want=%q", got, want)
+ }
+
+ if got, want := got.ID(), FIXED_SIZE_LIST; got != want {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+
+ if got, want := got.Elem(), tc; got != want {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+
+ if got, want := got.Len(), int32(size); got != want {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+ })
+ }
+
+ for _, dtype := range []DataType{
+ nil,
+ // (*Int32Type)(nil), // FIXME(sbinet): should we make sure this is actually caught?
+ // (*ListType)(nil), // FIXME(sbinet): should we make sure this is actually caught?
+ // (*StructType)(nil), // FIXME(sbinet): should we make sure this is actually caught?
+ } {
+ t.Run("invalid", func(t *testing.T) {
+ defer func() {
+ e := recover()
+ if e == nil {
+ t.Fatalf("test should have panicked but did not")
+ }
+ }()
+
+ _ = FixedSizeListOf(1, dtype)
+ })
+ }
+}
diff --git a/go/arrow/example_test.go b/go/arrow/example_test.go
index 8015cbf..de65d71 100644
--- a/go/arrow/example_test.go
+++ b/go/arrow/example_test.go
@@ -204,6 +204,82 @@ func Example_listArray() {
// List = [[0 1 2] (null) [3] [4 5] [6 7 8] (null) [9]]
}
+// This example shows how to create a FixedSizeList array.
+// The resulting array should be:
+// [[0, 1, 2], (null), [3, 4, 5], [6, 7, 8], (null)]
+func Example_fixedSizeListArray() {
+ pool := memory.NewGoAllocator()
+
+ lb := array.NewFixedSizeListBuilder(pool, 3, arrow.PrimitiveTypes.Int64)
+ defer lb.Release()
+
+ vb := lb.ValueBuilder().(*array.Int64Builder)
+ // vb is owned by lb and is released together with it; it must not be released here.
+
+ vb.Reserve(10)
+
+ lb.Append(true)
+ vb.Append(0)
+ vb.Append(1)
+ vb.Append(2)
+
+ lb.AppendNull()
+
+ lb.Append(true)
+ vb.Append(3)
+ vb.Append(4)
+ vb.Append(5)
+
+ lb.Append(true)
+ vb.Append(6)
+ vb.Append(7)
+ vb.Append(8)
+
+ lb.AppendNull()
+
+ arr := lb.NewArray().(*array.FixedSizeList)
+ defer arr.Release()
+
+ fmt.Printf("NullN() = %d\n", arr.NullN())
+ fmt.Printf("Len() = %d\n", arr.Len())
+ fmt.Printf("Offsets() = %v\n", arr.Offsets())
+ fmt.Printf("Type() = %v\n", arr.DataType())
+
+ offsets := arr.Offsets()[1:]
+
+ varr := arr.ListValues().(*array.Int64)
+
+ pos := 0
+ for i := 0; i < arr.Len(); i++ {
+ if !arr.IsValid(i) {
+ fmt.Printf("List[%d] = (null)\n", i)
+ continue
+ }
+ fmt.Printf("List[%d] = [", i)
+ for j := pos; j < int(offsets[i]); j++ {
+ if j != pos {
+ fmt.Printf(", ")
+ }
+ fmt.Printf("%v", varr.Value(j))
+ }
+ pos = int(offsets[i])
+ fmt.Printf("]\n")
+ }
+ fmt.Printf("List = %v\n", arr)
+
+ // Output:
+ // NullN() = 2
+ // Len() = 5
+ // Offsets() = [0 3 3 6 9 9]
+ // Type() = fixed_size_list<item: int64>[3]
+ // List[0] = [0, 1, 2]
+ // List[1] = (null)
+ // List[2] = [3, 4, 5]
+ // List[3] = [6, 7, 8]
+ // List[4] = (null)
+ // List = [[0 1 2] (null) [3 4 5] [6 7 8] (null)]
+}
+
// This example shows how to create a Struct array.
// The resulting array should be:
// [{‘joe’, 1}, {null, 2}, null, {‘mark’, 4}]
diff --git a/go/arrow/internal/arrdata/arrdata.go b/go/arrow/internal/arrdata/arrdata.go
index bbdb3c0..c3ee8f1 100644
--- a/go/arrow/internal/arrdata/arrdata.go
+++ b/go/arrow/internal/arrdata/arrdata.go
@@ -36,6 +36,7 @@ func init() {
Records["structs"] = makeStructsRecords()
Records["lists"] = makeListsRecords()
Records["strings"] = makeStringsRecords()
+ Records["fixed_size_lists"] = makeFixedSizeListsRecords()
for k := range Records {
RecordNames = append(RecordNames, k)
@@ -220,6 +221,56 @@ func makeListsRecords() []array.Record {
return recs
}
+func makeFixedSizeListsRecords() []array.Record {
+ mem := memory.NewGoAllocator()
+ const N = 3
+ dtype := arrow.FixedSizeListOf(N, arrow.PrimitiveTypes.Int32)
+ schema := arrow.NewSchema([]arrow.Field{
+ {Name: "fixed_size_list_nullable", Type: dtype, Nullable: true},
+ }, nil)
+
+ mask := []bool{true, false, true}
+
+ chunks := [][]array.Interface{
+ []array.Interface{
+ fixedSizeListOf(mem, N, []array.Interface{
+ arrayOf(mem, []int32{1, 2, 3}, mask),
+ arrayOf(mem, []int32{11, 12, 13}, mask),
+ arrayOf(mem, []int32{21, 22, 23}, mask),
+ }, nil),
+ },
+ []array.Interface{
+ fixedSizeListOf(mem, N, []array.Interface{
+ arrayOf(mem, []int32{-1, -2, -3}, mask),
+ arrayOf(mem, []int32{-11, -12, -13}, mask),
+ arrayOf(mem, []int32{-21, -22, -23}, mask),
+ }, nil),
+ },
+ []array.Interface{
+ fixedSizeListOf(mem, N, []array.Interface{
+ arrayOf(mem, []int32{-1, -2, -3}, mask),
+ arrayOf(mem, []int32{-11, -12, -13}, mask),
+ arrayOf(mem, []int32{-21, -22, -23}, mask),
+ }, []bool{true, false, true}),
+ },
+ }
+
+ defer func() {
+ for _, chunk := range chunks {
+ for _, col := range chunk {
+ col.Release()
+ }
+ }
+ }()
+
+ recs := make([]array.Record, len(chunks))
+ for i, chunk := range chunks {
+ recs[i] = array.NewRecord(schema, chunk, -1)
+ }
+
+ return recs
+}
+
func makeStringsRecords() []array.Record {
mem := memory.NewGoAllocator()
schema := arrow.NewSchema([]arrow.Field{
@@ -385,6 +436,30 @@ func listOf(mem memory.Allocator, values []array.Interface, valids []bool) *arra
return bldr.NewListArray()
}
+func fixedSizeListOf(mem memory.Allocator, n int32, values []array.Interface, valids []bool) *array.FixedSizeList {
+ if mem == nil {
+ mem = memory.NewGoAllocator()
+ }
+
+ bldr := array.NewFixedSizeListBuilder(mem, n, values[0].DataType())
+ defer bldr.Release()
+
+ valid := func(i int) bool {
+ return valids[i]
+ }
+
+ if valids == nil {
+ valid = func(i int) bool { return true }
+ }
+
+ for i, value := range values {
+ bldr.Append(valid(i))
+ buildArray(bldr.ValueBuilder(), value)
+ }
+
+ return bldr.NewListArray()
+}
+
func structOf(mem memory.Allocator, dtype *arrow.StructType, fields []array.Interface, valids []bool) *array.Struct {
if mem == nil {
mem = memory.NewGoAllocator()
diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go
index 81462ba..b51a356 100644
--- a/go/arrow/ipc/file_reader.go
+++ b/go/arrow/ipc/file_reader.go
@@ -371,6 +371,9 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) array.Interface {
case *arrow.ListType:
return ctx.loadList(dt)
+ case *arrow.FixedSizeListType:
+ return ctx.loadFixedSizeList(dt)
+
case *arrow.StructType:
return ctx.loadStruct(dt)
@@ -465,6 +468,19 @@ func (ctx *arrayLoaderContext) loadList(dt *arrow.ListType) array.Interface {
return array.NewListData(data)
}
+func (ctx *arrayLoaderContext) loadFixedSizeList(dt *arrow.FixedSizeListType) array.Interface {
+ field, buffers := ctx.loadCommon(2)
+ buffers = append(buffers, ctx.buffer())
+
+ sub := ctx.loadChild(dt.Elem())
+ defer sub.Release()
+
+ data := array.NewData(dt, int(field.Length()), buffers, []*array.Data{sub.Data()}, int(field.NullCount()), 0)
+ defer data.Release()
+
+ return array.NewFixedSizeListData(data)
+}
+
func (ctx *arrayLoaderContext) loadStruct(dt *arrow.StructType) array.Interface {
field, buffers := ctx.loadCommon(1)
diff --git a/go/arrow/ipc/metadata.go b/go/arrow/ipc/metadata.go
index b9718bd..82afedc 100644
--- a/go/arrow/ipc/metadata.go
+++ b/go/arrow/ipc/metadata.go
@@ -323,6 +323,13 @@ func (fv *fieldVisitor) visit(dt arrow.DataType) {
flatbuf.ListStart(fv.b)
fv.offset = flatbuf.ListEnd(fv.b)
+ case *arrow.FixedSizeListType:
+ fv.dtype = flatbuf.TypeFixedSizeList
+ fv.kids = append(fv.kids, fieldToFB(fv.b, arrow.Field{Name: "item", Type: dt.Elem()}, fv.memo))
+ flatbuf.FixedSizeListStart(fv.b)
+ flatbuf.FixedSizeListAddListSize(fv.b, dt.Len())
+ fv.offset = flatbuf.FixedSizeListEnd(fv.b)
+
default:
err := errors.Errorf("arrow/ipc: invalid data type %v", dt)
panic(err) // FIXME(sbinet): implement all data-types.
@@ -500,6 +507,14 @@ func concreteTypeFromFB(typ flatbuf.Type, data flatbuffers.Table, children []arr
}
return arrow.ListOf(children[0].Type), nil
+ case flatbuf.TypeFixedSizeList:
+ var dt flatbuf.FixedSizeList
+ dt.Init(data.Bytes, data.Pos)
+ if len(children) != 1 {
+ return nil, errors.Errorf("arrow/ipc: FixedSizeList must have exactly 1 child field (got=%d)", len(children))
+ }
+ return arrow.FixedSizeListOf(dt.ListSize(), children[0].Type), nil
+
case flatbuf.TypeStruct_:
return arrow.StructOf(children...), nil
diff --git a/go/arrow/ipc/writer.go b/go/arrow/ipc/writer.go
index e1aff57..def8f17 100644
--- a/go/arrow/ipc/writer.go
+++ b/go/arrow/ipc/writer.go
@@ -336,6 +336,44 @@ func (w *recordEncoder) visit(p *payload, arr array.Interface) error {
}
w.depth++
+ case *arrow.FixedSizeListType:
+ arr := arr.(*array.FixedSizeList)
+ voffsets, err := w.getZeroBasedValueOffsets(arr)
+ if err != nil {
+ return errors.Wrapf(err, "could not retrieve zero-based value offsets for array %T", arr)
+ }
+ p.body = append(p.body, voffsets)
+
+ w.depth--
+ var (
+ values = arr.ListValues()
+ mustRelease = false
+ values_offset int64
+ values_length int64
+ )
+ defer func() {
+ if mustRelease {
+ values.Release()
+ }
+ }()
+
+ if voffsets != nil {
+ values_offset = int64(arr.Offsets()[0])
+ values_length = int64(arr.Offsets()[arr.Len()]) - values_offset
+ }
+
+ if len(arr.Offsets()) != 0 || values_length < int64(values.Len()) {
+ // must also slice the values
+ values = array.NewSlice(values, values_offset, values_length)
+ mustRelease = true
+ }
+ err = w.visit(p, values)
+
+ if err != nil {
+ return errors.Wrapf(err, "could not visit list element for array %T", arr)
+ }
+ w.depth++
+
default:
panic(errors.Errorf("arrow/ipc: unknown array %T (dtype=%T)", arr, dtype))
}
diff --git a/go/arrow/type_string.go b/go/arrow/type_string.go
index b41c5ac..cc0917e 100644
--- a/go/arrow/type_string.go
+++ b/go/arrow/type_string.go
@@ -4,9 +4,46 @@ package arrow
import "strconv"
-const _Type_name = "NULLBOOLUINT8INT8UINT16INT16UINT32INT32UINT64INT64HALF_FLOATFLOAT32FLOAT64STRINGBINARYFIXED_SIZE_BINARYDATE32DATE64TIMESTAMPTIME32TIME64INTERVALDECIMALLISTSTRUCTUNIONDICTIONARYMAP"
+func _() {
+ // An "invalid array index" compiler error signifies that the constant values have changed.
+ // Re-run the stringer command to generate them again.
+ var x [1]struct{}
+ _ = x[NULL-0]
+ _ = x[BOOL-1]
+ _ = x[UINT8-2]
+ _ = x[INT8-3]
+ _ = x[UINT16-4]
+ _ = x[INT16-5]
+ _ = x[UINT32-6]
+ _ = x[INT32-7]
+ _ = x[UINT64-8]
+ _ = x[INT64-9]
+ _ = x[HALF_FLOAT-10]
+ _ = x[FLOAT32-11]
+ _ = x[FLOAT64-12]
+ _ = x[STRING-13]
+ _ = x[BINARY-14]
+ _ = x[FIXED_SIZE_BINARY-15]
+ _ = x[DATE32-16]
+ _ = x[DATE64-17]
+ _ = x[TIMESTAMP-18]
+ _ = x[TIME32-19]
+ _ = x[TIME64-20]
+ _ = x[INTERVAL-21]
+ _ = x[DECIMAL-22]
+ _ = x[LIST-23]
+ _ = x[STRUCT-24]
+ _ = x[UNION-25]
+ _ = x[DICTIONARY-26]
+ _ = x[MAP-27]
+ _ = x[EXTENSION-28]
+ _ = x[FIXED_SIZE_LIST-29]
+ _ = x[DURATION-30]
+}
+
+const _Type_name = "NULLBOOLUINT8INT8UINT16INT16UINT32INT32UINT64INT64HALF_FLOATFLOAT32FLOAT64STRINGBINARYFIXED_SIZE_BINARYDATE32DATE64TIMESTAMPTIME32TIME64INTERVALDECIMALLISTSTRUCTUNIONDICTIONARYMAPEXTENSIONFIXED_SIZE_LISTDURATION"
-var _Type_index = [...]uint8{0, 4, 8, 13, 17, 23, 28, 34, 39, 45, 50, 60, 67, 74, 80, 86, 103, 109, 115, 124, 130, 136, 144, 151, 155, 161, 166, 176, 179}
+var _Type_index = [...]uint8{0, 4, 8, 13, 17, 23, 28, 34, 39, 45, 50, 60, 67, 74, 80, 86, 103, 109, 115, 124, 130, 136, 144, 151, 155, 161, 166, 176, 179, 188, 203, 211}
func (i Type) String() string {
if i < 0 || i >= Type(len(_Type_index)-1) {